[enh] parse and filter Content-Type.

SVG, MathML, multipart and XML (because of namespaces) content types are forbidden.
The charset parameter in Content-Type is only set when it is set by the original server.
For */xhtml+* Content-Types, the conversion to UTF-8 is now performed (this was not the case before).
The string type is used because of the mime package API.

git-svn-id: file:///srv/svn/repo/yukari/trunk@63 f3bd38d9-da89-464d-a02a-eb04e43141b5
This commit is contained in:
alex 2016-12-15 22:32:34 +00:00
parent 781a5a6588
commit 04ff44b7ef
3 changed files with 430 additions and 13 deletions

View File

@ -0,0 +1,98 @@
package contenttype
import (
"mime"
"strings"
)
// ContentType is a parsed media type as carried by a Content-Type header,
// for example "application/xhtml+xml; charset=UTF-8".
type ContentType struct {
	// TopLevelType is the part before the "/" ("application").
	TopLevelType string
	// SubType is the part between the "/" and the optional "+" ("xhtml").
	SubType string
	// Suffix is the part after the "+" ("xml"), or "" when there is none.
	Suffix string
	// Parameters holds the media type parameters ("charset" -> "UTF-8").
	Parameters map[string]string
}

// String rebuilds the header value with mime.FormatMediaType.
//
// The "/SubType" part is left out only when both SubType and Suffix are
// empty; a non-empty Suffix is always rendered as "+Suffix" after the
// (possibly empty) SubType.
func (ct *ContentType) String() string {
	mimetype := ct.TopLevelType
	if ct.SubType != "" || ct.Suffix != "" {
		mimetype += "/" + ct.SubType
	}
	if ct.Suffix != "" {
		mimetype += "+" + ct.Suffix
	}
	return mime.FormatMediaType(mimetype, ct.Parameters)
}

// Equals reports whether other has the same TopLevelType, SubType and Suffix
// and exactly the same set of parameters (same keys, same values).
func (ct *ContentType) Equals(other ContentType) bool {
	if ct.TopLevelType != other.TopLevelType ||
		ct.SubType != other.SubType ||
		ct.Suffix != other.Suffix ||
		len(ct.Parameters) != len(other.Parameters) {
		return false
	}
	for k, v := range ct.Parameters {
		// The presence check matters: a plain lookup yields "" for a missing
		// key, which would make {"x": ""} look equal to {"y": ""}.
		if otherValue, ok := other.Parameters[k]; !ok || otherValue != v {
			return false
		}
	}
	return true
}

// FilterParameters removes, in place, every parameter whose name is not
// mapped to true in parameters.
func (ct *ContentType) FilterParameters(parameters map[string]bool) {
	for k := range ct.Parameters {
		if !parameters[k] {
			delete(ct.Parameters, k)
		}
	}
}
// ParseContentType parses a Content-Type header value with
// mime.ParseMediaType and splits the lower-cased media type into its
// top-level type, subtype and suffix.
//
// The media type is cut at the first "/" and the remainder at the first "+".
// When mime.ParseMediaType fails, its error is returned together with a
// ContentType whose type fields are empty.
func ParseContentType(contenttype string) (ContentType, error) {
	mediaType, parameters, err := mime.ParseMediaType(contenttype)
	if err != nil {
		return ContentType{"", "", "", parameters}, err
	}
	mediaType = strings.ToLower(mediaType)

	// No "/": the whole media type is the top-level type.
	slash := strings.Index(mediaType, "/")
	if slash < 0 {
		return ContentType{mediaType, "", "", parameters}, nil
	}
	topLevelType, rest := mediaType[:slash], mediaType[slash+1:]

	// No "+": there is a subtype but no suffix.
	plus := strings.Index(rest, "+")
	if plus < 0 {
		return ContentType{topLevelType, rest, "", parameters}, nil
	}
	return ContentType{topLevelType, rest[:plus], rest[plus+1:], parameters}, nil
}
// Filter is a predicate over a ContentType: it returns true when the given
// content type matches the filter.
type Filter func(contenttype ContentType) bool
// NewFilterContains returns a Filter that matches when partialMimeType is a
// substring of the top-level type, the subtype or the suffix. Parameters are
// not inspected.
func NewFilterContains(partialMimeType string) Filter {
	return func(ct ContentType) bool {
		for _, part := range [...]string{ct.TopLevelType, ct.SubType, ct.Suffix} {
			if strings.Contains(part, partialMimeType) {
				return true
			}
		}
		return false
	}
}
// NewFilterEquals returns a Filter that matches when each of the three
// components equals the corresponding argument. The argument "*" is a
// wildcard that accepts any value, including the empty string.
func NewFilterEquals(TopLevelType, SubType, Suffix string) Filter {
	accepts := func(pattern, value string) bool {
		return pattern == "*" || pattern == value
	}
	return func(ct ContentType) bool {
		return accepts(TopLevelType, ct.TopLevelType) &&
			accepts(SubType, ct.SubType) &&
			accepts(Suffix, ct.Suffix)
	}
}
// NewFilterOr returns a Filter that matches as soon as one of the given
// filters matches. Filters are tried in slice order; an empty list never
// matches.
func NewFilterOr(contentTypeFilterList []Filter) Filter {
	return func(ct ContentType) bool {
		for i := range contentTypeFilterList {
			if contentTypeFilterList[i](ct) {
				return true
			}
		}
		return false
	}
}

View File

@ -0,0 +1,247 @@
package contenttype
import (
"testing"
)
// ParseContentTypeTestCase describes one ParseContentType input together with
// the expected parse result and the expected String() output.
type ParseContentTypeTestCase struct {
Input string
ExpectedOutput *ContentType /* nil when ParseContentType must return an error */
ExpectedString *string /* nil when String() must return exactly Input */
}
// parseContentTypeTestCases is the table driven through TestParseContentType.
// ExpectedString is left nil everywhere, so String() must reproduce Input.
var parseContentTypeTestCases = []ParseContentTypeTestCase{
	{
		Input:          "text/html",
		ExpectedOutput: &ContentType{"text", "html", "", map[string]string{}},
	},
	{
		Input:          "text/svg+xml; charset=UTF-8",
		ExpectedOutput: &ContentType{"text", "svg", "xml", map[string]string{"charset": "UTF-8"}},
	},
	{
		// nothing after the slash: an error is expected
		Input: "text/",
	},
	{
		Input:          "text; charset=UTF-8",
		ExpectedOutput: &ContentType{"text", "", "", map[string]string{"charset": "UTF-8"}},
	},
	{
		Input:          "text/+xml; charset=UTF-8",
		ExpectedOutput: &ContentType{"text", "", "xml", map[string]string{"charset": "UTF-8"}},
	},
}
// ContentTypeEqualsTestCase pairs two content types with the result expected
// from A.Equals(B).
type ContentTypeEqualsTestCase struct {
A, B ContentType
Equals bool
}
// Parameter maps shared by the test tables of this file.
var (
	Map_Empty = map[string]string{}
	Map_A     = map[string]string{"a": "value_a"}
	Map_B     = map[string]string{"b": "value_b"}
	Map_AB    = map[string]string{"a": "value_a", "b": "value_b"}
)
// Content types that share the same type "a/b+c" and differ only by their
// parameters.
var (
	ContentType_E  = ContentType{"a", "b", "c", Map_Empty}
	ContentType_A  = ContentType{"a", "b", "c", Map_A}
	ContentType_B  = ContentType{"a", "b", "c", Map_B}
	ContentType_AB = ContentType{"a", "b", "c", Map_AB}
)
// contentTypeEqualsTestCases is the table driven through TestContentTypeEquals.
var contentTypeEqualsTestCases = []ContentTypeEqualsTestCase{
	// differences in TopLevelType, SubType, Suffix
	{A: ContentType_E, B: ContentType{"a", "b", "c", Map_Empty}, Equals: true},
	{A: ContentType_E, B: ContentType{"o", "b", "c", Map_Empty}, Equals: false},
	{A: ContentType_E, B: ContentType{"a", "o", "c", Map_Empty}, Equals: false},
	{A: ContentType_E, B: ContentType{"a", "b", "o", Map_Empty}, Equals: false},
	// differences in Parameters
	{A: ContentType_A, B: ContentType_A, Equals: true},
	{A: ContentType_B, B: ContentType_B, Equals: true},
	{A: ContentType_AB, B: ContentType_AB, Equals: true},
	{A: ContentType_A, B: ContentType_E, Equals: false},
	{A: ContentType_A, B: ContentType_B, Equals: false},
	{A: ContentType_B, B: ContentType_A, Equals: false},
	{A: ContentType_AB, B: ContentType_A, Equals: false},
	{A: ContentType_AB, B: ContentType_E, Equals: false},
	{A: ContentType_A, B: ContentType_AB, Equals: false},
}
// FilterTestCase lists, for one Filter (Input), the content types it must
// accept (TrueValues) and the content types it must reject (FalseValues).
type FilterTestCase struct {
Input Filter
TrueValues []ContentType
FalseValues []ContentType
}
// filterTestCases is the table driven through TestFilters.
var filterTestCases = []FilterTestCase{
	{
		Input: NewFilterContains("xml"),
		TrueValues: []ContentType{
			{"xml", "", "", Map_Empty},
			{"text", "xml", "", Map_Empty},
			{"text", "html", "xml", Map_Empty},
		},
		FalseValues: []ContentType{
			{"text", "svg", "", map[string]string{"script": "javascript"}},
			{"java", "script", "", Map_Empty},
		},
	},
	{
		Input: NewFilterEquals("application", "xhtml", "*"),
		TrueValues: []ContentType{
			{"application", "xhtml", "xml", Map_Empty},
			{"application", "xhtml", "", Map_Empty},
			{"application", "xhtml", "zip", Map_Empty},
			{"application", "xhtml", "zip", Map_AB},
		},
		FalseValues: []ContentType{
			{"application", "javascript", "", Map_Empty},
			{"text", "xhtml", "", Map_Empty},
		},
	},
	{
		Input: NewFilterEquals("application", "*", ""),
		TrueValues: []ContentType{
			{"application", "xhtml", "", Map_Empty},
			{"application", "javascript", "", Map_Empty},
		},
		FalseValues: []ContentType{
			{"text", "xhtml", "", Map_Empty},
			{"text", "xhtml", "xml", Map_Empty},
		},
	},
	{
		Input: NewFilterEquals("*", "javascript", ""),
		TrueValues: []ContentType{
			{"application", "javascript", "", Map_Empty},
			{"text", "javascript", "", Map_Empty},
		},
		FalseValues: []ContentType{
			{"text", "html", "", Map_Empty},
			{"text", "javascript", "zip", Map_Empty},
		},
	},
	{
		Input: NewFilterOr([]Filter{
			NewFilterEquals("application", "*", ""),
			NewFilterEquals("*", "javascript", ""),
		}),
		TrueValues: []ContentType{
			{"application", "javascript", "", Map_Empty},
			{"text", "javascript", "", Map_Empty},
			{"application", "xhtml", "", Map_Empty},
		},
		FalseValues: []ContentType{
			{"text", "html", "", Map_Empty},
			{"application", "xhtml", "xml", Map_Empty},
		},
	},
}
// FilterParametersTestCase describes one FilterParameters scenario: the
// parameters before filtering (Input), the map handed to FilterParameters
// (Filter) and the parameters expected afterwards (Output).
type FilterParametersTestCase struct {
Input map[string]string
Filter map[string]bool
Output map[string]string
}
// filterParametersTestCases is the table driven through TestFilterParameters.
var filterParametersTestCases = []FilterParametersTestCase{
	{
		// nothing to filter
		Input:  map[string]string{},
		Filter: map[string]bool{"A": true, "B": true},
		Output: map[string]string{},
	},
	{
		// nothing is allowed
		Input:  map[string]string{"A": "value_A", "B": "value_B"},
		Filter: map[string]bool{},
		Output: map[string]string{},
	},
	{
		// only A is allowed
		Input:  map[string]string{"A": "value_A", "B": "value_B"},
		Filter: map[string]bool{"A": true},
		Output: map[string]string{"A": "value_A"},
	},
	{
		// everything is allowed
		Input:  map[string]string{"A": "value_A", "B": "value_B"},
		Filter: map[string]bool{"A": true, "B": true},
		Output: map[string]string{"A": "value_A", "B": "value_B"},
	},
}
// TestContentTypeEquals checks Equals against every entry of
// contentTypeEqualsTestCases.
func TestContentTypeEquals(t *testing.T) {
	for _, testCase := range contentTypeEqualsTestCases {
		areEqual := testCase.A.Equals(testCase.B)
		switch {
		case testCase.Equals && !areEqual:
			t.Errorf(`Must be equals "%s"="%s"`, testCase.A, testCase.B)
		case !testCase.Equals && areEqual:
			t.Errorf(`Mustn't be equals "%s"!="%s"`, testCase.A, testCase.B)
		}
	}
}
// TestParseContentType runs ParseContentType on every entry of
// parseContentTypeTestCases, then checks that String() gives back the
// expected header value.
func TestParseContentType(t *testing.T) {
	for _, testCase := range parseContentTypeTestCases {
		parsed, err := ParseContentType(testCase.Input)

		// A nil ExpectedOutput means the input must be rejected.
		if testCase.ExpectedOutput == nil {
			if err == nil {
				t.Errorf(`Expecting error for "%s"`, testCase.Input)
			}
			continue
		}

		// From here on the input must parse to ExpectedOutput.
		if err != nil {
			t.Errorf(`Unexpecting error for "%s" : %s`, testCase.Input, err)
			continue
		}
		if !parsed.Equals(*testCase.ExpectedOutput) {
			t.Errorf(`Unexpecting result for "%s", instead got "%s"`, testCase.ExpectedOutput.String(), parsed.String())
			continue
		}

		// The parse result is right; now check String().
		want := testCase.Input
		if testCase.ExpectedString != nil {
			want = *testCase.ExpectedString
		}
		if got := parsed.String(); got != want {
			t.Errorf(`Error with String() output of "%s", got "%s", ContentType{"%s", "%s", "%s", "%s"}`, want, got, parsed.TopLevelType, parsed.SubType, parsed.Suffix, parsed.Parameters)
		}
	}
}
// TestFilters checks every filter of filterTestCases against the content
// types it must accept and the ones it must reject.
//
// A Filter is a func value and has no useful textual form, so a failing
// filter is identified by its index in filterTestCases. Content types are
// printed with %+v so that empty fields stay visible.
func TestFilters(t *testing.T) {
	for i, testCase := range filterTestCases {
		for _, contentType := range testCase.TrueValues {
			if !testCase.Input(contentType) {
				t.Errorf(`Filter #%d must accept the value %+v`, i, contentType)
			}
		}
		for _, contentType := range testCase.FalseValues {
			if testCase.Input(contentType) {
				t.Errorf(`Filter #%d mustn't accept the value %+v`, i, contentType)
			}
		}
	}
}
// TestFilterParameters applies FilterParameters to a copy of each Input map
// of filterParametersTestCases and compares what is left with Output.
func TestFilterParameters(t *testing.T) {
	for _, testCase := range filterParametersTestCases {
		// FilterParameters deletes entries in place, so work on a copy to
		// keep the table intact.
		inputCopy := make(map[string]string, len(testCase.Input))
		for k, v := range testCase.Input {
			inputCopy[k] = v
		}

		// apply filter
		contentType := ContentType{"", "", "", inputCopy}
		contentType.FilterParameters(testCase.Filter)

		// compare with the expected parameters
		expected := ContentType{"", "", "", testCase.Output}
		if !expected.Equals(contentType) {
			// %v rather than %s: the filter is a map[string]bool, which %s
			// cannot format.
			t.Errorf(`FilterParameters error : %v becomes %v with this filter %v`, testCase.Input, contentType.Parameters, testCase.Filter)
		}
	}
}

View File

@ -20,6 +20,8 @@ import (
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"github.com/dalf/morty/contenttype"
)
const (
@ -34,6 +36,57 @@ var CLIENT *fasthttp.Client = &fasthttp.Client{
// CSS_URL_REGEXP matches a CSS "url(...)" reference. Submatch 1 is the
// optional opening quote, submatch 2 the run of accepted characters that
// follows any leading spaces, tabs or form feeds, and submatch 3 the optional
// closing quote. The closing parenthesis is optional.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
// FORBIDDEN_CONTENTTYPE_FILTER matches the content types for which the proxy
// answers 403 "forbidden content type".
//
// Sources used to build the list:
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types
// https://www.w3.org/TR/2009/WD-MathML3-20090604/mathml.pdf
// http://planetsvg.com/tools/mime.php
var FORBIDDEN_CONTENTTYPE_FILTER = contenttype.NewFilterOr([]contenttype.Filter{
	// JavaScript, whatever the top-level type, subtype or suffix
	contenttype.NewFilterContains("javascript"),
	contenttype.NewFilterContains("ecmascript"),
	contenttype.NewFilterEquals("application", "js", "*"),

	// XML (it can contain XHTML or CSS)
	contenttype.NewFilterEquals("text", "xml", "*"),
	contenttype.NewFilterEquals("text", "xml-external-parsed-entity", "*"),
	contenttype.NewFilterEquals("application", "xml", "*"),
	contenttype.NewFilterEquals("application", "xml-external-parsed-entity", "*"),
	contenttype.NewFilterEquals("application", "xslt", "xml"),

	// MathML
	contenttype.NewFilterEquals("application", "mathml", "xml"),
	contenttype.NewFilterEquals("application", "mathml-presentation", "xml"),
	contenttype.NewFilterEquals("application", "mathml-content", "xml"),

	// SVG
	contenttype.NewFilterEquals("image", "svg", "xml"),
	contenttype.NewFilterEquals("image", "svg-xml", "*"),

	// cache manifest
	contenttype.NewFilterEquals("text", "cache-manifest", "*"),

	// multipart
	contenttype.NewFilterEquals("multipart", "*", "*"),

	// XUL
	contenttype.NewFilterEquals("application", "vnd.mozilla.xul", "xml"),

	// HTC
	contenttype.NewFilterEquals("text", "x-component", "*"),

	// Flash
	contenttype.NewFilterEquals("application", "x-shockwave-flash", "*"),
	contenttype.NewFilterEquals("video", "x-flv", ""),
	contenttype.NewFilterEquals("video", "vnd.sealed-swf", ""),

	// image formats (presumably: formats known to have issues)
	contenttype.NewFilterEquals("image", "wmf", "*"),
	contenttype.NewFilterEquals("image", "emf", "*"),

	// some of the Microsoft and IE MIME types
	contenttype.NewFilterEquals("text", "vbs", "*"),
	contenttype.NewFilterEquals("text", "vbscript", "*"),
	contenttype.NewFilterEquals("text", "scriptlet", "*"),
	contenttype.NewFilterEquals("application", "x-vbs", "*"),
	contenttype.NewFilterEquals("application", "olescript", "*"),
	contenttype.NewFilterEquals("application", "x-msmetafile", "*"),

	// CSS served under this type (rendering sometimes depends on the browser)
	contenttype.NewFilterEquals("application", "x-pointplus", "*"),
})
// ALLOWED_CONTENTTYPE_PARAMETERS names the Content-Type parameters that are
// kept when the response header is rebuilt; it is the map handed to
// ContentType.FilterParameters.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
var UNSAFE_ELEMENTS [][]byte = [][]byte{
[]byte("applet"),
[]byte("canvas"),
@ -251,26 +304,43 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
return
}
contentType := resp.Header.Peek("Content-Type")
contentTypeBytes := resp.Header.Peek("Content-Type")
if contentType == nil {
if contentTypeBytes == nil {
// HTTP status code 503 : Service Unavailable
p.serveMainPage(ctx, 503, errors.New("invalid content type"))
return
}
if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
contentTypeString := string(contentTypeBytes)
// decode Content-Type header
contentType, error := contenttype.ParseContentType(contentTypeString)
if error != nil {
// HTTP status code 503 : Service Unavailable
p.serveMainPage(ctx, 503, errors.New("invalid content type"))
return
}
// deny access to forbidden content type
if FORBIDDEN_CONTENTTYPE_FILTER(contentType) {
// HTTP status code 403 : Forbidden
p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
return
}
contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
// HACK : replace */xhtml by text/html
if contentType.SubType == "xhtml" {
contentType.TopLevelType = "text"
contentType.SubType = "html"
contentType.Suffix = ""
}
// conversion to UTF-8
var responseBody []byte
if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
if contentType.TopLevelType == "text" {
e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
responseBody, err = e.NewDecoder().Bytes(resp.Body())
if err != nil {
@ -281,20 +351,22 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
} else {
responseBody = resp.Body()
}
// update the charset or specify it
contentType.Parameters["charset"] = "UTF-8"
} else {
responseBody = resp.Body()
}
if bytes.Contains(contentType, []byte("xhtml")) {
ctx.SetContentType("text/html; charset=UTF-8")
} else {
ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
}
//
contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
// set the content type
ctx.SetContentType(contentType.String())
switch {
case bytes.Contains(contentType, []byte("css")):
case contentType.SubType == "css" && contentType.Suffix == "":
sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
case bytes.Contains(contentType, []byte("html")):
case contentType.SubType == "html" && contentType.Suffix == "":
sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
default:
if ctx.Request.Header.Peek("Content-Disposition") != nil {