From 04ff44b7efa8f9fd52799a580967d7eaa2b14503 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 15 Dec 2016 22:32:34 +0000 Subject: [PATCH] [enh] parse and filter Content-Type. svg, mathml, multipart and xml (because of namespaces) are forbidden. The charset parameter in Content-Type is only set when it was set by the original server. For the */xhtml+* Content-Types, the conversion to UTF-8 is now done (it wasn't the case before). The string type is used because of the mime package API. git-svn-id: file:///srv/svn/repo/yukari/trunk@63 f3bd38d9-da89-464d-a02a-eb04e43141b5 --- contenttype/contenttype.go | 98 +++++++++++++ contenttype/contenttype_test.go | 247 ++++++++++++++++++++++++++++++++ morty.go | 98 +++++++++++-- 3 files changed, 430 insertions(+), 13 deletions(-) create mode 100644 contenttype/contenttype.go create mode 100644 contenttype/contenttype_test.go diff --git a/contenttype/contenttype.go b/contenttype/contenttype.go new file mode 100644 index 0000000..4be3405 --- /dev/null +++ b/contenttype/contenttype.go @@ -0,0 +1,98 @@ +package contenttype + +import ( + "mime" + "strings" +) + +type ContentType struct { + TopLevelType string + SubType string + Suffix string + Parameters map[string]string +} + +func (contenttype *ContentType) String() string { + var mimetype string + if contenttype.Suffix == "" { + if contenttype.SubType == "" { + mimetype = contenttype.TopLevelType + } else { + mimetype = contenttype.TopLevelType + "/" + contenttype.SubType + } + } else { + mimetype = contenttype.TopLevelType + "/" + contenttype.SubType + "+" + contenttype.Suffix + } + return mime.FormatMediaType(mimetype, contenttype.Parameters) +} + +func (contenttype *ContentType) Equals(other ContentType) bool { + if contenttype.TopLevelType != other.TopLevelType || + contenttype.SubType != other.SubType || + contenttype.Suffix != other.Suffix || + len(contenttype.Parameters) != len(other.Parameters) { + return false + } + for k, v := range contenttype.Parameters { + if other.Parameters[k] != v 
{ + return false + } + } + return true +} + +func (contenttype *ContentType) FilterParameters(parameters map[string]bool) { + for k, _ := range contenttype.Parameters { + if !parameters[k] { + delete(contenttype.Parameters, k) + } + } +} + +func ParseContentType(contenttype string) (ContentType, error) { + mimetype, params, err := mime.ParseMediaType(contenttype) + if err != nil { + return ContentType{"", "", "", params}, err + } + splitted_mimetype := strings.SplitN(strings.ToLower(mimetype), "/", 2) + if len(splitted_mimetype) <= 1 { + return ContentType{splitted_mimetype[0], "", "", params}, nil + } else { + splitted_subtype := strings.SplitN(splitted_mimetype[1], "+", 2) + if len(splitted_subtype) == 1 { + return ContentType{splitted_mimetype[0], splitted_subtype[0], "", params}, nil + } else { + return ContentType{splitted_mimetype[0], splitted_subtype[0], splitted_subtype[1], params}, nil + } + } + +} + +type Filter func(contenttype ContentType) bool + +func NewFilterContains(partialMimeType string) Filter { + return func(contenttype ContentType) bool { + return strings.Contains(contenttype.TopLevelType, partialMimeType) || + strings.Contains(contenttype.SubType, partialMimeType) || + strings.Contains(contenttype.Suffix, partialMimeType) + } +} + +func NewFilterEquals(TopLevelType, SubType, Suffix string) Filter { + return func(contenttype ContentType) bool { + return ((TopLevelType != "*" && TopLevelType == contenttype.TopLevelType) || (TopLevelType == "*")) && + ((SubType != "*" && SubType == contenttype.SubType) || (SubType == "*")) && + ((Suffix != "*" && Suffix == contenttype.Suffix) || (Suffix == "*")) + } +} + +func NewFilterOr(contentTypeFilterList []Filter) Filter { + return func(contenttype ContentType) bool { + for _, contentTypeFilter := range contentTypeFilterList { + if contentTypeFilter(contenttype) { + return true + } + } + return false + } +} diff --git a/contenttype/contenttype_test.go b/contenttype/contenttype_test.go new file mode 100644 
index 0000000..1b634f0 --- /dev/null +++ b/contenttype/contenttype_test.go @@ -0,0 +1,247 @@ +package contenttype + +import ( + "testing" +) + +type ParseContentTypeTestCase struct { + Input string + ExpectedOutput *ContentType /* or nil if an error is expected */ + ExpectedString *string /* or nil if equals to Input */ +} + +var parseContentTypeTestCases []ParseContentTypeTestCase = []ParseContentTypeTestCase{ + ParseContentTypeTestCase{ + "text/html", + &ContentType{"text", "html", "", map[string]string{}}, + nil, + }, + ParseContentTypeTestCase{ + "text/svg+xml; charset=UTF-8", + &ContentType{"text", "svg", "xml", map[string]string{"charset": "UTF-8"}}, + nil, + }, + ParseContentTypeTestCase{ + "text/", + nil, + nil, + }, + ParseContentTypeTestCase{ + "text; charset=UTF-8", + &ContentType{"text", "", "", map[string]string{"charset": "UTF-8"}}, + nil, + }, + ParseContentTypeTestCase{ + "text/+xml; charset=UTF-8", + &ContentType{"text", "", "xml", map[string]string{"charset": "UTF-8"}}, + nil, + }, +} + +type ContentTypeEqualsTestCase struct { + A, B ContentType + Equals bool +} + +var Map_Empty map[string]string = map[string]string{} +var Map_A map[string]string = map[string]string{"a": "value_a"} +var Map_B map[string]string = map[string]string{"b": "value_b"} +var Map_AB map[string]string = map[string]string{"a": "value_a", "b": "value_b"} + +var ContentType_E ContentType = ContentType{"a", "b", "c", Map_Empty} +var ContentType_A ContentType = ContentType{"a", "b", "c", Map_A} +var ContentType_B ContentType = ContentType{"a", "b", "c", Map_B} +var ContentType_AB ContentType = ContentType{"a", "b", "c", Map_AB} + +var contentTypeEqualsTestCases []ContentTypeEqualsTestCase = []ContentTypeEqualsTestCase{ + // TopLevelType, SubType, Suffix + ContentTypeEqualsTestCase{ContentType_E, ContentType{"a", "b", "c", Map_Empty}, true}, + ContentTypeEqualsTestCase{ContentType_E, ContentType{"o", "b", "c", Map_Empty}, false}, + ContentTypeEqualsTestCase{ContentType_E, 
ContentType{"a", "o", "c", Map_Empty}, false}, + ContentTypeEqualsTestCase{ContentType_E, ContentType{"a", "b", "o", Map_Empty}, false}, + // Parameters + ContentTypeEqualsTestCase{ContentType_A, ContentType_A, true}, + ContentTypeEqualsTestCase{ContentType_B, ContentType_B, true}, + ContentTypeEqualsTestCase{ContentType_AB, ContentType_AB, true}, + ContentTypeEqualsTestCase{ContentType_A, ContentType_E, false}, + ContentTypeEqualsTestCase{ContentType_A, ContentType_B, false}, + ContentTypeEqualsTestCase{ContentType_B, ContentType_A, false}, + ContentTypeEqualsTestCase{ContentType_AB, ContentType_A, false}, + ContentTypeEqualsTestCase{ContentType_AB, ContentType_E, false}, + ContentTypeEqualsTestCase{ContentType_A, ContentType_AB, false}, +} + +type FilterTestCase struct { + Input Filter + TrueValues []ContentType + FalseValues []ContentType +} + +var filterTestCases []FilterTestCase = []FilterTestCase{ + FilterTestCase{ + NewFilterContains("xml"), + []ContentType{ + ContentType{"xml", "", "", Map_Empty}, + ContentType{"text", "xml", "", Map_Empty}, + ContentType{"text", "html", "xml", Map_Empty}, + }, + []ContentType{ + ContentType{"text", "svg", "", map[string]string{"script": "javascript"}}, + ContentType{"java", "script", "", Map_Empty}, + }, + }, + FilterTestCase{ + NewFilterEquals("application", "xhtml", "*"), + []ContentType{ + ContentType{"application", "xhtml", "xml", Map_Empty}, + ContentType{"application", "xhtml", "", Map_Empty}, + ContentType{"application", "xhtml", "zip", Map_Empty}, + ContentType{"application", "xhtml", "zip", Map_AB}, + }, + []ContentType{ + ContentType{"application", "javascript", "", Map_Empty}, + ContentType{"text", "xhtml", "", Map_Empty}, + }, + }, + FilterTestCase{ + NewFilterEquals("application", "*", ""), + []ContentType{ + ContentType{"application", "xhtml", "", Map_Empty}, + ContentType{"application", "javascript", "", Map_Empty}, + }, + []ContentType{ + ContentType{"text", "xhtml", "", Map_Empty}, + ContentType{"text", 
"xhtml", "xml", Map_Empty}, + }, + }, + FilterTestCase{ + NewFilterEquals("*", "javascript", ""), + []ContentType{ + ContentType{"application", "javascript", "", Map_Empty}, + ContentType{"text", "javascript", "", Map_Empty}, + }, + []ContentType{ + ContentType{"text", "html", "", Map_Empty}, + ContentType{"text", "javascript", "zip", Map_Empty}, + }, + }, + FilterTestCase{ + NewFilterOr([]Filter{ + NewFilterEquals("application", "*", ""), + NewFilterEquals("*", "javascript", ""), + }), + []ContentType{ + ContentType{"application", "javascript", "", Map_Empty}, + ContentType{"text", "javascript", "", Map_Empty}, + ContentType{"application", "xhtml", "", Map_Empty}, + }, + []ContentType{ + ContentType{"text", "html", "", Map_Empty}, + ContentType{"application", "xhtml", "xml", Map_Empty}, + }, + }, +} + +type FilterParametersTestCase struct { + Input map[string]string + Filter map[string]bool + Output map[string]string +} + +var filterParametersTestCases []FilterParametersTestCase = []FilterParametersTestCase{ + FilterParametersTestCase{ + map[string]string{}, + map[string]bool{"A": true, "B": true}, + map[string]string{}, + }, + FilterParametersTestCase{ + map[string]string{"A": "value_A", "B": "value_B"}, + map[string]bool{}, + map[string]string{}, + }, + FilterParametersTestCase{ + map[string]string{"A": "value_A", "B": "value_B"}, + map[string]bool{"A": true}, + map[string]string{"A": "value_A"}, + }, + FilterParametersTestCase{ + map[string]string{"A": "value_A", "B": "value_B"}, + map[string]bool{"A": true, "B": true}, + map[string]string{"A": "value_A", "B": "value_B"}, + }, +} + +func TestContentTypeEquals(t *testing.T) { + for _, testCase := range contentTypeEqualsTestCases { + if !testCase.A.Equals(testCase.B) && testCase.Equals { + t.Errorf(`Must be equals "%s"="%s"`, testCase.A, testCase.B) + } else if testCase.A.Equals(testCase.B) && !testCase.Equals { + t.Errorf(`Mustn't be equals "%s"!="%s"`, testCase.A, testCase.B) + } + } +} + +func 
TestParseContentType(t *testing.T) { + for _, testCase := range parseContentTypeTestCases { + // test ParseContentType + contentType, err := ParseContentType(testCase.Input) + if testCase.ExpectedOutput == nil { + // error expected + if err == nil { + // but there is no error + t.Errorf(`Expecting error for "%s"`, testCase.Input) + } + } else { + // no expected error + if err != nil { + t.Errorf(`Unexpecting error for "%s" : %s`, testCase.Input, err) + } else if !contentType.Equals(*testCase.ExpectedOutput) { + // the parsed contentType doesn't matched + t.Errorf(`Unexpecting result for "%s", instead got "%s"`, testCase.ExpectedOutput.String(), contentType.String()) + } else { + // ParseContentType is fine, checking String() + contentTypeString := contentType.String() + expectedString := testCase.Input + if testCase.ExpectedString != nil { + expectedString = *testCase.ExpectedString + } + if contentTypeString != expectedString { + t.Errorf(`Error with String() output of "%s", got "%s", ContentType{"%s", "%s", "%s", "%s"}`, expectedString, contentTypeString, contentType.TopLevelType, contentType.SubType, contentType.Suffix, contentType.Parameters) + } + } + } + } +} + +func TestFilters(t *testing.T) { + for _, testCase := range filterTestCases { + for _, contentType := range testCase.TrueValues { + if !testCase.Input(contentType) { + t.Errorf(`Filter "%s" must accept the value "%s"`, testCase.Input, contentType) + } + } + for _, contentType := range testCase.FalseValues { + if testCase.Input(contentType) { + t.Errorf(`Filter "%s" mustn't accept the value "%s"`, testCase.Input, contentType) + } + } + } +} + +func TestFilterParameters(t *testing.T) { + for _, testCase := range filterParametersTestCases { + // copy Input since the map will be modified + InputCopy := make(map[string]string) + for k, v := range testCase.Input { + InputCopy[k] = v + } + // apply filter + contentType := ContentType{"", "", "", InputCopy} + contentType.FilterParameters(testCase.Filter) + // 
test + contentTypeOutput := ContentType{"", "", "", testCase.Output} + if !contentTypeOutput.Equals(contentType) { + t.Errorf(`FilterParameters error : %s becomes %s with this filter %s`, testCase.Input, contentType.Parameters, testCase.Filter) + } + } +} diff --git a/morty.go b/morty.go index 822f8e5..4369356 100644 --- a/morty.go +++ b/morty.go @@ -20,6 +20,8 @@ import ( "golang.org/x/net/html" "golang.org/x/net/html/charset" "golang.org/x/text/encoding" + + "github.com/dalf/morty/contenttype" ) const ( @@ -34,6 +36,57 @@ var CLIENT *fasthttp.Client = &fasthttp.Client{ var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?") +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types +// https://www.w3.org/TR/2009/WD-MathML3-20090604/mathml.pdf +// http://planetsvg.com/tools/mime.php +var FORBIDDEN_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{ + // javascript + contenttype.NewFilterContains("javascript"), + contenttype.NewFilterContains("ecmascript"), + contenttype.NewFilterEquals("application", "js", "*"), + // no xml (can contain xhtml or css) + contenttype.NewFilterEquals("text", "xml", "*"), + contenttype.NewFilterEquals("text", "xml-external-parsed-entity", "*"), + contenttype.NewFilterEquals("application", "xml", "*"), + contenttype.NewFilterEquals("application", "xml-external-parsed-entity", "*"), + contenttype.NewFilterEquals("application", "xslt", "xml"), + // no mathml + contenttype.NewFilterEquals("application", "mathml", "xml"), + contenttype.NewFilterEquals("application", "mathml-presentation", "xml"), + contenttype.NewFilterEquals("application", "mathml-content", "xml"), + // no svg + contenttype.NewFilterEquals("image", "svg", "xml"), + contenttype.NewFilterEquals("image", "svg-xml", "*"), + // no cache + contenttype.NewFilterEquals("text", "cache-manifest", "*"), + // no 
multipart + contenttype.NewFilterEquals("multipart", "*", "*"), + // no xul + contenttype.NewFilterEquals("application", "vnd.mozilla.xul", "xml"), + // no htc + contenttype.NewFilterEquals("text", "x-component", "*"), + // no flash + contenttype.NewFilterEquals("application", "x-shockwave-flash", "*"), + contenttype.NewFilterEquals("video", "x-flv", ""), + contenttype.NewFilterEquals("video", "vnd.sealed-swf", ""), + // no know format to have issues + contenttype.NewFilterEquals("image", "wmf", "*"), + contenttype.NewFilterEquals("image", "emf", "*"), + // some of the microsoft and IE mime types + contenttype.NewFilterEquals("text", "vbs", "*"), + contenttype.NewFilterEquals("text", "vbscript", "*"), + contenttype.NewFilterEquals("text", "scriptlet", "*"), + contenttype.NewFilterEquals("application", "x-vbs", "*"), + contenttype.NewFilterEquals("application", "olescript", "*"), + contenttype.NewFilterEquals("application", "x-msmetafile", "*"), + // no css (sometime, rendering depend on the browser) + contenttype.NewFilterEquals("application", "x-pointplus", "*"), +}) + +var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{ + "charset": true, +} + var UNSAFE_ELEMENTS [][]byte = [][]byte{ []byte("applet"), []byte("canvas"), @@ -251,26 +304,43 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) { return } - contentType := resp.Header.Peek("Content-Type") + contentTypeBytes := resp.Header.Peek("Content-Type") - if contentType == nil { + if contentTypeBytes == nil { // HTTP status code 503 : Service Unavailable p.serveMainPage(ctx, 503, errors.New("invalid content type")) return } - if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) { + contentTypeString := string(contentTypeBytes) + + // decode Content-Type header + contentType, error := contenttype.ParseContentType(contentTypeString) + if error != nil { + // HTTP status code 503 : Service Unavailable + p.serveMainPage(ctx, 503, errors.New("invalid content type")) + return + } 
+ + // deny access to forbidden content type + if FORBIDDEN_CONTENTTYPE_FILTER(contentType) { // HTTP status code 403 : Forbidden p.serveMainPage(ctx, 403, errors.New("forbidden content type")) return } - contentInfo := bytes.SplitN(contentType, []byte(";"), 2) + // HACK : replace */xhtml by text/html + if contentType.SubType == "xhtml" { + contentType.TopLevelType = "text" + contentType.SubType = "html" + contentType.Suffix = "" + } + // conversion to UTF-8 var responseBody []byte - if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) { - e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType)) + if contentType.TopLevelType == "text" { + e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString) if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) { responseBody, err = e.NewDecoder().Bytes(resp.Body()) if err != nil { @@ -281,20 +351,22 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) { } else { responseBody = resp.Body() } + // update the charset or specify it + contentType.Parameters["charset"] = "UTF-8" } else { responseBody = resp.Body() } - if bytes.Contains(contentType, []byte("xhtml")) { - ctx.SetContentType("text/html; charset=UTF-8") - } else { - ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0])) - } + // + contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS) + + // set the content type + ctx.SetContentType(contentType.String()) switch { - case bytes.Contains(contentType, []byte("css")): + case contentType.SubType == "css" && contentType.Suffix == "": sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody) - case bytes.Contains(contentType, []byte("html")): + case contentType.SubType == "html" && contentType.Suffix == "": sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody) default: if ctx.Request.Header.Peek("Content-Disposition") != nil {