From cd487310a3602353a2adf1bec4f8a33eae49da3c Mon Sep 17 00:00:00 2001 From: alex Date: Mon, 28 Nov 2016 17:38:17 +0000 Subject: [PATCH] * ignore svg and math tags (most of attributes are ignored in the current implementation) * add hreflang attribute as a safe one ( allows ) * if the meta tag contains a http_equiv attribute: ** if the value is safe, output the meta tag with the http_equiv attribute (avoid the ) ** if the value is not safe, ignore the meta tag * ignore link tag with unsafe rel attribute (preconnect, prerender, etc... : see http://microformats.org/wiki/existing-rel-values ) * always output the X-UA-Compatible meta tag at the top of the head. git-svn-id: file:///srv/svn/repo/yukari/trunk@46 f3bd38d9-da89-464d-a02a-eb04e43141b5 --- morty.go | 69 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/morty.go b/morty.go index 942dc7a..5df2a93 100644 --- a/morty.go +++ b/morty.go @@ -38,7 +38,9 @@ var UNSAFE_ELEMENTS [][]byte = [][]byte{ []byte("canvas"), []byte("embed"), //[]byte("iframe"), + []byte("math"), []byte("script"), + []byte("svg"), } var SAFE_ATTRIBUTES [][]byte = [][]byte{ @@ -58,6 +60,7 @@ var SAFE_ATTRIBUTES [][]byte = [][]byte{ []byte("for"), []byte("height"), []byte("hidden"), + []byte("hreflang"), []byte("id"), []byte("lang"), []byte("media"), @@ -95,6 +98,37 @@ var SELF_CLOSING_ELEMENTS [][]byte = [][]byte{ []byte("wbr"), } +var LINK_REL_SAFE_VALUES [][]byte = [][]byte{ + []byte("alternate"), + []byte("archives"), + []byte("author"), + []byte("copyright"), + []byte("first"), + []byte("help"), + []byte("icon"), + []byte("index"), + []byte("last"), + []byte("license"), + []byte("manifest"), + []byte("next"), + []byte("pingback"), + []byte("prev"), + []byte("publisher"), + []byte("search"), + []byte("shortcut icon"), + []byte("stylesheet"), + []byte("up"), +} + +var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{ + // X-UA-Compatible will be added automatically, so it can be skipped + 
[]byte("date"), + []byte("last-modified"), + []byte("refresh"), // URL rewrite + // []byte("location"), TODO URL rewrite + []byte("content-language"), +} + type Proxy struct { Key []byte RequestTimeout time.Duration @@ -123,7 +157,9 @@ input[type=checkbox]#mortytoggle:checked ~ div { display: none; } ` -var HTML_META_CONTENT_TYPE string = "" +var HTML_HEAD_CONTENT_TYPE string = ` + +` func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) { @@ -166,11 +202,18 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) { defer fasthttp.ReleaseRequest(req) req.SetConnectionClose() - requestURIStr := string(requestURI) + reqQuery := parsedURI.Query() + ctx.QueryArgs().VisitAll(func(key, value []byte) { + reqQuery.Add(string(key), string(value)) + }) - log.Println("getting", requestURIStr) + parsedURI.RawQuery = reqQuery.Encode() - req.SetRequestURI(requestURIStr) + uriStr := parsedURI.String() + + log.Println("getting", uriStr) + + req.SetRequestURI(uriStr) req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36")) resp := fasthttp.AcquireResponse() @@ -209,7 +252,7 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) { } } } - error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr) + error_message := fmt.Sprintf("invalid response: %d", resp.StatusCode()) p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message)) return } @@ -408,7 +451,7 @@ func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) { } if bytes.Equal(tag, []byte("head")) { - fmt.Fprintf(out, HTML_META_CONTENT_TYPE) + fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE) } if bytes.Equal(tag, []byte("form")) { @@ -486,7 +529,7 @@ func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) { attrName := attr[0] attrValue := attr[1] if bytes.Equal(attrName, []byte("rel")) { - if bytes.Equal(attrValue, []byte("dns-prefetch")) { + if !inArray(attrValue, 
LINK_REL_SAFE_VALUES) { exclude = true break } @@ -517,6 +560,10 @@ func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) { attrValue := attr[1] if bytes.Equal(attrName, []byte("http-equiv")) { http_equiv = bytes.ToLower(attrValue) + // exclude some + if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) { + return + } } if bytes.Equal(attrName, []byte("content")) { content = attrValue @@ -527,10 +574,6 @@ func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) { } } - if bytes.Equal(http_equiv, []byte("content-type")) { - return - } - out.Write([]byte(" 0 { + fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv) + } sanitizeAttrs(rc, out, attrs) } out.Write([]byte(">")) @@ -605,7 +651,6 @@ func (rc *RequestConfig) ProxifyURI(uri string) (string, error) { if rc.Key == nil { return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil } - return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil }