* ignore svg and math tags (most of their attributes are ignored in the current implementation)

* add hreflang attribute as a safe one ( allows <meta rel="alternate" hreflang="x" href="" /> )
* if the meta tag contains an http-equiv attribute:
** if the value is safe, output the meta tag together with its http-equiv attribute (this avoids emitting a bare <meta content="IE=edge">)
** if the value is not safe, ignore the meta tag
* ignore link tags with an unsafe rel attribute (preconnect, prerender, etc.; see http://microformats.org/wiki/existing-rel-values )
* always output <meta http-equiv="X-UA-Compatible" content="IE=edge"> at the top of the head.

git-svn-id: file:///srv/svn/repo/yukari/trunk@46 f3bd38d9-da89-464d-a02a-eb04e43141b5
This commit is contained in:
alex 2016-11-28 17:38:17 +00:00
parent 9aee27e43d
commit cd487310a3

View File

@ -38,7 +38,9 @@ var UNSAFE_ELEMENTS [][]byte = [][]byte{
[]byte("canvas"),
[]byte("embed"),
//[]byte("iframe"),
[]byte("math"),
[]byte("script"),
[]byte("svg"),
}
var SAFE_ATTRIBUTES [][]byte = [][]byte{
@ -58,6 +60,7 @@ var SAFE_ATTRIBUTES [][]byte = [][]byte{
[]byte("for"),
[]byte("height"),
[]byte("hidden"),
[]byte("hreflang"),
[]byte("id"),
[]byte("lang"),
[]byte("media"),
@ -95,6 +98,37 @@ var SELF_CLOSING_ELEMENTS [][]byte = [][]byte{
[]byte("wbr"),
}
// LINK_REL_SAFE_VALUES is the allow-list of <link rel="..."> values.
// sanitizeLinkTag compares the whole rel attribute value against these
// entries and excludes the link tag when no entry matches.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	// Two-token value kept as a single entry; it is matched as one string.
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
// LINK_HTTP_EQUIV_SAFE_VALUES is the allow-list of <meta http-equiv="...">
// values. sanitizeMetaTag lower-cases the attribute value before the lookup,
// so every entry here is lower-case; a meta tag whose http-equiv value is not
// listed is dropped. Despite the LINK_ prefix, the list is used for meta tags.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it does not need to be listed.
	[]byte("date"),
	[]byte("last-modified"),
	// The URL inside a refresh directive is rewritten before output.
	[]byte("refresh"),
	// TODO: enable once URL rewriting is implemented for it.
	// []byte("location"),
	[]byte("content-language"),
}
type Proxy struct {
Key []byte
RequestTimeout time.Duration
@ -123,7 +157,9 @@ input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
// HTML_META_CONTENT_TYPE is a meta tag declaring the document as UTF-8 HTML.
var HTML_META_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">`
// HTML_HEAD_CONTENT_TYPE holds the meta tags that sanitizeHTML writes when it
// meets a head tag: a UTF-8 Content-Type declaration followed by
// X-UA-Compatible IE=edge, each terminated by a newline.
var HTML_HEAD_CONTENT_TYPE = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n" +
	"<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n"
func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
@ -166,11 +202,18 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
defer fasthttp.ReleaseRequest(req)
req.SetConnectionClose()
requestURIStr := string(requestURI)
reqQuery := parsedURI.Query()
ctx.QueryArgs().VisitAll(func(key, value []byte) {
reqQuery.Add(string(key), string(value))
})
log.Println("getting", requestURIStr)
parsedURI.RawQuery = reqQuery.Encode()
req.SetRequestURI(requestURIStr)
uriStr := parsedURI.String()
log.Println("getting", uriStr)
req.SetRequestURI(uriStr)
req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
resp := fasthttp.AcquireResponse()
@ -209,7 +252,7 @@ func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
}
}
}
error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
error_message := fmt.Sprintf("invalid response: %d", resp.StatusCode())
p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
return
}
@ -408,7 +451,7 @@ func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
}
if bytes.Equal(tag, []byte("head")) {
fmt.Fprintf(out, HTML_META_CONTENT_TYPE)
fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
}
if bytes.Equal(tag, []byte("form")) {
@ -486,7 +529,7 @@ func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
attrName := attr[0]
attrValue := attr[1]
if bytes.Equal(attrName, []byte("rel")) {
if bytes.Equal(attrValue, []byte("dns-prefetch")) {
if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
exclude = true
break
}
@ -517,6 +560,10 @@ func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
attrValue := attr[1]
if bytes.Equal(attrName, []byte("http-equiv")) {
http_equiv = bytes.ToLower(attrValue)
// exclude some <meta http-equiv="..." ..>
if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
return
}
}
if bytes.Equal(attrName, []byte("content")) {
content = attrValue
@ -527,10 +574,6 @@ func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
}
}
if bytes.Equal(http_equiv, []byte("content-type")) {
return
}
out.Write([]byte("<meta"))
urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
@ -546,6 +589,9 @@ func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
}
} else {
if len(http_equiv) > 0 {
fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
}
sanitizeAttrs(rc, out, attrs)
}
out.Write([]byte(">"))
@ -605,7 +651,6 @@ func (rc *RequestConfig) ProxifyURI(uri string) (string, error) {
if rc.Key == nil {
return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
}
return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
}