package main

import (
	"bytes"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"flag"
	"fmt"
	"io"
	"log"
	"net/url"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/valyala/fasthttp"
	"golang.org/x/net/html"
	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding"
)

const (
	STATE_DEFAULT     int = 0
	STATE_IN_STYLE    int = 1
	STATE_IN_NOSCRIPT int = 2
)

var CLIENT *fasthttp.Client = &fasthttp.Client{
	MaxResponseBodySize: 10 * 1024 * 1024, // 10M
}

var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")

var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}

var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"), []byte("accesskey"), []byte("align"), []byte("alt"),
	[]byte("as"), []byte("autocomplete"), []byte("charset"), []byte("checked"),
	[]byte("class"), []byte("content"), []byte("contenteditable"), []byte("contextmenu"),
	[]byte("dir"), []byte("for"), []byte("height"), []byte("hidden"),
	[]byte("hreflang"), []byte("id"), []byte("lang"), []byte("media"),
	[]byte("method"), []byte("name"), []byte("nowrap"), []byte("placeholder"),
	[]byte("property"), []byte("rel"), []byte("spellcheck"), []byte("tabindex"),
	[]byte("target"), []byte("title"), []byte("translate"), []byte("type"),
	[]byte("value"), []byte("width"),
}

var SELF_CLOSING_ELEMENTS [][]byte = [][]byte{
	[]byte("area"), []byte("base"), []byte("br"), []byte("col"),
	[]byte("embed"), []byte("hr"), []byte("img"), []byte("input"),
	[]byte("keygen"), []byte("link"), []byte("meta"), []byte("param"),
	[]byte("source"), []byte("track"), []byte("wbr"),
}

var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"), []byte("archives"), []byte("author"), []byte("copyright"),
	[]byte("first"), []byte("help"), []byte("icon"), []byte("index"),
	[]byte("last"), []byte("license"), []byte("manifest"), []byte("next"),
	[]byte("pingback"), []byte("prev"), []byte("publisher"), []byte("search"),
	[]byte("shortcut icon"), []byte("stylesheet"), []byte("up"),
}

var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}

type Proxy struct {
	Key            []byte
	RequestTimeout time.Duration
}

type RequestConfig struct {
	Key     []byte
	BaseURL *url.URL
}

// NOTE: the markup of the three HTML snippets below was stripped from this listing;
// the strings are minimal reconstructions inferred from the format verbs and the
// surviving text, not the original templates.

// Injected right after every <form ...> tag: hidden fields carrying the form's
// target URL and its HMAC, matching the "mortyurl"/"mortyhash" parameters read
// by popRequestParam.
var HTML_FORM_EXTENSION string = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

// Injected before </body>: a small header linking back to the original page
// (the single %s is the original URL).
var HTML_BODY_EXTENSION string = `
<div id="mortyheader">
  <p>This is a proxified and sanitized view of the page,<br />
  visit <a href="%s" rel="noreferrer">original site</a>.</p>
</div>
`

// Injected right after <head>: declare the UTF-8 charset, since the body is
// re-encoded to UTF-8 before being sent to the client.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
`
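// How a proxified request looks (derived from popRequestParam and
// RequestConfig.ProxifyURI below; the listen address is just the default):
//
//	http://127.0.0.1:3000/?mortyurl=https%3A%2F%2Fexample.com%2F
//
// When the proxy is started with -key, every request must also carry
// mortyhash=<hex HMAC-SHA256 of the raw target URL>, which is checked by
// verifyRequestURI.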
func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
	if appRequestHandler(ctx) {
		return
	}

	requestHash := popRequestParam(ctx, []byte("mortyhash"))
	requestURI := popRequestParam(ctx, []byte("mortyurl"))

	if requestURI == nil {
		p.serveMainPage(ctx, 200, nil)
		return
	}

	if p.Key != nil {
		if !verifyRequestURI(requestURI, requestHash, p.Key) {
			// HTTP status code 403 : Forbidden
			p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
			return
		}
	}

	parsedURI, err := url.Parse(string(requestURI))
	// check the parse error before touching parsedURI to avoid a nil dereference
	if err != nil {
		// HTTP status code 500 : Internal Server Error
		p.serveMainPage(ctx, 500, err)
		return
	}

	if strings.HasSuffix(parsedURI.Host, ".onion") {
		// HTTP status code 501 : Not Implemented
		p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
		return
	}

	req := fasthttp.AcquireRequest()
	defer fasthttp.ReleaseRequest(req)
	req.SetConnectionClose()

	requestURIStr := string(requestURI)

	log.Println("getting", requestURIStr)

	req.SetRequestURI(requestURIStr)
	req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))

	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseResponse(resp)

	req.Header.SetMethodBytes(ctx.Method())
	if ctx.IsPost() || ctx.IsPut() {
		req.SetBody(ctx.PostBody())
	}

	err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)

	if err != nil {
		if err == fasthttp.ErrTimeout {
			// HTTP status code 504 : Gateway Time-Out
			p.serveMainPage(ctx, 504, err)
		} else {
			// HTTP status code 500 : Internal Server Error
			p.serveMainPage(ctx, 500, err)
		}
		return
	}

	if resp.StatusCode() != 200 {
		switch resp.StatusCode() {
		case 301, 302, 303, 307, 308:
			loc := resp.Header.Peek("Location")
			if loc != nil {
				rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
				redirectURL, err := rc.ProxifyURI(loc)
				if err == nil {
					ctx.SetStatusCode(resp.StatusCode())
					ctx.Response.Header.Add("Location", redirectURL)
					log.Println("redirect to", string(loc))
					return
				}
			}
		}
		errorMessage := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
		p.serveMainPage(ctx, resp.StatusCode(), errors.New(errorMessage))
		return
	}

	contentType := resp.Header.Peek("Content-Type")
	if contentType == nil {
		// HTTP status code 503 : Service Unavailable
		p.serveMainPage(ctx, 503, errors.New("invalid content type"))
		return
	}

	if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
		// HTTP status code 403 : Forbidden
		p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
		return
	}

	contentInfo := bytes.SplitN(contentType, []byte(";"), 2)

	var responseBody []byte

	if len(contentInfo) == 2 && bytes.Contains(contentInfo[0], []byte("text")) {
		e, ename, _ := charset.DetermineEncoding(resp.Body(), string(contentType))
		if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
			responseBody, err = e.NewDecoder().Bytes(resp.Body())
			if err != nil {
				// HTTP status code 503 : Service Unavailable
				p.serveMainPage(ctx, 503, err)
				return
			}
		} else {
			responseBody = resp.Body()
		}
	} else {
		responseBody = resp.Body()
	}

	if bytes.Contains(contentType, []byte("xhtml")) {
		ctx.SetContentType("text/html; charset=UTF-8")
	} else {
		ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
	}

	switch {
	case bytes.Contains(contentType, []byte("css")):
		sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
	case bytes.Contains(contentType, []byte("html")):
		sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
	default:
		// forward the upstream Content-Disposition (e.g. an attachment filename)
		// to the client; the original read this from the incoming request header,
		// which looks unintended
		if contentDisposition := resp.Header.Peek("Content-Disposition"); contentDisposition != nil {
			ctx.Response.Header.AddBytesV("Content-Disposition", contentDisposition)
		}
		ctx.Write(responseBody)
	}
}

func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
	// serve robots.txt
	if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
		ctx.SetContentType("text/plain")
		ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
		return true
	}

	return false
}

func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
	param := ctx.QueryArgs().PeekBytes(paramName)
	if param == nil {
		param = ctx.PostArgs().PeekBytes(paramName)
		if param != nil {
			ctx.PostArgs().DelBytes(paramName)
		}
	} else {
		ctx.QueryArgs().DelBytes(paramName)
	}
	return param
}

func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
	// TODO

	urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)

	if urlSlices == nil {
		out.Write(css)
		return
	}

	startIndex := 0

	for _, s := range urlSlices {
		urlStart := s[4]
		urlEnd := s[5]

		if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
			out.Write(css[startIndex:urlStart])
			out.Write([]byte(uri))
			startIndex = urlEnd
		} else {
			log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
		}
	}
	if startIndex < len(css) {
		out.Write(css[startIndex:])
	}
}
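// Sketch of what sanitizeCSS does, assuming a key-less instance and a base URL
// of https://example.com/assets/site.css (illustrative values):
//
//	input:  background: url("img/bg.png");
//	output: background: url("./?mortyurl=https%3A%2F%2Fexample.com%2Fassets%2Fimg%2Fbg.png");
//
// Only the URL captured by CSS_URL_REGEXP is replaced; the surrounding CSS,
// including the quotes, is copied through unchanged.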
func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
	r := bytes.NewReader(htmlDoc)
	decoder := html.NewTokenizer(r)
	decoder.AllowCDATA(true)

	unsafeElements := make([][]byte, 0, 8)
	state := STATE_DEFAULT

	for {
		token := decoder.Next()
		if token == html.ErrorToken {
			err := decoder.Err()
			if err != io.EOF {
				log.Println("failed to parse HTML:", err)
			}
			break
		}

		if len(unsafeElements) == 0 {

			switch token {
			case html.StartTagToken, html.SelfClosingTagToken:
				tag, hasAttrs := decoder.TagName()
				safe := !inArray(tag, UNSAFE_ELEMENTS)
				if !safe {
					if !inArray(tag, SELF_CLOSING_ELEMENTS) {
						var unsafeTag []byte = make([]byte, len(tag))
						copy(unsafeTag, tag)
						unsafeElements = append(unsafeElements, unsafeTag)
					}
					break
				}

				if bytes.Equal(tag, []byte("base")) {
					for {
						attrName, attrValue, moreAttr := decoder.TagAttr()
						if bytes.Equal(attrName, []byte("href")) {
							parsedURI, err := url.Parse(string(attrValue))
							if err == nil {
								rc.BaseURL = parsedURI
							}
						}
						if !moreAttr {
							break
						}
					}
					break
				}

				if bytes.Equal(tag, []byte("noscript")) {
					state = STATE_IN_NOSCRIPT
					break
				}

				var attrs [][][]byte
				if hasAttrs {
					for {
						attrName, attrValue, moreAttr := decoder.TagAttr()
						attrs = append(attrs, [][]byte{
							attrName,
							attrValue,
							[]byte(html.EscapeString(string(attrValue))),
						})
						if !moreAttr {
							break
						}
					}
				}

				if bytes.Equal(tag, []byte("link")) {
					sanitizeLinkTag(rc, out, attrs)
					break
				}

				if bytes.Equal(tag, []byte("meta")) {
					sanitizeMetaTag(rc, out, attrs)
					break
				}

				fmt.Fprintf(out, "<%s", tag)

				if hasAttrs {
					sanitizeAttrs(rc, out, attrs)
				}

				if token == html.SelfClosingTagToken {
					fmt.Fprintf(out, " />")
				} else {
					fmt.Fprintf(out, ">")
					if bytes.Equal(tag, []byte("style")) {
						state = STATE_IN_STYLE
					}
				}

				if bytes.Equal(tag, []byte("head")) {
					fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
				}

				if bytes.Equal(tag, []byte("form")) {
					var formURL *url.URL
					for _, attr := range attrs {
						if bytes.Equal(attr[0], []byte("action")) {
							formURL, _ = url.Parse(string(attr[1]))
							formURL = mergeURIs(rc.BaseURL, formURL)
							break
						}
					}
					if formURL == nil {
						formURL = rc.BaseURL
					}
					urlStr := formURL.String()
					var key string
					if rc.Key != nil {
						key = hash(urlStr, rc.Key)
					}
					fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
				}

			case html.EndTagToken:
				tag, _ := decoder.TagName()
				writeEndTag := true
				switch string(tag) {
				case "body":
					fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
				case "style":
					state = STATE_DEFAULT
				case "noscript":
					state = STATE_DEFAULT
					writeEndTag = false
				}
				// skip noscript end tags - only the tag, not the content, because javascript is sanitized
				if writeEndTag {
					fmt.Fprintf(out, "</%s>", tag)
				}

			case html.TextToken:
				switch state {
				case STATE_DEFAULT:
					fmt.Fprintf(out, "%s", decoder.Raw())
				case STATE_IN_STYLE:
					sanitizeCSS(rc, out, decoder.Raw())
				case STATE_IN_NOSCRIPT:
					sanitizeHTML(rc, out, decoder.Raw())
				}

			case html.CommentToken:
				// ignore comment. TODO : parse IE conditional comment

			case html.DoctypeToken:
				out.Write(decoder.Raw())
			}
		} else {
			switch token {
			case html.StartTagToken:
				tag, _ := decoder.TagName()
				if inArray(tag, UNSAFE_ELEMENTS) {
					unsafeElements = append(unsafeElements, tag)
				}

			case html.EndTagToken:
				tag, _ := decoder.TagName()
				if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
					unsafeElements = unsafeElements[:len(unsafeElements)-1]
				}
			}
		}
	}
}

func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
	exclude := false
	for _, attr := range attrs {
		attrName := attr[0]
		attrValue := attr[1]
		if bytes.Equal(attrName, []byte("rel")) {
			if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
				exclude = true
				break
			}
		}
		if bytes.Equal(attrName, []byte("as")) {
			if bytes.Equal(attrValue, []byte("script")) {
				exclude = true
				break
			}
		}
	}

	if !exclude {
		// re-emit the link tag with sanitized attributes
		// (the body of this branch was stripped from this listing and is
		// reconstructed by analogy with sanitizeMetaTag below)
		out.Write([]byte("<link"))
		sanitizeAttrs(rc, out, attrs)
		out.Write([]byte(">"))
	}
}

func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
	var http_equiv []byte
	var content []byte

	for _, attr := range attrs {
		attrName := attr[0]
		attrValue := attr[1]
		if bytes.Equal(attrName, []byte("http-equiv")) {
			http_equiv = bytes.ToLower(attrValue)
			// exclude some
			if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
				return
			}
		}
		if bytes.Equal(attrName, []byte("content")) {
			content = attrValue
		}
		if bytes.Equal(attrName, []byte("charset")) {
			// exclude
			return
		}
	}

	out.Write([]byte("<meta"))
	// handle the refresh redirect target, which has to be proxified as well
	// (this part was partly stripped from this listing; urlIndex and contentUrl
	// are reconstructed from how they are used below)
	urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
	if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
		contentUrl := content[urlIndex+4:]
		// strip the quote around the URL if it is present on both sides
		if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
			if contentUrl[0] == contentUrl[len(contentUrl)-1] {
				contentUrl = contentUrl[1 : len(contentUrl)-1]
			}
		}
		// output proxify result
		if uri, err := rc.ProxifyURI(contentUrl); err == nil {
			fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
		}
	} else {
		if len(http_equiv) > 0 {
			fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
		}
		sanitizeAttrs(rc, out, attrs)
	}
	out.Write([]byte(">"))
}

func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
	for _, attr := range attrs {
		sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
	}
}

func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
	if inArray(attrName, SAFE_ATTRIBUTES) {
		fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
		return
	}
	switch string(attrName) {
	case "src", "href", "action":
		if uri, err := rc.ProxifyURI(attrValue); err == nil {
			fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
		} else {
			log.Println("cannot proxify uri:", string(attrValue))
		}
	case "style":
		cssAttr := bytes.NewBuffer(nil)
		sanitizeCSS(rc, cssAttr, attrValue)
		fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(cssAttr.String()))
	}
}

func mergeURIs(u1, u2 *url.URL) *url.URL {
	return u1.ResolveReference(u2)
}
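// mergeURIs simply resolves u2 against u1, e.g. (illustrative values):
//
//	base, _ := url.Parse("https://example.com/a/b")
//	ref, _ := url.Parse("../img/logo.png")
//	mergeURIs(base, ref).String() // "https://example.com/img/logo.png"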
// sanitizeURI removes every byte lower than or equal to 32 (control characters
// and space) from the beginning and the end of the URI, and lower-cases the scheme.
// It avoids memory allocation (except for the scheme).
func sanitizeURI(uri []byte) ([]byte, string) {
	first_rune_index := 0
	first_rune_seen := false
	scheme_last_index := -1
	buffer := bytes.NewBuffer(make([]byte, 0, 10))

	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	// loop over the URI byte by byte
	for i, c := range uri {
		// ignore special characters and space (c <= 32)
		if c > 32 {
			// append the lower-cased byte to the buffer
			if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
				c = c + 'a' - 'A'
			}
			buffer.WriteByte(c)
			// remember the index of the first byte that is not a special character
			if !first_rune_seen {
				first_rune_index = i
				first_rune_seen = true
			}
			if c == ':' {
				// colon found: the scheme ends here
				scheme_last_index = i
				break
			} else if c == '/' || c == '?' || c == '\\' || c == '#' {
				// special case : most probably a relative URI
				break
			}
		}
	}

	if scheme_last_index != -1 {
		// scheme found
		// copy the lower-cased scheme (without special characters) in place, right before the ":"
		scheme_start_index := scheme_last_index - buffer.Len() + 1
		copy(uri[scheme_start_index:], buffer.Bytes())
		// and return the result
		return uri[scheme_start_index:], buffer.String()
	} else {
		// scheme NOT found
		return uri[first_rune_index:], ""
	}
}
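// Examples of sanitizeURI (illustrative values):
//
//	sanitizeURI([]byte("  JaVaScRiPt:alert(1)  "))  // -> "javascript:alert(1)", "javascript:"
//	sanitizeURI([]byte("/relative/path?q=1"))       // -> "/relative/path?q=1", ""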
func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
	// sanitize URI
	uri, scheme := sanitizeURI(uri)

	// remove javascript protocol
	if scheme == "javascript:" {
		return "", nil
	}

	// TODO check malicious data: - e.g. data:script
	if scheme == "data:" {
		if bytes.HasPrefix(uri, []byte("data:image/png")) ||
			bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
			bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
			bytes.HasPrefix(uri, []byte("data:image/gif")) ||
			bytes.HasPrefix(uri, []byte("data:image/webp")) {
			// should be safe
			return string(uri), nil
		} else {
			// unsafe data
			return "", nil
		}
	}

	// parse the uri
	u, err := url.Parse(string(uri))
	if err != nil {
		return "", err
	}

	// get the fragment (with the prefix "#")
	fragment := ""
	if len(u.Fragment) > 0 {
		fragment = "#" + u.Fragment
	}

	// reset the fragment: it is not included in the mortyurl
	u.Fragment = ""

	// merge the URI with the document URI
	u = mergeURIs(rc.BaseURL, u)

	// simple internal link?
	// some web pages spell out the whole link: https://same:auth@same.host/same.path?same.query#new.fragment
	if u.Scheme == rc.BaseURL.Scheme &&
		(rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
		u.Host == rc.BaseURL.Host &&
		u.Path == rc.BaseURL.Path &&
		u.RawQuery == rc.BaseURL.RawQuery {
		// the fragment is the only difference between the document URI and the uri parameter
		return fragment, nil
	}

	// return the full URI and the fragment (if not empty)
	morty_uri := u.String()

	if rc.Key == nil {
		return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
	}
	return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
}

func inArray(b []byte, a [][]byte) bool {
	for _, b2 := range a {
		if bytes.Equal(b, b2) {
			return true
		}
	}
	return false
}

func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))
	return hex.EncodeToString(mac.Sum(nil))
}

func verifyRequestURI(uri, hashMsg, key []byte) bool {
	h := make([]byte, hex.DecodedLen(len(hashMsg)))
	_, err := hex.Decode(h, hashMsg)
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}
	mac := hmac.New(sha256.New, key)
	mac.Write(uri)
	return hmac.Equal(h, mac.Sum(nil))
}
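// Building a signed link for a key-protected instance (sketch; "my-key" is a
// placeholder, not part of the program):
//
//	target := "https://example.com/"
//	link := fmt.Sprintf("./?mortyhash=%s&mortyurl=%s",
//		hash(target, []byte("my-key")), url.QueryEscape(target))
//
// verifyRequestURI accepts exactly the hashes produced this way (or by ProxifyURI).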

func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
	ctx.SetContentType("text/html")
	ctx.SetStatusCode(statusCode)
	// NOTE: the original markup/styling of this page was stripped from this
	// listing; the HTML below is a minimal reconstruction around the surviving text.
	ctx.Write([]byte(`<!doctype html>
<html>
<head>
<title>MortyProxy</title>
</head>
<body>
<h1>MortyProxy</h1>
`))
	if err != nil {
		log.Println("error:", err)
		ctx.Write([]byte("<h2>Error: "))
		ctx.Write([]byte(html.EscapeString(err.Error())))
		ctx.Write([]byte("</h2>"))
	}
	if p.Key == nil {
		ctx.Write([]byte(`
<form action="." method="GET">
Visit url: <input placeholder="https://url.." name="mortyurl" />
<input type="submit" value="go" />
</form>
`))
	} else {
		ctx.Write([]byte(`
<h3>Warning! This instance does not support direct URL opening.</h3>
`))
	}
	ctx.Write([]byte(`
</body>
</html>`))
}

func main() {
	listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
	key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
	ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
	requestTimeout := flag.Uint("timeout", 2, "Request timeout in seconds")
	flag.Parse()

	if *ipv6 {
		CLIENT.Dial = fasthttp.DialDualStack
	}

	p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}

	if *key != "" {
		// NOTE: the key is used as raw bytes here; it is not hex-decoded,
		// despite the wording of the flag description
		p.Key = []byte(*key)
	}

	log.Println("listening on", *listen)

	if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
		log.Fatal("Error in ListenAndServe:", err)
	}
}
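// Example usage (sketch; the flag values shown are the defaults):
//
//	$ go run . -listen 127.0.0.1:3000
//	$ curl 'http://127.0.0.1:3000/?mortyurl=https%3A%2F%2Fexample.com%2F'
//
// With -key set, requests additionally need the mortyhash parameter described above.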