diff --git a/.env.default b/.env.default new file mode 100644 index 0000000..cf071ec --- /dev/null +++ b/.env.default @@ -0,0 +1,13 @@ +COMMON_APP_ENV=production +COMMON_DEV_TOOL_URL=http://chromedp:9222 +URLSCAN_API_KEY= +URLSCAN_API_URL=https://urlscan.io/api +GOOGLE_SAFE_BROWSING_API_KEY= +GOOGLE_SAFE_BROWSING_API_URL=https://safebrowsing.googleapis.com/v4/threatMatches:find +GOOGLE_TRANSPARENCYREPORT_API_URL=https://transparencyreport.google.com/transparencyreport/api/v3/safebrowsing/ +# These are used for requesting to external APIs. +COMMON_MAX_IDLE_CONNS=200 +COMMON_MAX_IDLE_CONN_SPER_HOST=200 +COMMON_MAX_CONNS_PER_HOST=200 +COMMON_IDLE_CONN_TIMEOUT=60 +COMMON_DISABLE_COMPRESSION=true \ No newline at end of file diff --git a/.realize.yaml b/.realize.yaml new file mode 100755 index 0000000..a9c3809 --- /dev/null +++ b/.realize.yaml @@ -0,0 +1,23 @@ +settings: + legacy: + force: false + interval: 0s +schema: +- name: studio-abuse-detector + path: . + commands: + install: + status: true + method: go build -o app + run: + status: true + method: ./app + watcher: + extensions: + - go + paths: + - / + ignored_paths: + - .git + - .realize + - vendor diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1f86ce5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM golang:1.14.9-alpine3.12 as build + +WORKDIR /go/app + +COPY . . +COPY .env . + +RUN apk add --no-cache git \ + && go build -o app + +FROM alpine:3.12.0 + +WORKDIR /app + +COPY --from=build /go/app/app . 
+ +RUN apk add --update --no-cache go git \ + && export GOPATH=/root/go \ + && export PATH=${GOPATH}/bin:/usr/local/go/bin:$PATH \ + && export GOBIN=$GOROOT/bin \ + && mkdir -p ${GOPATH}/src ${GOPATH}/bin \ + && addgroup go \ + && adduser -D -G go go \ + && chown -R go:go /app/app \ + && chmod +x /app/app + +CMD ["go", "run", "main.go"] \ No newline at end of file diff --git a/DockerfileChrome b/DockerfileChrome new file mode 100644 index 0000000..fab818d --- /dev/null +++ b/DockerfileChrome @@ -0,0 +1,3 @@ +FROM zenika/alpine-chrome + +CMD ["--no-sandbox", "--remote-debugging-address=0.0.0.0", "--remote-debugging-port=9222"] diff --git a/README.md b/README.md index 74a02e6..bd57c8c 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,102 @@ # Abuse Detector -This application is for previnting phishing sites are created on Studio. +This application is for preventing phishing sites from being created on Studio. ## Requirements - Go 1.14.9 >= -- Docker -- Docker Compose +- Docker 2.4.0.0 >= +- Docker Compose 1.27.4 >= +## Usage +### Request verification +This API verifies that the site does not include malicious links, such as phishing links. +``` +http://localhost:3000/verify?url=https://www.google.com/ +``` +The response would look like below if the site is not malicious. +``` +{ + "strategyName": "", + "link": [], + "malicious": false, + "statusCode": 200, + "error": null +} +``` +If it's malicious, the response looks like below. +``` +{ + "strategyName": "TransparencyReportVerifyStrategy", + "link": ["http://sucursalvirtualpersonas-sa.com"], + "malicious": true, + "statusCode": 200, + "error": null +} +``` ## How to build ```shell script -go build +go build *.go ``` -## How to run -```shell script -go run main.go -``` \ No newline at end of file +## How to run for Development +1. Create `.env` based on `.env.default`. For the required API keys, please refer to the sections below in this README. +1. In the `.env` file, remove the `production` string from `COMMON_APP_ENV` as follows.
+ ``` + COMMON_APP_ENV= + ``` +1. Start Chrome Headless Server + ``` + docker run -d -p 9222:9222 --rm --name headless-shell --shm-size 2G chromedp/headless-shell + ``` +1. Run the server as below. The `realize` command enables hot reloading. + ```shell script + realize start + ``` + +## How to run all tests +``` +go test -v -race -run=. -bench=. ./... +``` + +## How to run for production +1. Create `.env` based on `.env.default` +1. Set API Keys accordingly. +1. Run the command below. + ``` + docker-compose up + ``` + +## How to build Docker image +This is how to build the image and confirm that it is built correctly. +``` +docker build -t studio-abuse-detector . +docker run -p 3000:3000 -d --name studio-abuse-detector studio-abuse-detector:latest +curl localhost:3000 +``` + +## Operation Related +### How to remove all containers, including running ones +``` +docker rm -f `docker ps -qa` +``` +### How to access the running app container +``` +docker-compose exec app /bin/sh +``` + +## Appendix +- [cdp, Chrome Dev Tools Protocol](https://github.com/mafredri/cdp) +- [Headless Chrome server base for Dockerfile, Zenika/alpine-chrome](https://github.com/Zenika/alpine-chrome) + +### How to get API key for urlscan.io +1. Go to `https://urlscan.io/` and create an account. +1. Go to [Settings & API](https://urlscan.io/user/profile/) and create an API Key +1. Copy the `Key` and set it to `URLSCAN_API_KEY` in the `.env` file + +### How to get API key for Google Safe Browsing API +1. Go to [Google API Console](https://console.developers.google.com/) and create a project +1. Create an API key in the project. +1. Look for `Google Safe Browsing API` in the `Library` tab and add it for the API Key created. +1. Copy the `Key` and set it to `GOOGLE_SAFE_BROWSING_API_KEY` in the `.env` file + +## Caveat +- The Chrome Headless server in use may need load balancing under heavier load. +- Test links are real phishing sites for now. They tend to go offline or be removed within a short time, so the tests are highly likely to fail.
\ No newline at end of file diff --git a/cmd/verify/fetch_site.go b/cmd/verify/fetch_site.go deleted file mode 100644 index dc0c696..0000000 --- a/cmd/verify/fetch_site.go +++ /dev/null @@ -1,46 +0,0 @@ -package verify - -import ( - "log" - "net/http" - "net/url" - "strings" - "time" -) - -// Validate schema -// return true if the schema is https or false -func IsHttps(urlStr string) (bool, error) { - parsedUrl, err := url.Parse(urlStr) - - if nil != err { - log.Fatal(err) - return false, err - } - - return strings.EqualFold(parsedUrl.Scheme,"https"), nil -} - -// Fetch URL response -// Automatically detect https or http -func Fetch(url string) (resp *http.Response, err error) { - ret, err := IsHttps(url) - if err != nil { - return &http.Response{}, err - } - - if true == ret { - // HTTPS - tr := &http.Transport{ - MaxIdleConns: 10, - IdleConnTimeout: 30 * time.Second, - DisableCompression: true, - } - client := &http.Client{Transport: tr} - return client.Get(url) - } else { - // HTTP - return http.Get(url) - } -} - diff --git a/cmd/verify/verify.go b/cmd/verify/verify.go deleted file mode 100644 index a9054cb..0000000 --- a/cmd/verify/verify.go +++ /dev/null @@ -1,47 +0,0 @@ -package verify - -import ( - "errors" - "fmt" - "github.com/PuerkitoBio/goquery" - "log" -) - -func Parse(url string, links *[]string) (bool, error) { - // Request the HTML page. 
- res, err := Fetch(url) - if err != nil { - log.Fatal(err) - return false, err - } - - defer res.Body.Close() - if res.StatusCode != 200 { - msg := fmt.Sprintf("status code error: %d %s", res.StatusCode, res.Status) - log.Fatal(msg) - return false, errors.New(msg) - } - - // Load the HTML document - doc, err := goquery.NewDocumentFromReader(res.Body) - if err != nil { - log.Fatal(err) - return false, err - } - - // Find the review items - doc.Find("a").Each(func(i int, s *goquery.Selection) { - // For each item found, get the band and title - attr, exists := s.Attr("href") - - if true == exists { - *links = append(*links, attr) - } - }) - - if len(*links) <= 0 { - return false, nil - } - - return true, nil -} diff --git a/cmd/verify/verity_test.go b/cmd/verify/verity_test.go deleted file mode 100644 index 72a59e8..0000000 --- a/cmd/verify/verity_test.go +++ /dev/null @@ -1,19 +0,0 @@ -package verify - -import ( - "github.com/kr/pretty" - "testing" -) - -func TestParse(t *testing.T) { - - url := "https://www.liferay.co.jp/" - links := []string{""} - has, err := Parse(url, &links) - - if has == false || err != nil { - t.Errorf("has %t error %x", has, err) - } - - t.Logf("links %+v", pretty.Formatter(links)) -} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..92cbcaa --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,16 @@ +version: "3" +services: + chromedp: + build: + context: . + dockerfile: DockerfileChrome + ports: + - 9222:9222 + app: + build: + context: . 
+ dockerfile: Dockerfile + ports: + - 3000:3000 + volumes: + - ./:/app \ No newline at end of file diff --git a/go.mod b/go.mod index 9a64987..3ecb7fd 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,18 @@ go 1.14 require ( github.com/PuerkitoBio/goquery v1.5.1 + github.com/chromedp/cdproto v0.0.0-20200116234248-4da64dd111ac + github.com/chromedp/chromedp v0.5.3 github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-resty/resty/v2 v2.3.0 + github.com/joho/godotenv v1.3.0 + github.com/kelseyhightower/envconfig v1.4.0 github.com/kr/pretty v0.2.1 github.com/labstack/echo/v4 v4.1.17 + github.com/mafredri/cdp v0.29.2 + github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 + github.com/pkg/errors v0.8.1 + github.com/stretchr/testify v1.4.0 + github.com/thoas/go-funk v0.7.0 + gopkg.in/yaml.v2 v2.3.0 // indirect ) diff --git a/go.sum b/go.sum index e76e489..4395533 100644 --- a/go.sum +++ b/go.sum @@ -2,12 +2,34 @@ github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154Oa github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/chromedp/cdproto v0.0.0-20200116234248-4da64dd111ac h1:T7V5BXqnYd55Hj/g5uhDYumg9Fp3rMTS6bykYtTIFX4= +github.com/chromedp/cdproto v0.0.0-20200116234248-4da64dd111ac/go.mod h1:PfAWWKJqjlGFYJEidUM6aVIWPr0EpobeyVWEEmplX7g= +github.com/chromedp/chromedp v0.5.3 h1:F9LafxmYpsQhWQBdCs+6Sret1zzeeFyHS5LkRF//Ffg= +github.com/chromedp/chromedp v0.5.3/go.mod h1:YLdPtndaHQ4rCpSpBG+IPpy9JvX0VD+7aaLxYgYj28w= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/go-resty/resty/v2 v2.3.0 h1:JOOeAvjSlapTT92p8xiS19Zxev1neGikoHsXJeOq8So= +github.com/go-resty/resty/v2 v2.3.0/go.mod h1:UpN9CgLZNsv4e9XG50UU8xdI0F43UQ4HmxLBDwaroHU= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= +github.com/kelseyhightower/envconfig v1.4.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg= +github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08 h1:V0an7KRw92wmJysvFvtqtKMAPmvS5O0jtB0nYo6t+gs= +github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08/go.mod h1:dFWs1zEqDjFtnBXsd1vPOZaLsESovai349994nHx3e0= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -17,6 +39,11 @@ github.com/labstack/echo/v4 v4.1.17 h1:PQIBaRplyRy3OjwILGkPg89JRtH2x5bssi59G2EL3 github.com/labstack/echo/v4 v4.1.17/go.mod h1:Tn2yRQL/UclUalpb5rPdXDevbkJ+lp/2svdyFBg6CHQ= github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/mafredri/cdp v0.29.2 h1:8lZnyx/A1yLmhkqMK3hJP0JgIjllF8edRsWUtMzu1cc= +github.com/mafredri/cdp v0.29.2/go.mod h1:71D84qPmWUvBWYj24Zp+U69mrUof4o8qL2X1fQJ/lHc= +github.com/mafredri/go-lint v0.0.0-20180911205320-920981dfc79e/go.mod h1:k/zdyxI3q6dup24o8xpYjJKTCf2F7rfxLp6w/efTiWs= +github.com/mailru/easyjson v0.7.0 h1:aizVhC/NAAcKWb+5QsU1iNOZb4Yws5UO2I+aIprQITM= +github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.7 h1:bQGKb3vps/j0E9GfJQ03JyhRuxsvdAanXlT9BTw3mdw= github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= @@ -24,25 +51,40 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 h1:lDH9UUVJtmYCjyT0CI4q8xvlXPxeZ0gYCVvWbmPlp88= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.8.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/thoas/go-funk v0.7.0 h1:GmirKrs6j6zJbhJIficOsz2aAI7700KsU/5YrdHRM1Y= +github.com/thoas/go-funk v0.7.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a h1:vclmkQCjlDX5OydZ9wv8rBCcS0QyQY66Mpf/7BZbInM= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -56,7 +98,14 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200601175630-2caf76543d99/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/main.go b/main.go index dffd8a4..74fbc88 100644 --- a/main.go +++ b/main.go @@ -1,13 +1,45 @@ -package detector +package main import ( + "context" "net/http" + "time" + "github.com/joho/godotenv" "github.com/labstack/echo/v4" "github.com/labstack/echo/v4/middleware" + "github.com/op/go-logging" + "studio.design/studio-abuse-detector/pkg/verify" ) +var log = logging.MustGetLogger("verify") + +var logFmt = logging.MustStringFormatter( + `%{color}%{time:15:04:05.000} PID=%{pid} MOD=%{module} PKG=%{shortpkg} %{shortfile} FUNC=%{shortfunc} ▶ %{level:.4s} %{id:03x} %{color:reset} %{message}`, +) + +type VerifyResponse struct { + StrategyName string `json:"strategyName" xml:"strategyName"` + Links []string `json:"link" xml:"link"` + Malicious bool `json:"malicious" xml:"malicious"` + StatusCode int `json:"statusCode" xml:"statusCode"` + Error error `json:"error" xml:"error"` +} + func main() { + err := godotenv.Load() + if err != nil { + // log.Fatal("Error loading .env file") + log.Error(err) + } + + router := NewRouter() + + // Start server + router.Logger.Fatal(router.Start(":3000")) +} + +func NewRouter() *echo.Echo { // Echo instance e := echo.New() @@ -17,14 +49,79 @@ func main() { 
e.Use(middleware.CORS()) // Routes - e.GET("/verify", verify) + e.GET("/verify", Verify) - // Start server - e.Logger.Fatal(e.Start(":3000")) + return e } // Handler -func verify(c echo.Context) error { - return c.String(http.StatusOK, "Hello, World!") -} +func Verify(c echo.Context) error { + url := c.QueryParam("url") + + strategies := []verify.Verify{ + verify.NewTransparencyReportVerifyStrategy(), + // verify.NewUrlScanVerifyStrategy(), + } + + // Set up channels + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + errCh := make(chan VerifyResponse, len(strategies)) + retCh := make(chan VerifyResponse, len(strategies)) + + for _, strategy := range strategies { + + // Run each verification concurrent. + // Results are verified by the response order. + // Return the result as soon as the site is confirmed, + // including malicious links. + go func() { + vr := &VerifyResponse{ + Links: []string{}, + Malicious: false, + Error: nil, + } + ret, err := strategy.Do(ctx, url) + + if err != nil { + vr.Error = err + vr.Links = ret.MaliciousLinks + vr.StrategyName = ret.StrategyName + vr.StatusCode = ret.StatusCode + errCh <- *vr + } else { + vr.Links = ret.MaliciousLinks + vr.Malicious = ret.Malicious + vr.StrategyName = ret.StrategyName + vr.StatusCode = ret.StatusCode + retCh <- *vr + } + }() + } + + for _, n := range strategies { + select { + case err := <-errCh: + cancel() + return c.JSON(http.StatusOK, err) + + case ret := <-retCh: + if true == ret.Malicious { + cancel() + log.Infof("Return from [%d] %s", n, ret.StrategyName) + return c.JSON(http.StatusOK, ret) + } + // Cancel is returned when either Timeout or Cancel occur + case <-ctx.Done(): + <-errCh + return ctx.Err() + } + } + + // No malicious links are found + return c.JSON(http.StatusOK, &VerifyResponse{ + Links: []string{}, + Malicious: false, + Error: nil, + }) +} diff --git a/pkg/verify/env_manager.go b/pkg/verify/env_manager.go new file mode 100644 index 0000000..594a7e3 --- 
/dev/null +++ b/pkg/verify/env_manager.go @@ -0,0 +1,24 @@ +package verify + +import ( + "os" + "regexp" + + "github.com/joho/godotenv" +) + +const projectDirName = "studio-abuse-detector" + +// LoadEnv loads env vars from .env +// https://github.com/joho/godotenv/issues/43 +func LoadEnv() { + re := regexp.MustCompile(`^(.*` + projectDirName + `)`) + cwd, _ := os.Getwd() + rootPath := re.Find([]byte(cwd)) + + err := godotenv.Load(string(rootPath) + `/.env`) + if err != nil { + log.Fatal("Problem loading .env file", err, cwd) + os.Exit(-1) + } +} diff --git a/pkg/verify/fetch_site.go b/pkg/verify/fetch_site.go new file mode 100644 index 0000000..bbe1c48 --- /dev/null +++ b/pkg/verify/fetch_site.go @@ -0,0 +1,67 @@ +package verify + +import ( + "crypto/tls" + "net/http" + "net/url" + "strings" + "time" +) + +const ( + MaxIdleConns = 200 + MaxIdleConnsPerHost = 200 + MaxConnsPerHost = 200 + IdleConnTimeout = 60 * time.Second + DisableCompression = true +) + +// Validate schema +// return true if the schema is https or false +func IsHttps(urlStr string) (bool, error) { + parsedUrl, err := url.Parse(urlStr) + + if nil != err { + log.Error(err) + return false, err + } + + return strings.EqualFold(parsedUrl.Scheme, "https"), nil +} + +// Fetch URL response +// Automatically detect https or http +// TODO : need to replace this to below. 
+// https://future-architect.github.io/articles/20190713/ +//import ( +// "https://godoc.org/golang.org/x/net/context/ctxhttp" +//) +// +//func accessSHS(ctx context.Context) { +// // ctxを第一引数で渡す +// res, err := ctxhttp.Get(ctx, nil, "https://shs.sh") +//} +func Fetch(url string) (resp *http.Response, err error) { + ret, err := IsHttps(url) + + if err != nil { + return &http.Response{}, err + } + + if true == ret { + // HTTPS + tr := &http.Transport{ + MaxIdleConns: MaxIdleConns, + MaxIdleConnsPerHost: MaxIdleConnsPerHost, + MaxConnsPerHost: MaxConnsPerHost, + IdleConnTimeout: IdleConnTimeout, + DisableCompression: DisableCompression, + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + client := &http.Client{Transport: tr} + return client.Get(url) + } else { + // HTTP + return http.Get(url) + } +} diff --git a/cmd/verify/fetch_site_test.go b/pkg/verify/fetch_site_test.go similarity index 100% rename from cmd/verify/fetch_site_test.go rename to pkg/verify/fetch_site_test.go diff --git a/pkg/verify/scrape.go b/pkg/verify/scrape.go new file mode 100644 index 0000000..76b841b --- /dev/null +++ b/pkg/verify/scrape.go @@ -0,0 +1,96 @@ +package verify + +import ( + "context" + "os" + "strings" + + "github.com/mafredri/cdp/devtool" + + "github.com/chromedp/cdproto/cdp" + "github.com/chromedp/chromedp" +) + +// Scrape links from a Url with Chrome headless browser +// chromedp uses the external API. For more details, please refer the link below. +// https://docs.browserless.io/docs/go.html#docsNav +func Scrape(ctx context.Context, url string, links *[]string) (bool, error) { + // Create a new goroutine and send request there. + // The result goes to errCh channel. + errCh := make(chan error, 1) + var ctxLocal context.Context + + env := os.Getenv("COMMON_APP_ENV") + if env == "production" { + devToolURL := os.Getenv("COMMON_DEV_TOOL_URL") + + // Use the DevTools HTTP/JSON API to manage targets (e.g. pages, webworkers). 
+ devt := devtool.New(devToolURL) + pt, err := devt.Get(ctx, devtool.Page) + if err != nil { + pt, err = devt.Create(ctx) + if err != nil { + errCh <- err + } + } + + actxt, cancelActxt := chromedp.NewRemoteAllocator(ctx, pt.WebSocketDebuggerURL) + defer cancelActxt() + + ctx, _ := chromedp.NewContext(actxt) // + ctxLocal = ctx + } else { + ctx, _ := chromedp.NewContext(ctx) + ctxLocal = ctx + } + + var res []*cdp.Node + allHtml := `//a` + + go func() { + err := chromedp.Run(ctxLocal, + chromedp.Navigate(url), + chromedp.Nodes(allHtml, &res), + ) + + errCh <- err + }() + + select { + case err := <-errCh: + if err != nil { + log.Error(err) + return false, err + } + + // Timeout or Cancel comes here. + case <-ctx.Done(): + <-errCh + return false, ctx.Err() + } + + // log.Debug(NodeValues(res)) + *links = NodeValues(res) + + return true, nil +} + +func FindHref(attrs []string) (string, bool) { + for _, c := range attrs { + if strings.HasPrefix(c, "http") { + return c, true + } + } + return "", false +} + +func NodeValues(nodes []*cdp.Node) []string { + var vs []string + for _, n := range nodes { + val, ret := FindHref(n.Attributes) + if true == ret { + vs = append(vs, val) + } + } + return vs +} diff --git a/pkg/verify/scrape_test.go b/pkg/verify/scrape_test.go new file mode 100644 index 0000000..51dbb06 --- /dev/null +++ b/pkg/verify/scrape_test.go @@ -0,0 +1,63 @@ +package verify + +import ( + "context" + "testing" + "time" +) + +func TestScrape(t *testing.T) { + LoadEnv() + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + var links = []string{} + url := `https://www.google.com/` + ret, err := Scrape(ctx, url, &links) + + if err != nil || true != ret { + t.Errorf("Url <%s>, %v", url, err) + } + + if len(links) <= 0 { + t.Error("The length of links is invalid.") + } +} + +func TestFindHref(t *testing.T) { + LoadEnv() + + cases := []struct { + hrefs []string + result bool + retStr string + }{ + { + hrefs: 
[]string{"https://vodafone-billsupport.com/"}, + result: true, + retStr: "https://vodafone-billsupport.com/", + }, + { + hrefs: []string{"href"}, + result: false, + retStr: "", + }, + { + hrefs: []string{"href", "https://vodafone-billsupport.com/"}, + result: true, + retStr: "https://vodafone-billsupport.com/", + }, + { + hrefs: []string{"https://vodafone-billsupport.com/", "href", "http://example.com"}, + result: true, + retStr: "https://vodafone-billsupport.com/", + }, + } + for _, c := range cases { + ret, stat := FindHref(c.hrefs) + if stat != c.result || ret != c.retStr { + t.Errorf("ret<%s> stat<%t> should fetch <%s>\n", ret, c.result, c.retStr) + } + } +} diff --git a/pkg/verify/verify.go b/pkg/verify/verify.go new file mode 100644 index 0000000..e562e4b --- /dev/null +++ b/pkg/verify/verify.go @@ -0,0 +1,110 @@ +package verify + +import ( + "context" + "net/url" + + "github.com/op/go-logging" +) + +var log = logging.MustGetLogger("verify") + +var logFmt = logging.MustStringFormatter( + `%{color}%{time:15:04:05.000} PID=%{pid} MOD=%{module} PKG=%{shortpkg} %{shortfile} FUNC=%{shortfunc} ▶ %{level:.4s} %{id:03x} %{color:reset} %{message}`, +) + +// Verify Interface +type Verify interface { + Request(ctx context.Context, url string) Response + Do(ctx context.Context, url string) (Result, error) +} + +// Verify Result +type Result struct { + StrategyName string + Malicious bool + StatusCode int + Error error + MaliciousLinks []string +} + +// Verify Response +type Response struct { + Result bool + StatusCode int + Error error + Malicious bool +} + +type HostNames struct { + URL string + HostName string +} + +type Env struct { + DevToolUrl string + MaxIdleConns int + MaxIdleConnsPerHost int + MaxConnsPerHost int + IdleConnTimeout int + DisableCompression bool +} + +var veryfyEnv Env + +// Initialize +func init() { + +} + +// Extract valid URL for verification API +// Return URL with either http or https or return empty string +func ExtractHostName(urlStr string) 
(HostNames, error) { + hn := &HostNames{ + URL: "", + HostName: "", + } + + u, err := url.Parse(urlStr) + + if err != nil { + log.Error(err) + return *hn, err + } + + isSchema, err := IsSchema(urlStr) + + if err != nil { + log.Error(err) + return *hn, err + } + + if u.Hostname() != "" && true == isSchema { + hn.URL = u.Scheme + "://" + u.Hostname() + hn.HostName = u.Hostname() + } + + return *hn, nil +} + +// Check if the URL includes schema +// true if it does or false +func IsSchema(urlStr string) (bool, error) { + parsedUrl, err := url.Parse(urlStr) + + if nil != err { + log.Error(err) + return false, err + } + + var bSchema bool = true + if len(parsedUrl.Scheme) == 0 { + // No schema + bSchema = false + } else if parsedUrl.Scheme != "http" && parsedUrl.Scheme != "https" { + // Neither http nor https + bSchema = false + } + + return bSchema, nil +} diff --git a/pkg/verify/verify_transparency_report.go b/pkg/verify/verify_transparency_report.go new file mode 100644 index 0000000..7445f99 --- /dev/null +++ b/pkg/verify/verify_transparency_report.go @@ -0,0 +1,182 @@ +package verify + +import ( + "context" + "errors" + "fmt" + "io/ioutil" + "net/http" + "os" + "regexp" + "strings" +) + +// For Strategy Pattern +type TransparencyReportVerifyStrategy struct{} + +func NewTransparencyReportVerifyStrategy() *TransparencyReportVerifyStrategy { + return &TransparencyReportVerifyStrategy{} +} + +// There's no official documentation exposed for transparencyreport. +// This definitions are based on the response from the API v3 +// Could be changed without a notice as this does not look like exposed API. 
+const ( + errorFlag1Idx = 1 + errorFlag1Value = "2" + errorFlag2Idx = 4 + errorFlag2Value = "1" +) + +func (v *TransparencyReportVerifyStrategy) Response(respStr string) []string { + // Clean up response + var noNLstr string = strings.ReplaceAll(string(respStr), "\n", "") + r := regexp.MustCompile(`\[\[(\S+)\]\]`) + result := r.FindAllStringSubmatch(noNLstr, -1) + + return strings.Split(result[0][1], ",") +} + +func (v *TransparencyReportVerifyStrategy) IsMalcious(respStr string) bool { + resp := v.Response(respStr) + if resp[errorFlag1Idx] == errorFlag1Value && + resp[errorFlag2Idx] == errorFlag2Value { + return true + } + return false +} + +// Referred https://transparencyreport.google.com/safe-browsing/search +func (v *TransparencyReportVerifyStrategy) Request(ctx context.Context, verifyUrl string) Response { + apiUrl := os.Getenv("GOOGLE_TRANSPARENCYREPORT_API_URL") + + response := &Response{ + Result: false, + StatusCode: http.StatusOK, + Error: nil, + Malicious: false, + } + + // request the HTML page. + res, err := Fetch(apiUrl + "status?site=" + verifyUrl) + + if err != nil { + response.StatusCode = res.StatusCode + response.Error = err + log.Error(err) + return *response + } + + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + msg := fmt.Sprintf("status code error: %d %s", res.StatusCode, res.Status) + log.Error(msg) + response.StatusCode = res.StatusCode + response.Error = errors.New(msg) + return *response + } + + bodyBytes, err := ioutil.ReadAll(res.Body) + if err != nil { + log.Error(err) + response.StatusCode = res.StatusCode + response.Error = err + return *response + } + + response.Result = true + response.StatusCode = http.StatusOK + response.Error = nil + response.Malicious = v.IsMalcious(string(bodyBytes)) + return *response +} + +// TODO : need to make this func concurrent. 
+func (v *TransparencyReportVerifyStrategy) Exec(ctx context.Context, links *[]string) (bool, string, error) { + + errCh := make(chan error, len(*links)) + retCh := make(chan Result, len(*links)) + + // Check Links + for _, l := range *links { + go func(link string) { + retResult := &Result{} + ret := v.Request(ctx, link) + + retResult.StatusCode = ret.StatusCode + retResult.Error = ret.Error + retResult.Malicious = ret.Malicious + retResult.MaliciousLinks = append(retResult.MaliciousLinks, link) + + errCh <- ret.Error + retCh <- *retResult + }(l) + } + + for _, loopTmp := range *links { + select { + case err := <-errCh: + if err != nil { + log.Error(err) + return false, "", err + } + case retResult := <-retCh: + if true == retResult.Malicious { + log.Error("Phishing link found. => %s", retResult.MaliciousLinks[0]) + return retResult.Malicious, retResult.MaliciousLinks[0], nil + } else { + log.Info("OK <" + retResult.MaliciousLinks[0] + ">") + } + // Timeout or Cancel comes here. + case <-ctx.Done(): + <-errCh + return false, loopTmp, ctx.Err() + } + } + + return false, "", nil +} + +// TODO : Refactor this to common func with Template? 
+// Do Verification +func (v *TransparencyReportVerifyStrategy) Do(ctx context.Context, url string) (Result, error) { + + log.Info("Verification Start for <" + url + ">") + result := &Result{ + StrategyName: "TransparencyReportVerifyStrategy", + Malicious: false, + MaliciousLinks: []string{}, + } + + // Check URL itself if it's malicious + initRet := v.Request(ctx, url) + result.MaliciousLinks = append(result.MaliciousLinks, url) + result.Malicious = initRet.Malicious + result.StatusCode = initRet.StatusCode + if initRet.Error != nil || true == result.Malicious { + log.Error(initRet.Error) + return *result, initRet.Error + } + + // Parse site + var links []string = []string{} + has, err := Scrape(ctx, url, &links) + + if has == false || err != nil { + log.Errorf("Parse Error : result <%t>", has) + return *result, err + } + + // Check Links + ret, link, err := v.Exec(ctx, &links) + result.MaliciousLinks = append(result.MaliciousLinks, link) + result.Malicious = ret + + if err != nil { + log.Error(err) + return *result, err + } + + result.StatusCode = initRet.StatusCode + return *result, nil +} diff --git a/pkg/verify/verify_transparency_report_test.go b/pkg/verify/verify_transparency_report_test.go new file mode 100644 index 0000000..a157ad0 --- /dev/null +++ b/pkg/verify/verify_transparency_report_test.go @@ -0,0 +1,57 @@ +package verify + +import ( + "context" + "fmt" + "testing" + "time" +) + +func TestGsafeRequest(t *testing.T) { + LoadEnv() + + cases := []struct { + url string + result bool + }{ + {url: "https://qiita.com/", result: false}, + {url: "https://loginscurrentlyattwebpage.weebly.com/", result: true}, + {url: "https://my3-uk-confirm.info", result: true}, + {url: "https://github.com/", result: false}, + {url: "https://actionukee.com/WuofvBw", result: true}, + } + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + v := NewTransparencyReportVerifyStrategy() + + for _, c := range cases { + ret := v.Request(ctx, 
c.url) + if ret.Malicious != c.result { + t.Errorf("ret: %v result: %s}\n", ret, c.url) + } + } +} + +func TestGsafeDo(t *testing.T) { + LoadEnv() + + cases := []struct { + url string + result bool + }{ + {url: "https://www.google.com/", result: false}, + } + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + v := NewTransparencyReportVerifyStrategy() + + for _, c := range cases { + ret, err := v.Do(ctx, c.url) + if ret.Malicious != c.result || err != nil { + t.Errorf("ret: %v result: %s}\n", ret, c.url) + } + fmt.Println(ret) + } +} diff --git a/pkg/verify/verify_urlscan.go b/pkg/verify/verify_urlscan.go new file mode 100644 index 0000000..3f1f9c5 --- /dev/null +++ b/pkg/verify/verify_urlscan.go @@ -0,0 +1,235 @@ +package verify + +import ( + "context" + "encoding/json" + "net/http" + "os" + + "github.com/go-resty/resty/v2" + "github.com/pkg/errors" +) + +// For Strategy Pattern +type UrlScanVerifyStrategy struct{} + +func NewUrlScanVerifyStrategy() *UrlScanVerifyStrategy { + return &UrlScanVerifyStrategy{} +} + +type UrlScanResult struct { + Result string `json:"result"` +} + +// Initial Request response +type UrlScanSubmitResponse struct { + Results []UrlScanResult `json:"results"` +} + +// UrlScanResult details +type UrlScanOverall struct { + Malicious bool `json:"malicious"` +} + +type UrlScanVerdicts struct { + Overall UrlScanOverall `json:"overall"` +} + +type UrlScanResultDetails struct { + Verdicts UrlScanVerdicts `json:"verdicts"` +} + +// Phishing site URL validation +// true if it's phishing site or false +func (v *UrlScanVerifyStrategy) IsPhishingURL(r UrlScanResult) (bool, error) { + client := resty.New() + + resp, err := client.R(). + EnableTrace(). 
+ Get(r.Result) + + if err != nil { + log.Error("Fail to read response") + return false, errors.Wrap(err, "Fail to read urlscan.io POST result") + } + + var result UrlScanResultDetails + err = json.Unmarshal([]byte(resp.String()), &result) + if err != nil { + log.Error(err) + //log.Error("doc %+v", pretty.Formatter(err)) + return false, err + } + + return result.Verdicts.Overall.Malicious, nil +} + +// Phishing site URL validation +// true if it's phishing site or false +func (v *UrlScanVerifyStrategy) Results(results []UrlScanResult) (bool, error) { + for _, r := range results { + ret, err := v.IsPhishingURL(r) + + if err != nil { + log.Error("Fail to read response") + return false, errors.Wrap(err, "Fail to read urlscan.io POST result") + } + + if true == ret { + // Phishing site detected. Return right away + return true, nil + } + } + + // Not a phishing site + return false, nil +} + +// Phishing site URL validation +// true if it's phishing site or false +func (v *UrlScanVerifyStrategy) Request(ctx context.Context, url string) Response { + apiUrl := os.Getenv("URLSCAN_API_URL") + response := &Response{ + Result: false, + StatusCode: http.StatusOK, + Error: nil, + Malicious: false, + } + + hn, err := ExtractHostName(url) + + if err != nil { + response.Error = err + log.Error(err) + return *response + } + + // Create a Resty Client + client := resty.New() + + resp, err := client.R(). + EnableTrace(). + SetContext(ctx). + SetHeader("Content-Type", "application/json"). + SetHeader("API-Key", os.Getenv("URLSCAN_API_KEY")). + SetQueryString("q=" + hn.HostName). 
+ Get(apiUrl + "/v1/search") + + if err != nil { + log.Error("Fail to read response") + response.StatusCode = resp.StatusCode() + response.Error = errors.Wrap(err, "Fail to read urlscan.io POST result") + return *response + } + + var subRes UrlScanSubmitResponse + + err = json.Unmarshal([]byte(resp.String()), &subRes) + if err != nil { + log.Error(err) + response.Error = errors.Wrap(err, "Unmarshal JSON") + return *response + } + + ret, err := v.Results(subRes.Results) + + if err != nil { + log.Error(err) + response.Error = err + return *response + } + + response.Result = true + response.StatusCode = http.StatusOK + response.Error = err + response.Malicious = ret + return *response +} + +func (v *UrlScanVerifyStrategy) Exec(ctx context.Context, links *[]string) (bool, string, error) { + errCh := make(chan error, len(*links)) + retCh := make(chan Result, len(*links)) + + // Check Links + for _, l := range *links { + go func(link string) { + retResult := &Result{} + ret := v.Request(ctx, link) + + retResult.StatusCode = ret.StatusCode + retResult.Error = ret.Error + retResult.Malicious = ret.Malicious + retResult.MaliciousLinks = append(retResult.MaliciousLinks, link) + + errCh <- ret.Error + retCh <- *retResult + }(l) + } + + for _, loopTmp := range *links { + select { + case err := <-errCh: + if err != nil { + log.Error(err) + return false, "", err + } + case retResult := <-retCh: + if true == retResult.Malicious { + log.Error("Phishing link found. => %s", retResult.MaliciousLinks[0]) + return retResult.Malicious, retResult.MaliciousLinks[0], nil + } else { + log.Info("OK <" + retResult.MaliciousLinks[0] + ">") + } + // Timeout or Cancel comes here. + case <-ctx.Done(): + <-errCh + return false, loopTmp, ctx.Err() + } + } + + return false, "", nil +} + +// TODO : Refactor this to common func with Template? 
+// Do Verification +func (v *UrlScanVerifyStrategy) Do(ctx context.Context, url string) (Result, error) { + + log.Info("Verification Start for <" + url + ">") + result := &Result{ + StrategyName: "UrlScanVerifyStrategy", + Malicious: false, + MaliciousLinks: []string{}, + } + + // Check URL itself if it's malicious + initRet := v.Request(ctx, url) + result.MaliciousLinks = append(result.MaliciousLinks, url) + result.Malicious = initRet.Malicious + result.StatusCode = initRet.StatusCode + if initRet.Error != nil || true == result.Malicious { + log.Error(initRet.Error) + return *result, initRet.Error + } + + // Parse site + var links []string + has, err := Scrape(ctx, url, &links) + + if has == false || err != nil { + log.Error("Parse Error : has %t error %x", has, err) + return *result, err + } + + // Check Links + ret, link, err := v.Exec(ctx, &links) + result.MaliciousLinks = append(result.MaliciousLinks, link) + result.Malicious = ret + + if err != nil { + log.Error(err) + return *result, err + } + + log.Info("No malicious links found.") + return *result, nil +} diff --git a/pkg/verify/verify_urlscan_test.go b/pkg/verify/verify_urlscan_test.go new file mode 100644 index 0000000..3a4752a --- /dev/null +++ b/pkg/verify/verify_urlscan_test.go @@ -0,0 +1,31 @@ +package verify + +import ( + "context" + "testing" + "time" +) + +func TestRequest(t *testing.T) { + LoadEnv() + + cases := []struct { + url string + result bool + }{ + {url: "https://www.google.com/", result: false}, + {url: "http://paypal-support.my-sumaya.com", result: true}, + {url: "https://my3-uk-confirm.info", result: true}, + } + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + v := NewUrlScanVerifyStrategy() + + for _, c := range cases { + ret := v.Request(ctx, c.url) + if ret.Malicious != c.result { + t.Errorf("ret<%v> result<%s>}\n", ret, c.url) + } + } +} diff --git a/pkg/verify/verity_test.go b/pkg/verify/verity_test.go new file mode 100644 index 
0000000..9097cbd --- /dev/null +++ b/pkg/verify/verity_test.go @@ -0,0 +1,45 @@ +package verify + +import ( + "testing" +) + +func TestExtractHostName(t *testing.T) { + cases := []struct { + url string + result string + }{ + {url: "https://www.liferay.co.jp/?q=aaa", result: "https://www.liferay.co.jp"}, + {url: "http://violet-evergarden.jp/aaa", result: "http://violet-evergarden.jp"}, + {url: "/some/path", result: ""}, + {url: "smb://some/path", result: ""}, + } + + for _, c := range cases { + ret, err := ExtractHostName(c.url) + + if err != nil || ret.URL != c.result { + t.Errorf("Url %s is error. should be %s", ret, c.result) + } + } +} + +func TestIsSchema(t *testing.T) { + cases := []struct { + url string + result bool + }{ + {url: "https://www.liferay.co.jp/", result: true}, + {url: "http://violet-evergarden.jp/", result: true}, + {url: "/some/path", result: false}, + {url: "smb://some/path", result: false}, + } + + for _, c := range cases { + ret, err := IsSchema(c.url) + + if err != nil || ret != c.result { + t.Errorf("Url %s is error. should be %t", c.url, c.result) + } + } +} diff --git a/verify_transparency_report_handler_test.go b/verify_transparency_report_handler_test.go new file mode 100644 index 0000000..c21d9e9 --- /dev/null +++ b/verify_transparency_report_handler_test.go @@ -0,0 +1,25 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" + + "studio.design/studio-abuse-detector/pkg/verify" + + "github.com/stretchr/testify/assert" +) + +func TestHelloHandler(t *testing.T) { + verify.LoadEnv() + + router := NewRouter() + + req := httptest.NewRequest("GET", "/verify?url=https://bono760lbk.site/", nil) + rec := httptest.NewRecorder() + + router.ServeHTTP(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, "{\"strategyName\":\"\",\"link\":[],\"malicious\":false,\"error\":null}\n", rec.Body.String()) +}