Skip to content

Commit

Permalink
Merge pull request #23 from getlantern/issue-3451
Browse files Browse the repository at this point in the history
getlantern/lantern#3451 Doing reverse lookups of ip addresses and onl…
  • Loading branch information
uaalto committed Jan 29, 2016
2 parents 306ed42 + 0bff17c commit c5c55ad
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 29 deletions.
90 changes: 74 additions & 16 deletions analytics/analytics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ package analytics

import (
"bytes"
"math/rand"
"net"
"net/http"
"net/http/httputil"
"net/url"
Expand All @@ -12,6 +14,7 @@ import (

"github.com/getlantern/golog"
"github.com/getlantern/http-proxy-lantern/common"
"github.com/golang/groupcache/lru"
)

const (
Expand All @@ -32,18 +35,23 @@ type siteAccess struct {
// AnalyticsMiddleware allows plugging popular sites tracking into the proxy's
// handler chain.
type AnalyticsMiddleware struct {
trackingId string
next http.Handler
siteAccesses chan *siteAccess
httpClient *http.Client
trackingId string
samplePercentage float64
next http.Handler
siteAccesses chan *siteAccess
httpClient *http.Client
dnsCache *lru.Cache
}

func New(trackingId string, next http.Handler) *AnalyticsMiddleware {
func New(trackingId string, samplePercentage float64, next http.Handler) *AnalyticsMiddleware {
log.Debugf("Will report analytics to Google as %v, sampling %d percent of requests", trackingId, int(samplePercentage*100))
am := &AnalyticsMiddleware{
trackingId: trackingId,
next: next,
siteAccesses: make(chan *siteAccess, 10000),
httpClient: &http.Client{},
trackingId: trackingId,
samplePercentage: samplePercentage,
next: next,
siteAccesses: make(chan *siteAccess, 1000),
httpClient: &http.Client{},
dnsCache: lru.New(2000),
}
go am.submitToGoogle()
return am
Expand All @@ -55,22 +63,31 @@ func (am *AnalyticsMiddleware) ServeHTTP(w http.ResponseWriter, req *http.Reques
}

func (am *AnalyticsMiddleware) track(req *http.Request) {
am.siteAccesses <- &siteAccess{
ip: stripPort(req.RemoteAddr),
clientId: req.Header.Get(common.DeviceIdHeader),
site: stripPort(req.Host),
if rand.Float64() <= am.samplePercentage {
select {
case am.siteAccesses <- &siteAccess{
ip: stripPort(req.RemoteAddr),
clientId: req.Header.Get(common.DeviceIdHeader),
site: stripPort(req.Host),
}:
// Submitted
default:
log.Debug("Site access request queue is full")
}
}
}

// submitToGoogle submits tracking information to Google Analytics on a
// goroutine to avoid blocking the processing of actual requests
func (am *AnalyticsMiddleware) submitToGoogle() {
for sa := range am.siteAccesses {
am.trackSession(am.sessionVals(sa))
for _, site := range am.normalizeSite(sa.site) {
am.trackSession(am.sessionVals(sa, site))
}
}
}

func (am *AnalyticsMiddleware) sessionVals(sa *siteAccess) string {
func (am *AnalyticsMiddleware) sessionVals(sa *siteAccess, site string) string {
vals := make(url.Values, 0)

// Version 1 of the API
Expand All @@ -90,14 +107,55 @@ func (am *AnalyticsMiddleware) sessionVals(sa *siteAccess) string {

// Track this as a page view
vals.Add("t", "pageview")
vals.Add("dp", sa.site)

log.Tracef("Tracking view to site: %v", site)
vals.Add("dp", site)

// Note the absence of session tracking. We don't have a good way to tell
// when a session ends, so we don't bother with it.

return vals.Encode()
}

// normalizeSite expands a requested site into the list of page paths that
// should be reported for it.
//
// The first element is always the site exactly as given. If the site is an IP
// address, a reverse DNS lookup is attempted (results, including failures, are
// memoized in am.dnsCache). When that lookup yields a name that differs from
// the original site, two further entries are appended: the resolved host name,
// and "/generated/" followed by the last two labels of that name.
//
// For sites that are not IP addresses, or whose reverse lookup fails or yields
// nothing usable, the result contains only the original site.
//
// NOTE(review): the groupcache lru.Cache is documented as not safe for
// concurrent access; this method appears to be called only from the
// submitToGoogle goroutine — confirm before calling it from elsewhere.
func (am *AnalyticsMiddleware) normalizeSite(site string) []string {
	domain := site
	result := make([]string, 0, 3)

	if net.ParseIP(site) != nil {
		// This was an ip, do a reverse lookup
		cached, found := am.dnsCache.Get(site)
		if !found {
			// Fall back to the raw IP unless the lookup produces a usable name.
			resolved := site
			names, err := net.LookupAddr(site)
			switch {
			case err != nil:
				log.Debugf("Unable to perform reverse DNS lookup for %v: %v", site, err)
			case len(names) == 0:
				// LookupAddr may return no names without an error; indexing
				// names[0] here would panic and kill the submit goroutine.
				log.Debugf("Reverse DNS lookup for %v returned no names", site)
			default:
				// Strip trailing period of the fully-qualified name. An empty
				// name is ignored so we never report a blank site.
				if name := strings.TrimSuffix(names[0], "."); name != "" {
					resolved = name
				}
			}
			cached = resolved
			// Failed lookups are cached too so they are not retried for every hit.
			am.dnsCache.Add(site, cached)
		}
		if name, ok := cached.(string); ok {
			domain = name
		}
	}

	result = append(result, site)
	if domain != site {
		// If original site is not the same as domain, track that too
		result = append(result, domain)
		// Also track just the last two portions of the domain name
		parts := strings.Split(domain, ".")
		if len(parts) > 1 {
			result = append(result, "/generated/"+strings.Join(parts[len(parts)-2:], "."))
		}
	}

	return result
}

func (am *AnalyticsMiddleware) trackSession(args string) {
r, err := http.NewRequest("POST", ApiEndpoint, bytes.NewBufferString(args))

Expand Down
19 changes: 19 additions & 0 deletions analytics/analytics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package analytics

import (
"net"
"testing"

"github.com/getlantern/testify/assert"
)

// TestNormalizeSite resolves a known Facebook edge host to an IP address and
// checks that normalizeSite expands that IP into three entries: the IP itself,
// the reverse-resolved host name, and the "/generated/" two-label domain.
//
// NOTE(review): this test performs live DNS lookups and depends on the
// forward/reverse records of the chosen host staying in sync.
func TestNormalizeSite(t *testing.T) {
	am := New("12345", 1, nil)
	addrs, err := net.LookupHost("edge-star-mini-shv-07-frc3.facebook.com")
	if !assert.NoError(t, err, "Should have been able to resolve facebook.com") {
		return
	}
	normalized := am.normalizeSite(addrs[0])
	// Only index into the result once its length is confirmed; otherwise a
	// failed lookup would panic instead of reporting a clean test failure.
	if assert.Len(t, normalized, 3, "Should have gotten three sites") {
		assert.Equal(t, "edge-star-mini-shv-07-frc3.facebook.com", normalized[1])
		assert.Equal(t, "/generated/facebook.com", normalized[2])
	}
}
27 changes: 14 additions & 13 deletions http_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,19 @@ var (
testingLocal = false
log = golog.LoggerFor("lantern-proxy")

help = flag.Bool("help", false, "Get usage help")
keyfile = flag.String("key", "", "Private key file name")
certfile = flag.String("cert", "", "Certificate file name")
https = flag.Bool("https", false, "Use TLS for client to proxy communication")
addr = flag.String("addr", ":8080", "Address to listen")
maxConns = flag.Uint64("maxconns", 0, "Max number of simultaneous connections allowed connections")
idleClose = flag.Uint64("idleclose", 30, "Time in seconds that an idle connection will be allowed before closing it")
token = flag.String("token", "", "Lantern token")
enableReports = flag.Bool("enablereports", false, "Enable stats reporting")
logglyToken = flag.String("logglytoken", "", "Token used to report to loggly.com, not reporting if empty")
pprofAddr = flag.String("pprofaddr", "", "pprof address to listen on, not activate pprof if empty")
proxiedSitesTrackingId = flag.String("proxied-sites-tracking-id", "UA-21815217-15", "The Google Analytics property id for tracking proxied sites")
help = flag.Bool("help", false, "Get usage help")
keyfile = flag.String("key", "", "Private key file name")
certfile = flag.String("cert", "", "Certificate file name")
https = flag.Bool("https", false, "Use TLS for client to proxy communication")
addr = flag.String("addr", ":8080", "Address to listen")
maxConns = flag.Uint64("maxconns", 0, "Max number of simultaneous connections allowed connections")
idleClose = flag.Uint64("idleclose", 30, "Time in seconds that an idle connection will be allowed before closing it")
token = flag.String("token", "", "Lantern token")
enableReports = flag.Bool("enablereports", false, "Enable stats reporting")
logglyToken = flag.String("logglytoken", "", "Token used to report to loggly.com, not reporting if empty")
pprofAddr = flag.String("pprofaddr", "", "pprof address to listen on, not activate pprof if empty")
proxiedSitesTrackingId = flag.String("proxied-sites-tracking-id", "UA-21815217-16", "The Google Analytics property id for tracking proxied sites")
proxiedSitesSamplePercentage = flag.Float64("proxied-sites-sample-percentage", 0.01, "The percentage of requests to sample (0.01 = 1%)")
)

func main() {
Expand Down Expand Up @@ -107,7 +108,7 @@ func main() {

deviceFilterPost := devicefilter.NewPost(commonFilter)

analyticsFilter := analytics.New(*proxiedSitesTrackingId, deviceFilterPost)
analyticsFilter := analytics.New(*proxiedSitesTrackingId, *proxiedSitesSamplePercentage, deviceFilterPost)

deviceFilterPre, err := devicefilter.NewPre(analyticsFilter)
if err != nil {
Expand Down

0 comments on commit c5c55ad

Please sign in to comment.