Skip to content

Commit

Permalink
Merge pull request #5331 from nivler/master
Browse files Browse the repository at this point in the history
Support for new bots
  • Loading branch information
Stefan Giehl committed Apr 4, 2015
2 parents e0bfc45 + aee0a81 commit 2a7292d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 1 deletion.
20 changes: 19 additions & 1 deletion Tests/fixtures/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -695,4 +695,22 @@
name: 'Tiny Tiny RSS'
# Each fixture pairs a raw user agent string with a bot name
# (presumably the name the detector is expected to report — confirm
# against the test that loads this file).
-
  user_agent: 'Tiny Tiny RSS/1.11.4c63934 (http://tt-rss.org/)'
  name: 'Tiny Tiny RSS'
-
  user_agent: 'Mozilla/5.0 (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)'
  name: 'Yahoo Gemini'
-
  user_agent: 'Mozilla/5.0 (Java) outbrain'
  name: 'Outbrain'
-
  user_agent: 'HubPages V0.2.2 (http://hubpages.com/help/crawlingpolicy)'
  name: 'HubPages'
-
  user_agent: 'ADmantX Platform Semantic Analyzer - ADmantX Inc. - www.admantx.com - support@admantx.com'
  name: 'ADMantX'
-
  user_agent: 'Pinterest/0.2 (+http://www.pinterest.com/)'
  name: 'Pinterest'
-
  user_agent: 'Server Density Service Monitoring v2'
  name: 'Server Density'
35 changes: 35 additions & 0 deletions regexes/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,37 @@
name: 'Yottaa'
url: 'http://www.yottaa.com/'

# Yahoo Gemini: matches the "Yahoo Ad monitoring" token followed by the
# help-article slug that appears in the crawler's user agent.
# No trailing '.*': it adds nothing when the pattern is searched rather
# than full-matched.
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
  name: 'Yahoo Gemini'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
  # producer is a nested mapping; its name/url must be indented below it,
  # otherwise they collide with the entry's own name/url keys.
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

# Outbrain: matches a user agent containing "Java" followed later by
# "outbrain".
# NOTE(review): the leading '.*' looks redundant if patterns are searched
# unanchored; left unchanged because removing it could alter matching if
# the matcher prepends a boundary check — confirm against the matcher.
- regex: '.*Java.*outbrain'
  name: 'Outbrain'
  category: 'Crawler'
  url: ''
  # producer is a nested mapping; its name/url must be indented below it,
  # otherwise they collide with the entry's own name/url keys.
  producer:
    name: 'Outbrain'
    url: 'http://www.outbrain.com/'

# HubPages: matches the "HubPages" product token followed by the
# "crawlingpolicy" path segment of the policy URL in the user agent.
- regex: 'HubPages.*crawlingpolicy'
  name: 'HubPages'
  category: 'Crawler'
  url: 'http://hubpages.com/help/crawlingpolicy'
  # producer is a nested mapping; its name/url must be indented below it,
  # otherwise they collide with the entry's own name/url keys.
  producer:
    name: 'HubPages'
    url: 'http://hubpages.com/'

# Pinterest: matches "Pinterest/<digit>.<digit>" followed by the
# www.pinterest.com host from the user agent's info URL.
# Dots in the host are escaped so they match a literal '.' only, and the
# trailing '.*' is dropped because it does not affect a search match.
- regex: 'Pinterest/\d\.\d.*www\.pinterest\.com'
  name: 'Pinterest'
  # Key order (category before url) follows the neighbouring entries.
  category: 'Crawler'
  url: ''
  # producer is a nested mapping; its name/url must be indented below it,
  # otherwise they collide with the entry's own name/url keys.
  producer:
    name: 'Pinterest'
    url: 'http://www.pinterest.com/'

- regex: 'lycos'
name: 'Lycos'
Expand Down Expand Up @@ -980,6 +1010,11 @@
# NetLyzer FastProbe: matched by its literal product name.
# 'name' is indented so it belongs to the same list item as 'regex'.
- regex: 'NetLyzer FastProbe'
  name: 'NetLyzer FastProbe'

# ADmantX semantic analyzer: matches the product token followed by the
# vendor domain. The token is spelled as it appears in the user agent
# ("ADmantX") so the match does not rely on case-insensitive matching,
# and the dot in the domain is escaped to match a literal '.'.
- regex: 'ADmantX.*admantx\.com'
  name: 'ADMantX'

# Server Density monitoring agent: matched by its literal product
# phrase. The trailing '.*' was dropped because it does not affect a
# search match (any version suffix such as "v2" is still accepted).
- regex: 'Server Density Service Monitoring'
  name: 'Server Density'

- regex: '(nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex|zao|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
name: 'Bot'
Expand Down

0 comments on commit 2a7292d

Please sign in to comment.