Skip to content
This repository has been archived by the owner on Sep 12, 2022. It is now read-only.

Commit

Permalink
Merge pull request #181 from istresearch/P4-2412
Browse files Browse the repository at this point in the history
P4 2412 - Tweet Sampling
  • Loading branch information
andrewkcarter authored Mar 25, 2021
2 parents 9cad370 + 4835b32 commit efe8d5b
Show file tree
Hide file tree
Showing 15 changed files with 729 additions and 383 deletions.
71 changes: 51 additions & 20 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,41 @@ version: '2'
services:

traptor:
build: .
build:
context: .
image: istresearch/traptor
volumes:
- .:/code
- ./logs:/var/log/traptor
- logs:/var/log/traptor
env_file:
- ./traptor.env
depends_on:
- redis
# - kafka
# - elasticsearch
# - kopf
# - logstash
# - kibana
- kafka
environment:
- TRAPTOR_TYPE=track
- KAFKA_HOSTS=kafka:9092
- KAFKA_TOPIC=traptor
- REDIS_HOST=redis
- REDIS_PORT=6379
- REDIS_DB=1
- REDIS_PUBSUB_CHANNEL=traptor-notify
- HEARTBEAT_INTERVAL=10
- LOG_STDOUT=True
- RULE_CHECK_INTERVAL=5
- LOG_STDOUT=False
- RATE_LIMITING_ENABLED=True
- RATE_LIMITING_RATE_SEC=2.0
- RATE_LIMITING_CAPACITY=10
- RATE_LIMITING_REPORTING_INTERVAL_SEC=10
- DW_ENABLED=False
- DW_LOCAL=True # True for local dev
- DW_STATSD_HOST=statsd # on a Linux host set to 172.17.0.1; on a macOS host set to docker.for.mac.localhost
- DW_STATSD_PORT=8125 # port statsd container is listening on
restart: always

healthcheck:
image: istresearch/traptor
volumes:
- .:/code
- ./logs:/var/log/traptor
- logs:/var/log/traptor
env_file:
- ./traptor.env
depends_on:
Expand Down Expand Up @@ -66,40 +69,65 @@ services:
ports:
- "9092:9092"
environment:
KAFKA_ADVERTISED_HOST_NAME: kafka
#KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 # If trying to connect from outside docker, enable this
#KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092 # If trying to connect from inside docker, enable this
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_CREATE_TOPICS: "traptor:1:1"
volumes:
- /var/run/docker.sock:/var/run/docker.sock
depends_on:
- zookeeper
restart: always

zookeeper:
image: wurstmeister/zookeeper
ports:
- "2181:2181"

elasticsearch:
image: elasticsearch:5.0
command: elasticsearch -E network.host=0.0.0.0 -E discovery.zen.minimum_master_nodes=1
image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0
command: elasticsearch
-E network.host=0.0.0.0
-E discovery.zen.minimum_master_nodes=1
-E cluster.name="docker-cluster"
-E node.name="master-1"
-E node.master=true
-E node.data=true
-E node.ingest=false
-E cluster.initial_master_nodes="master-1"
-E bootstrap.memory_lock=true
-E xpack.security.enabled=false
-E xpack.monitoring.enabled=false
-E xpack.graph.enabled=false
-E xpack.watcher.enabled=false
ports:
- "9200:9200"
- "9300:9300"
environment:
ES_JAVA_OPTS: "-Xms256m -Xmx256m"
ulimits:
memlock:
soft: -1
hard: -1

logstash:
image: logstash:2.4.0-1
image: docker.elastic.co/logstash/logstash:7.12.0
command: logstash -f /etc/logstash/conf.d/logstash.conf
volumes:
- ./logstash/traptor-logstash.conf:/etc/logstash/conf.d/logstash.conf
- ./logstash/logs-template.json:/etc/logstash/templates/logs-template.json
- ./logs:/var/log/traptor
- logs:/var/log/traptor

kibana:
image: kibana:5.0
image: docker.elastic.co/kibana/kibana:7.12.0
environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- XPACK_SECURITY_ENABLED=False
- XPACK_MONITORING_ENABLED=False
- XPACK_GRAPH_ENABLED=False
- XPACK_REPORTING_ENABLED=False
ports:
- "5601:5601"
environment:
- ELASTICSEARCH_URL=http://elasticsearch:9200
depends_on:
- elasticsearch

Expand Down Expand Up @@ -147,3 +175,6 @@ services:
- ./logs:/var/log/twitterapi
restart: always
command: python traptor/manager/run.py

volumes:
logs:
38 changes: 2 additions & 36 deletions logstash/logs-template.json
Original file line number Diff line number Diff line change
@@ -1,46 +1,12 @@
{
"template" : "logs-*",
"template" : "traptor-*",
"order" : 0,
"settings" : {
"index.refresh_interval" : "5s",
"number_of_shards" : "5",
"number_of_shards" : "1",
"number_of_replicas": "0"
},
"mappings" : {
"_default_" : {
"dynamic_templates" : [ {
"message_field" : {
"mapping" : {
"index" : "not_analyzed",
"omit_norms" : true,
"include_in_parent": true,
"type" : "string"
},
"match_mapping_type" : "string",
"match" : "message"
}
}, {
"string_fields" : {
"mapping" : {
"index" : "not_analyzed",
"omit_norms" : true,
"include_in_parent": true,
"type" : "string"
},
"match_mapping_type" : "string",
"match" : "*"
}
} ],
"properties" : {
"@version" : {
"index" : "not_analyzed",
"type" : "string"
}
},
"_all" : {
"enabled" : true
}
}
},
"aliases" : { }
}
3 changes: 1 addition & 2 deletions logstash/traptor-logstash.conf
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ output {
template => "/etc/logstash/templates/logs-template.json"
template_name => "logs-*"
template_overwrite => true
index => "logs-traptor-%{+YYYY.MM.dd}"
document_type => "%{[logger]}"
index => "traptor-%{+YYYY.MM.dd}"
}
}
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ uwsgi==2.0.15
pykafka==2.6.0
gevent==1.2.2
Werkzeug==0.16.1
token-bucket==0.2.0
22 changes: 0 additions & 22 deletions sample_docker_environment_file.env

This file was deleted.

82 changes: 82 additions & 0 deletions scripts/add_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# encoding: utf-8
import redis

# Seed rule(s) of type "locations". Every field is stored as a string,
# including the placeholder "None" (the text, not the None object).
# "value" holds four comma-separated coordinates; presumably a
# west,south,east,north bounding box -- TODO confirm the ordering.
location_rules = [
    dict(
        status="active",
        rule_type="locations",
        orig_type="geo",
        description="Tweets from CONUS",
        project_version_id="None",
        campaign_id="default",
        value="-125.551758,24.726875,-66.401367,49.439557",
        tag="Pulse.united_states",
        appid="ist-dev",
        date_added="2021-01-19 18:37:28",
        project_id="default",
        rule_id="1",
        node_id="None",
    ),
]

# Seed rules of type "track" (keyword rules). The two entries differ only
# in description, value, tag and rule_id. Every field is a string,
# including the placeholder "None" (the text, not the None object).
track_rules = [
    dict(
        status="active",
        rule_type="track",
        orig_type="keyword",
        description="test",
        project_version_id="None",
        campaign_id="default",
        value="usa",
        tag="Pulse.united_states",
        appid="ist-dev",
        date_added="2021-01-19 18:37:28",
        project_id="default",
        rule_id="10",
        node_id="None",
    ),
    dict(
        status="active",
        rule_type="track",
        orig_type="keyword",
        description="news",
        project_version_id="None",
        campaign_id="default",
        value="news",
        tag="noise",
        appid="ist-dev",
        date_added="2021-01-19 18:37:28",
        project_id="default",
        rule_id="11",
        node_id="None",
    ),
]

# Seed rule(s) of type "follow". Here orig_type is "username" and the value
# is the account name "cnn". Every field is a string, including the
# placeholder "None" (the text, not the None object).
follow_rules = [
    dict(
        status="active",
        rule_type="follow",
        orig_type="username",
        description="news",
        project_version_id="None",
        campaign_id="default",
        value="cnn",
        tag="news",
        appid="ist-dev",
        date_added="2021-01-19 18:37:28",
        project_id="default",
        rule_id="20",
        node_id="None",
    ),
]

# Connect to database 1 of the Redis server on localhost.
r = redis.Redis('localhost', db=1)
# The result is discarded; presumably this is a connectivity check so an
# unreachable server fails here, before any rule is written -- TODO confirm.
r.info()

# Middle component of every key written below.
traptor_id = 0

# Store each rule as one Redis hash keyed
# 'traptor-<rule type>:<traptor_id>:<rule_id>', in the order
# locations, track, follow.
# NOTE(review): hmset is deprecated in newer redis-py releases in favour of
# hset(name, mapping=...); kept as-is because the client version in use is
# not visible from this script.
for key_format, rules in (
    ('traptor-locations:{}:{}', location_rules),
    ('traptor-track:{}:{}', track_rules),
    ('traptor-follow:{}:{}', follow_rules),
):
    for item in rules:
        r.hmset(key_format.format(traptor_id, item['rule_id']), item)
2 changes: 1 addition & 1 deletion tests/data/extended_tweets/follow_rules.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"tag": "test", "value": "735369652956766200", "status": "active", "description": "Tweets from some user", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "rule_type": "follow", "rule_id": 12345}
{"tag": "test", "value": "735369652956766200", "status": "active", "description": "Tweets from some user", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "orig_type": "userid", "rule_type": "follow", "rule_id": 12345}
2 changes: 1 addition & 1 deletion tests/data/extended_tweets/track_rules.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"tag": "test", "value": "tweet", "status": "active", "description": "Tweets for a hashtag", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "rule_type": "track", "rule_id": 12347}
{"tag": "test", "value": "tweet", "status": "active", "description": "Tweets for a hashtag", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "orig_type": "keyword", "rule_type": "track", "rule_id": 12347}
2 changes: 1 addition & 1 deletion tests/data/follow_rules.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"tag": "test", "value": "17919972", "status": "active", "description": "Tweets from some user", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "rule_type": "follow", "rule_id": 12345}
{"tag": "test", "value": "17919972", "status": "active", "description": "Tweets from some user", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "orig_type": "userid", "rule_type": "follow", "rule_id": 12345}
2 changes: 1 addition & 1 deletion tests/data/locations_rules.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"tag": "test", "value": "-122.75,36.8,-121.75,37.8", "status": "active", "description": "Tweets from some continent", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "rule_type": "locations", "rule_id": 12346}
{"tag": "test", "value": "-122.75,36.8,-121.75,37.8", "status": "active", "description": "Tweets from some continent", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "orig_type": "geo", "rule_type": "locations", "rule_id": 12346}
2 changes: 1 addition & 1 deletion tests/data/track_rules.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"tag": "test", "value": "happy", "status": "active", "description": "Tweets for a hashtag", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "rule_type": "track", "rule_id": 12347}
{"tag": "test", "value": "happy", "status": "active", "description": "Tweets for a hashtag", "appid": "test-appid", "date_added": "2016-05-10 16:58:34", "orig_type": "keyword", "rule_type": "track", "rule_id": 12347}
Loading

0 comments on commit efe8d5b

Please sign in to comment.