Skip to content

Commit 3b95340

Browse files
author
abtv
committed
lowercased hashtags
1 parent 6cd68c1 commit 3b95340

File tree

10 files changed

+158
-106
lines changed

10 files changed

+158
-106
lines changed

hashtag-filter-settings.edn

+8
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
;; Per-topic hashtag filter settings.
;; Each key is a topic; each value is the set of hashtags that are left out of
;; that topic's hashtag statistics. Hashtags are lowercased before they are
;; checked against these sets, so entries should be written in lowercase.
{:clojure #{"clojure" "clojurescript"}
2+
:jvm #{"jvm" "java" "scala"}
3+
:javascript #{"javascript" "js" "makeatwitterbot" "fakeheadlinebot" "learntocode"}
4+
:golang #{"golang" "go"}
5+
:linux #{"linux" "fat"}
6+
:nosql #{"nosql"}
7+
:rust #{"rust"}
8+
:jobs #{"job" "jobs" "hiring"}}

project.clj

+30-28
Original file line number | Diff line number | Diff line change
@@ -66,35 +66,37 @@
6666

6767
:profiles {:dev {:global-vars {;*warn-on-reflection* true
6868
*assert* false}
69-
:env {:host "localhost"
70-
:port "3000"
71-
:database "jdbc:postgresql://localhost/tech_radar?user=postgres&password=postgres"
72-
:twitter-security "twitter-security.edn"
73-
:twitter-settings "twitter-settings.edn"
74-
:classify-settings "classify-settings.edn"
75-
:cache-update-timeout-s "10"
76-
:max-hashtags-per-trend "25"
77-
:max-texts-per-request "200"
78-
:max-tweet-count "10000"
79-
:metrics-timeout-s "100"
80-
:log-path "./logs/tech-radar.log"
81-
:max-log-size-mb "1"
82-
:backlog "2"}}
69+
:env {:host "localhost"
70+
:port "3000"
71+
:database "jdbc:postgresql://localhost/tech_radar?user=postgres&password=postgres"
72+
:twitter-security "twitter-security.edn"
73+
:twitter-settings "twitter-settings.edn"
74+
:classify-settings "classify-settings.edn"
75+
:hashtag-filter-settings "hashtag-filter-settings.edn"
76+
:cache-update-timeout-s "10"
77+
:max-hashtags-per-trend "25"
78+
:max-texts-per-request "200"
79+
:max-tweet-count "10000"
80+
:metrics-timeout-s "100"
81+
:log-path "./logs/tech-radar.log"
82+
:max-log-size-mb "1"
83+
:backlog "2"}}
8384
:uberjar {:main tech-radar.core
8485
:aot [tech-radar.core]
8586
:global-vars {;*warn-on-reflection* true
8687
*assert* false}
87-
:env {:host "0.0.0.0"
88-
:port "3000"
89-
:database "jdbc:postgresql://localhost/tech_radar?user=postgres&password=postgres"
90-
:twitter-security "twitter-security.edn"
91-
:twitter-settings "twitter-settings.edn"
92-
:classify-settings "classify-settings.edn"
93-
:cache-update-timeout-s "30"
94-
:max-hashtags-per-trend "25"
95-
:max-texts-per-request "200"
96-
:max-tweet-count "10000"
97-
:metrics-timeout-s "300"
98-
:log-path "./logs/tech-radar.log"
99-
:max-log-size-mb "1"
100-
:backlog "10"}}})
88+
:env {:host "0.0.0.0"
89+
:port "3000"
90+
:database "jdbc:postgresql://localhost/tech_radar?user=postgres&password=postgres"
91+
:twitter-security "twitter-security.edn"
92+
:twitter-settings "twitter-settings.edn"
93+
:classify-settings "classify-settings.edn"
94+
:hashtag-filter-settings "hashtag-filter-settings.edn"
95+
:cache-update-timeout-s "30"
96+
:max-hashtags-per-trend "25"
97+
:max-texts-per-request "200"
98+
:max-tweet-count "10000"
99+
:metrics-timeout-s "300"
100+
:log-path "./logs/tech-radar.log"
101+
:max-log-size-mb "1"
102+
:backlog "10"}}})

src/clj/tech_radar/analytics/model.clj

+29-11
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,19 @@
1313
(subvec tweets (- tweet-count max-tweet-count))
1414
tweets)))
1515

16-
(defn- add-topic-fn [tweet-model hashtags max-tweet-count]
16+
(defn- add-topic-fn [tweet-model hashtags max-tweet-count hashtag-filter-settings]
1717
(fn [data topic]
18-
(let [data* (update-in data [topic :texts] (fnil (fn [coll]
19-
(-> coll
20-
(conj tweet-model)
21-
(remove-old-tweets max-tweet-count))) []))]
18+
(let [data* (update-in data [topic :texts] (fnil (fn [coll]
19+
(-> coll
20+
(conj tweet-model)
21+
(remove-old-tweets max-tweet-count))) []))
22+
hashtag-filter (topic hashtag-filter-settings)
23+
hashtags (->> hashtags
24+
(map (fn [^String hashtag]
25+
(.toLowerCase hashtag)))
26+
(filter (fn [^String hashtag]
27+
(-> (contains? hashtag-filter hashtag)
28+
(not)))))]
2229
(update-in data* [topic :hashtags :daily]
2330
(fn [values]
2431
(let [values* (or values {})]
@@ -28,10 +35,10 @@
2835
(defn- to-tweet-model
  "Reduces `tweet` to the fields stored in the model: :id, :text, :created-at
  and :twitter-id. Keys missing from `tweet` are simply absent in the result."
  [tweet]
  (let [model-keys [:id :text :created-at :twitter-id]]
    (select-keys tweet model-keys)))
3037

31-
(defn- add* [data tweet max-tweet-count]
38+
(defn- add* [data tweet max-tweet-count hashtag-filter-settings]
3239
(swap! data (fn [data {:keys [topics hashtags] :as tweet}]
3340
(let [tweet-model (to-tweet-model tweet)]
34-
(reduce (add-topic-fn tweet-model hashtags max-tweet-count) data topics))) tweet))
41+
(reduce (add-topic-fn tweet-model hashtags max-tweet-count hashtag-filter-settings) data topics))) tweet))
3542

3643
(defn- get-top-hashtags [max-count trends]
3744
(->> trends
@@ -68,8 +75,8 @@
6875
Tweet
6976
(add [this tweet]
7077
(let [{:keys [data search]} this
71-
{:keys [max-texts-per-request max-tweet-count]} (:settings this)]
72-
(add* data tweet max-texts-per-request)
78+
{:keys [max-texts-per-request max-tweet-count hashtag-filter-settings]} (:settings this)]
79+
(add* data tweet max-texts-per-request hashtag-filter-settings)
7380
(add-text search tweet)
7481
(let [texts-count (-> @search
7582
(:texts)
@@ -88,16 +95,27 @@
8895
(or (get-in @data [(keyword topic) :texts])
8996
[]))))
9097

98+
(defn- lowercase-set
  "Returns a set holding the lowercased form of every string in `set`.
  Strings that differ only by case collapse into a single entry."
  [set]
  (into #{}
        (map (fn [^String item]
               (.toLowerCase item)))
        set))
102+
103+
(defn- lowercase-settings
  "Builds a fresh map from `settings` (topic -> collection of hashtags) in which
  every topic keeps its key and its hashtags are replaced by the result of
  `lowercase-set`."
  [settings]
  (into {}
        (map (fn [[topic tags]]
               [topic (lowercase-set tags)]))
        settings))
106+
91107
(defn new-model [topics settings]
92-
(let [{:keys [max-tweet-count max-hashtags-per-trend max-texts-per-request]} settings]
108+
(let [{:keys [max-tweet-count max-hashtags-per-trend max-texts-per-request hashtag-filter-settings]} settings]
93109
(when-not max-tweet-count
94110
(throw (Exception. "you have to provide max-tweet-count param")))
95111
(when-not max-texts-per-request
96112
(throw (Exception. "you have to provide max-texts-per-request param")))
97113
(when-not max-hashtags-per-trend
98114
(throw (Exception. "you have to provide max-hashtags-per-trend param")))
115+
(when-not hashtag-filter-settings
116+
(throw (Exception. "you have to provide hashtag-filter-settings param")))
99117

100118
(map->Model {:data (atom {})
101119
:search (new-search)
102-
:settings settings
120+
:settings (assoc settings :hashtag-filter-settings (lowercase-settings hashtag-filter-settings))
103121
:topics topics})))

src/clj/tech_radar/components/analysis.clj

+16-11
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
[taoensso.timbre :as timbre]
1010
[environ.core :refer [env]]
1111
[tech-radar.utils.parsers :refer [parse-int]]
12-
[tech-radar.utils.settings :refer [load-classify-settings]]
12+
[tech-radar.utils.settings :refer [load-classify-settings
13+
load-hashtag-filter-settings]]
1314
[tech-radar.analytics.model :refer [new-model]]
1415
[tech-radar.analytics.cache :refer [new-cache
1516
get-cached-trends]]
@@ -41,27 +42,31 @@
4142
(do
4243
(timbre/info "Initializing analysis")
4344
(let [{:keys [topics]} (load-classify-settings)
45+
hashtag-filter-settings (load-hashtag-filter-settings)
4446
{:keys [max-hashtags-per-trend max-texts-per-request max-tweet-count cache-update-timeout-s]} (get-settings)
4547
topics (map first topics)
4648
database (:database database)
4749
analysis-chan (:analysis-chan preprocessor)
48-
model (new-model topics {:max-tweet-count max-tweet-count
49-
:max-hashtags-per-trend max-hashtags-per-trend
50-
:max-texts-per-request max-texts-per-request})
50+
model (new-model topics {:max-tweet-count max-tweet-count
51+
:max-hashtags-per-trend max-hashtags-per-trend
52+
:max-texts-per-request max-texts-per-request
53+
:hashtag-filter-settings hashtag-filter-settings})
5154
cache (new-cache)
52-
initial-data (load-data database {:topics topics
53-
:max-texts-per-request max-texts-per-request
54-
:max-tweet-count max-tweet-count})
55+
initial-data (load-data database {:topics topics
56+
:max-texts-per-request max-texts-per-request
57+
:max-tweet-count max-tweet-count
58+
:hashtag-filter-settings hashtag-filter-settings})
5559
_ (do
5660
(init model initial-data)
5761
(cache-update-fn model cache)
5862
(run-model-update {:model model
5963
:analysis-chan analysis-chan
6064
:metrics metrics}))
61-
stop-hashtags-update-fn (run-hashtags-update {:database database
62-
:topics topics
63-
:analysis-chan analysis-chan
64-
:metrics metrics})
65+
stop-hashtags-update-fn (run-hashtags-update {:database database
66+
:topics topics
67+
:hashtag-filter-settings hashtag-filter-settings
68+
:analysis-chan analysis-chan
69+
:metrics metrics})
6570
stop-cache-update-fn (run-cache-update {:model model
6671
:cache cache
6772
:cache-update-timeout-s cache-update-timeout-s})]

src/clj/tech_radar/database/hashtags.clj

+31-16
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,13 @@
3737
:from from
3838
:to to}))))
3939

40-
(defn- load-hashtags* [database {:keys [topic offset-back max-hashtags]}]
40+
(defn hashtags->map
  "Keeps the `max-hashtags` entries of `data` that have the highest counts.

  `data` is a collection of [hashtag count] pairs (a map works as well).
  Entries are ordered by their second element, descending, truncated to
  `max-hashtags`, and poured into a map of hashtag -> count."
  [max-hashtags data]
  (let [by-count-desc (sort-by second > data)]
    (into {} (take max-hashtags) by-count-desc)))
45+
46+
(defn- load-hashtags* [database {:keys [topic offset-back max-hashtags hashtag-filter]}]
4147
(let [from (-> (local/local-now)
4248
(time/minus offset-back)
4349
(to-sql-time))
@@ -46,23 +52,32 @@
4652
hashtags-daily-query* (hashtags-query {:topic topic
4753
:from from
4854
:to to
49-
:max-hashtags max-hashtags})]
55+
:max-hashtags (+ max-hashtags (* 10 (count hashtag-filter)))})]
5056
(->> (jdbc/query database hashtags-daily-query* :identifiers to-dashes)
5157
(map (fn [{:keys [hashtag count]}]
52-
[hashtag count]))
53-
(into {}))))
58+
[(.toLowerCase hashtag) count]))
59+
(reduce (fn [coll [hashtag count]]
60+
(if (hashtag-filter hashtag)
61+
coll
62+
(if-let [old-count (get coll hashtag)]
63+
(assoc coll hashtag (+ old-count count))
64+
(assoc coll hashtag count)))) {})
65+
(hashtags->map max-hashtags))))
5466

55-
(defn load-daily-hashtags [database topic]
56-
(load-hashtags* database {:topic topic
57-
:offset-back (time/days 1)
58-
:max-hashtags 1000}))
67+
(defn load-daily-hashtags
  "Loads hashtag counts for `topic` through `load-hashtags*` with a look-back
  offset of one day, keeping at most 1000 hashtags.

  `hashtag-filter` is the set of hashtags to leave out of the result. It may be
  nil (for example when a topic has no entry in the filter settings); nil is
  replaced by an empty set because `load-hashtags*` invokes the filter as a
  function and calling nil would throw a NullPointerException."
  [database topic hashtag-filter]
  (load-hashtags* database {:topic          topic
                            :offset-back    (time/days 1)
                            :max-hashtags   1000
                            :hashtag-filter (or hashtag-filter #{})}))
5972

60-
(defn load-weekly-hashtags [database topic]
61-
(load-hashtags* database {:topic topic
62-
:offset-back (time/weeks 1)
63-
:max-hashtags 20}))
73+
(defn load-weekly-hashtags
  "Loads hashtag counts for `topic` through `load-hashtags*` with a look-back
  offset of one week, keeping at most 25 hashtags.

  `hashtag-filter` is the set of hashtags to leave out of the result. It may be
  nil (for example when a topic has no entry in the filter settings); nil is
  replaced by an empty set because `load-hashtags*` invokes the filter as a
  function and calling nil would throw a NullPointerException."
  [database topic hashtag-filter]
  (load-hashtags* database {:topic          topic
                            :offset-back    (time/weeks 1)
                            :max-hashtags   25
                            :hashtag-filter (or hashtag-filter #{})}))
6478

65-
(defn load-monthly-hashtags [database topic]
66-
(load-hashtags* database {:topic topic
67-
:offset-back (time/months 1)
68-
:max-hashtags 20}))
79+
(defn load-monthly-hashtags
  "Loads hashtag counts for `topic` through `load-hashtags*` with a look-back
  offset of one month, keeping at most 25 hashtags.

  `hashtag-filter` is the set of hashtags to leave out of the result. It may be
  nil (for example when a topic has no entry in the filter settings); nil is
  replaced by an empty set because `load-hashtags*` invokes the filter as a
  function and calling nil would throw a NullPointerException."
  [database topic hashtag-filter]
  (load-hashtags* database {:topic          topic
                            :offset-back    (time/months 1)
                            :max-hashtags   25
                            :hashtag-filter (or hashtag-filter #{})}))

0 commit comments

Comments
 (0)