File tree 4 files changed +192
-6
lines changed
4 files changed +192
-6
lines changed Original file line number Diff line number Diff line change 82
82
:max-tweet-count " 1000"
83
83
:hype-tweet-count " 10000"
84
84
:similarity-threshold " 0.7"
85
+ :stop-words " stopwords.txt"
85
86
:metrics-timeout-s " 100"
86
87
:log-path " ./logs/tech-radar.log"
87
88
:max-log-size-mb " 1"
102
103
:max-tweet-count " 500000"
103
104
:hype-tweet-count " 10000"
104
105
:similarity-threshold " 0.7"
106
+ :stop-words " stopwords.txt"
105
107
:metrics-timeout-s " 300"
106
108
:log-path " ./logs/tech-radar.log"
107
109
:max-log-size-mb " 1"
Original file line number Diff line number Diff line change 17
17
get-cached-trends]]
18
18
[tech-radar.analytics.protocols :as protocols]
19
19
[immutant.scheduling :refer [schedule every in stop id]]
20
- [tech-radar.services.hype-meter :as hype-meter]))
20
+ [tech-radar.services.hype-meter :as hype-meter]
21
+ [clojure.string :as s]))
21
22
22
23
(defn- get-settings []
23
24
{:max-hashtags-per-trend (-> (env :max-hashtags-per-trend )
29
30
:cache-update-timeout-s (-> (env :cache-update-timeout-s )
30
31
(parse-int ))})
31
32
33
(defn- load-stop-words
  "Reads the stop-word file at `file-name` and returns its words as a set of strings.

  The file is read in full with `slurp` and split into lines. Every line is
  trimmed, so stray leading or trailing whitespace does not end up inside the
  set entries, and lines that are blank after trimming are dropped.

  Any exception thrown by `slurp` (for example when the file cannot be opened)
  propagates to the caller."
  [file-name]
  (->> (slurp file-name)
       (s/split-lines)
       ;; Trim before the blank check so whitespace-only lines are removed too.
       (map s/trim)
       (remove s/blank?)
       (set)))
38
+
32
39
(defrecord Analysis [database metrics preprocessor
33
40
stop-hashtags-update-fn stop-cache-update-fn
34
41
hype-meter-job
74
81
:hype-tweet-count (-> (env :hype-tweet-count )
75
82
(parse-int ))
76
83
:similarity-threshold (-> (env :similarity-threshold )
77
- (parse-double ))})]
84
+ (parse-double ))
85
+ :stop-words (-> (env :stop-words )
86
+ (load-stop-words ))})]
78
87
(assoc component :stop-hashtags-update-fn stop-hashtags-update-fn
79
88
:stop-cache-update-fn stop-cache-update-fn
80
89
:hype-meter-job (schedule hype-meter-fn
Original file line number Diff line number Diff line change 3
3
[tech-radar.database.tweets :as tweets]
4
4
[taoensso.timbre :as timbre]))
5
5
6
(defn run-hype-meter
  "Returns a map from each topic in `topics` to the value produced by
  `hype-meter/popular-tweets` for that topic.

  For every topic the tweets are loaded with
  `tweets/load-daily-tweets-per-topic`, limited to `hype-tweet-count` records,
  and then passed to `hype-meter/popular-tweets` together with `stop-words`,
  a fixed `:hype-count` of 10 and `similarity-threshold`.

  `stop-words` is optional: when it is missing or nil an empty set is used,
  which is the value this function passed before the option existed."
  [{:keys [database topics hype-tweet-count similarity-threshold stop-words]}]
  (let [stop-words (or stop-words #{})]
    (reduce (fn [acc topic]
              (let [tweets         (tweets/load-daily-tweets-per-topic database {:topic            topic
                                                                                  :max-record-count hype-tweet-count})
                    popular-tweets (hype-meter/popular-tweets tweets {:stop-words           stop-words
                                                                      :hype-count           10
                                                                      :similarity-threshold similarity-threshold})]
                (assoc acc topic popular-tweets)))
            {}
            topics)))
14
14
15
- (defn new-hype-meter-fn [{:keys [cache database topics hype-tweet-count similarity-threshold]}]
15
+ (defn new-hype-meter-fn [{:keys [cache database topics hype-tweet-count similarity-threshold stop-words ]}]
16
16
(let [busy (atom false )]
17
17
(fn []
18
18
(when-not @busy
23
23
popular-tweets (run-hype-meter {:database database
24
24
:topics topics
25
25
:hype-tweet-count hype-tweet-count
26
- :similarity-threshold similarity-threshold})]
26
+ :similarity-threshold similarity-threshold
27
+ :stop-words stop-words})]
27
28
(swap! cache (fn [cache]
28
29
(reduce (fn [cache [topic tweets]]
29
30
(assoc-in cache [topic :popular-tweets ] tweets))
Original file line number Diff line number Diff line change
1
+ a
2
+ about
3
+ above
4
+ after
5
+ again
6
+ against
7
+ all
8
+ am
9
+ an
10
+ and
11
+ any
12
+ are
13
+ aren't
14
+ as
15
+ at
16
+ be
17
+ because
18
+ been
19
+ before
20
+ being
21
+ below
22
+ between
23
+ both
24
+ but
25
+ by
26
+ can't
27
+ cannot
28
+ could
29
+ couldn't
30
+ did
31
+ didn't
32
+ do
33
+ does
34
+ doesn't
35
+ doing
36
+ don't
37
+ down
38
+ during
39
+ each
40
+ few
41
+ for
42
+ from
43
+ further
44
+ had
45
+ hadn't
46
+ has
47
+ hasn't
48
+ have
49
+ haven't
50
+ having
51
+ he
52
+ he'd
53
+ he'll
54
+ he's
55
+ her
56
+ here
57
+ here's
58
+ hers
59
+ herself
60
+ him
61
+ himself
62
+ his
63
+ how
64
+ how's
65
+ i
66
+ i'd
67
+ i'll
68
+ i'm
69
+ i've
70
+ if
71
+ in
72
+ into
73
+ is
74
+ isn't
75
+ it
76
+ it's
77
+ its
78
+ itself
79
+ let's
80
+ me
81
+ more
82
+ most
83
+ mustn't
84
+ my
85
+ myself
86
+ no
87
+ nor
88
+ not
89
+ of
90
+ off
91
+ on
92
+ once
93
+ only
94
+ or
95
+ other
96
+ ought
97
+ our
98
+ ours
99
+ ourselves
100
+ out
101
+ over
102
+ own
103
+ same
104
+ shan't
105
+ she
106
+ she'd
107
+ she'll
108
+ she's
109
+ should
110
+ shouldn't
111
+ so
112
+ some
113
+ such
114
+ than
115
+ that
116
+ that's
117
+ the
118
+ their
119
+ theirs
120
+ them
121
+ themselves
122
+ then
123
+ there
124
+ there's
125
+ these
126
+ they
127
+ they'd
128
+ they'll
129
+ they're
130
+ they've
131
+ this
132
+ those
133
+ through
134
+ to
135
+ too
136
+ under
137
+ until
138
+ up
139
+ very
140
+ was
141
+ wasn't
142
+ we
143
+ we'd
144
+ we'll
145
+ we're
146
+ we've
147
+ were
148
+ weren't
149
+ what
150
+ what's
151
+ when
152
+ when's
153
+ where
154
+ where's
155
+ which
156
+ while
157
+ who
158
+ who's
159
+ whom
160
+ why
161
+ why's
162
+ with
163
+ won't
164
+ would
165
+ wouldn't
166
+ you
167
+ you'd
168
+ you'll
169
+ you're
170
+ you've
171
+ your
172
+ yours
173
+ yourself
174
+ yourselves
You can’t perform that action at this time.
0 commit comments