-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkatya.go
248 lines (217 loc) · 7.74 KB
/
katya.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package main
import (
_ "embed"
"fmt"
"net/http"
"os"
"os/exec"
"os/signal"
"time"
"github.com/gorilla/mux"
"github.com/patrickmn/go-cache"
"github.com/pterm/pterm"
"github.com/rs/cors"
"github.com/thecsw/katya/analysis"
"github.com/thecsw/katya/log"
"github.com/thecsw/katya/storage"
)
// Runtime mode switch.
const (
	// katyaDevEnvironment, when true, makes main use the local database DSN,
	// the development banner, an extra localhost CORS origin, plain HTTP
	// instead of TLS, and a default user.
	katyaDevEnvironment = true
)

// HTTP router settings.
const (
	// ListenAddress is the address (here: every interface, port 32000) the
	// HTTP router binds to.
	ListenAddress = ":32000"
	// RESTClientCert is the path to the certificate served by the HTTP router.
	RESTClientCert string = "./certs/fullchain.pem"
	// RESTClientKey is the path to the private key matching RESTClientCert.
	RESTClientKey string = "./certs/privkey.pem"
)

// Locations on disk related to the scrapy crawlers (spiders).
const (
	// ScrapyDir is the home directory of the scrapy instance.
	ScrapyDir = "./scrapy/"
	// CrawlersDir is the directory that holds the crawlers (spiders).
	CrawlersDir = "./scrapy/katya_crawlers/spiders/"
	// LogsDir is the directory that receives the crawlers' logs.
	LogsDir = "./logs/"
)

// Database connection parameters; they are combined into the Postgres DSN.
const (
	// DbHost is the address of the database server.
	DbHost = "katya-api.sandyuraz.com"
	// DbPort is the port the database listens on.
	DbPort = 5432
	// DbUser is the database user we connect as.
	DbUser = "sandy"
	// DbName is the name of the database (usually the same as the user).
	DbName = "sandy"
	// DbSSLMode selects how the server's SSL certificate is checked.
	DbSSLMode = "verify-full"
	// DbSSLRootCertificate is the certificate list of the ruling CA (self-CA).
	DbSSLRootCertificate = "./tools/ca/ca.crt"
	// DbSSLCertificate is the client certificate the CA signed for us.
	DbSSLCertificate = "./tools/client/client.crt"
	// DbSSLKey is the private key that proves our identity.
	DbSSLKey = "./tools/client/client.key"
)
var (
// banner to show below the katya text banner
katyaBannerStrip = "Production Environment"
//go:embed data/template_spider.py
templateCrawler string
// dsn to connect to Postgres.
dsn = fmt.Sprintf(
"host=%s port=%d user=%s dbname=%s sslmode=%s sslcert=%s sslkey=%s sslrootcert=%s",
DbHost, DbPort, DbUser, DbName, DbSSLMode, DbSSLCertificate, DbSSLKey, DbSSLRootCertificate,
)
// HTTP stuff for CORS pre-flight requests
allowedOrigins = []string{"https://katya.sandyuraz.com", "https://katya-kappa.vercel.app"}
allowedMethods = []string{http.MethodPost, http.MethodGet, http.MethodDelete}
allowedHeaders = []string{"Authorization", "Content-Type", "Access-Control-Allow-Methods"}
)
// main boots the Katya API process and blocks until it is interrupted.
//
// In order, it:
//   - applies the development overrides (local DSN, banner, extra CORS origin),
//   - prints the banner and initializes logging and the database connection,
//   - makes sure the "global" storage element exists,
//   - creates the delta caches and starts the two periodic delta updaters,
//   - loads stopwords,
//   - registers the HTTP routes and middleware,
//   - launches the yagami Python service as a child process,
//   - serves HTTP (development) or HTTPS (production) in the background,
//   - waits for os.Interrupt, flushes the last deltas and kills yagami.
//
// If the database connection or the yagami process cannot be started, the
// error is logged and main returns.
func main() {
	// Enable debug environment if the global flag is true
	if katyaDevEnvironment {
		dsn = "host=127.0.0.1 port=5432 user=sandy dbname=sandy"
		katyaBannerStrip = "Development Environment"
		allowedOrigins = append(allowedOrigins, "http://localhost:8080")
	}

	// Print the big banner. The render error is ignored on purpose: the
	// banner is purely cosmetic.
	fmt.Println()
	s, _ := pterm.DefaultBigText.WithLetters(
		pterm.NewLettersFromStringWithStyle("K", pterm.NewStyle(pterm.FgMagenta)),
		pterm.NewLettersFromStringWithStyle("atya", pterm.NewStyle(pterm.FgGreen)),
	).Srender()
	pterm.DefaultCenter.Print(s)
	pterm.DefaultCenter.
		WithCenterEachLineSeparately().
		Println("Katya and friends or The Liberated Corpus")
	pterm.DefaultCenter.Print(
		pterm.DefaultHeader.
			WithFullWidth().
			WithBackgroundStyle(pterm.NewStyle(pterm.BgBlack)).
			WithMargin(1).Sprint(katyaBannerStrip))

	// Initialize our log instance
	log.Init()

	// Initializing the database connection
	log.Format("Initializing the database connection", log.Params{"DSN": dsn})
	if err := storage.InitDB(dsn); err != nil {
		log.Error("Failed opening a database connection", err, log.Params{"DSN": dsn})
		return
	}
	defer func() {
		log.Format("Closing the database connection", log.Params{"DSN": dsn})
		if err := storage.CloseDB(); err != nil {
			log.Error("Failed closing the database connection", err, log.Params{"DSN": dsn})
		}
	}()

	// +-------------------------------------+
	// |             OTHER STUFF             |
	// +-------------------------------------+

	log.Info("Checking for the existence of the global element")
	if !storage.DoesGlobalExist() {
		log.Info("Creating the global element")
		// A failure here is logged but does not stop the startup.
		if err := storage.CreateGlobal(); err != nil {
			log.Error("failed to create a global element", err, nil)
		}
	}

	// Create the delta caches. The results of Add are discarded on purpose.
	log.Info("Creating delta words/sentences caches")
	_ = globalNumWordsDelta.Add(globalDeltaCacheKey, uint(0), cache.NoExpiration)
	_ = globalNumSentencesDelta.Add(globalDeltaCacheKey, uint(0), cache.NoExpiration)

	// These two updaters run for the lifetime of the process; they are never
	// stopped explicitly and end when main returns.
	log.Info("Spinning up the words/sentences goroutines")
	go func() {
		for {
			time.Sleep(deltaUpdateInterval)
			updateGlobalWordSentencesDeltas()
		}
	}()
	go func() {
		for {
			time.Sleep(deltaUpdateInterval)
			updateSourcesWordSentencesDeltas()
		}
	}()

	// Loading stopwords
	log.Info("Loading stopwords")
	analysis.LoadStopwords()

	// +-------------------------------------+
	// |             HTTP Router             |
	// +-------------------------------------+

	log.Info("Creating our HTTP (API) router")
	myRouter := mux.NewRouter()
	myRouter.Use(basicMiddleware)

	// Routes registered directly on the root router only pass through
	// basicMiddleware.
	myRouter.HandleFunc("/", helloReceiver).Methods(http.MethodGet)
	myRouter.HandleFunc("/text", textReceiver).Methods(http.MethodPost)
	myRouter.HandleFunc("/status", statusReceiver).Methods(http.MethodPost)

	// Routes on the subrouter additionally pass through loggingMiddleware
	// (presumably the auth check, judging by the log line below — confirm).
	subRouter := myRouter.PathPrefix("").Subrouter()
	subRouter.HandleFunc("/auth", verifyAuth).Methods(http.MethodPost)
	subRouter.HandleFunc("/find", findQueryInTexts).Methods(http.MethodGet)
	subRouter.HandleFunc("/trigger", crawlerRunner).Methods(http.MethodPost)
	subRouter.HandleFunc("/sources", userGetSources).Methods(http.MethodGet)
	subRouter.HandleFunc("/allocate", crawlerCreator).Methods(http.MethodPost)
	subRouter.HandleFunc("/source", userCreateSource).Methods(http.MethodPost)
	subRouter.HandleFunc("/source", userDeleteSource).Methods(http.MethodDelete)
	subRouter.HandleFunc("/frequencies", frequencyFinder).Methods(http.MethodGet)
	subRouter.HandleFunc("/relations", findRelations).Methods(http.MethodGet)
	subRouter.HandleFunc("/clean", cleanTexts).Methods(http.MethodGet)
	subRouter.HandleFunc("/status", crawlerStatusReceiver).Methods(http.MethodGet)
	log.Info("Enabled the auth portal for the API router")
	subRouter.Use(loggingMiddleware)

	// Final preparations for the dev environment if enabled
	if katyaDevEnvironment {
		// Create a default user.
		// NOTE(review): any result returned by CreateUser is discarded here.
		storage.CreateUser("sandy", "urazayev")
	}

	// Start the yagami processing service. Start is called synchronously so
	// that a launch failure is reported right here and quotesCmd.Process is
	// guaranteed to be set before the shutdown path below uses it.
	log.Info("Starting the yagami service")
	quotesCmd := exec.Command("python3", "yagami.py")
	quotesCmd.Stdout = os.Stdout
	if err := quotesCmd.Start(); err != nil {
		log.Error("Failed to start the yagami service", err, nil)
		return
	}
	// yagamiStopping is closed right before we kill yagami ourselves, so the
	// watcher can tell an expected exit apart from a crash.
	yagamiStopping := make(chan struct{})
	go func() {
		err := quotesCmd.Wait()
		select {
		case <-yagamiStopping:
			// We killed the process during shutdown; its exit error is expected.
			return
		default:
		}
		// yagami died on its own: keep the original fail-fast behavior.
		if err != nil {
			panic(err)
		}
	}()
	// Give yagami a moment to come up before we start serving requests.
	time.Sleep(2 * time.Second)

	// Declare and define our HTTP handler
	log.Info("Configuring the HTTP router")
	corsOptions := cors.New(cors.Options{
		AllowedOrigins:     allowedOrigins,
		AllowedMethods:     allowedMethods,
		AllowedHeaders:     allowedHeaders,
		ExposedHeaders:     []string{},
		MaxAge:             0,
		AllowCredentials:   true,
		OptionsPassthrough: false,
		Debug:              false,
	})
	handler := corsOptions.Handler(myRouter)
	srv := &http.Server{
		Handler: handler,
		Addr:    ListenAddress,
		// Good practice: enforce timeouts for servers you create!
		WriteTimeout: 15 * time.Second,
		ReadTimeout:  15 * time.Second,
		IdleTimeout:  60 * time.Second,
	}

	// Fire up the router: plain HTTP in development, TLS otherwise.
	go func() {
		var err error
		if katyaDevEnvironment {
			err = srv.ListenAndServe()
		} else {
			err = srv.ListenAndServeTLS(RESTClientCert, RESTClientKey)
		}
		if err != nil {
			log.Error("Failed to fire up the router", err, nil)
		}
	}()
	log.Info("Started the HTTP router, port " + ListenAddress)

	// Block until an interrupt (SIGINT) arrives. Only os.Interrupt is
	// registered; other termination signals are not intercepted here.
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt)
	<-c
	log.Info("API is shutting down")

	// Run the final updates
	log.Info("Flushing last delta updates")
	updateGlobalWordSentencesDeltas()
	updateSourcesWordSentencesDeltas()

	log.Info("Killing Yagami")
	close(yagamiStopping)
	if err := quotesCmd.Process.Kill(); err != nil {
		log.Error("Failed to kill Yagami", err, nil)
	}
}