Skip to content

Commit 1f3a958

Browse files
committed
feat: enhance data model associations and remove unused components
1 parent c48d74b commit 1f3a958

File tree

17 files changed

+326
-497
lines changed

17 files changed

+326
-497
lines changed

core/controllers/schedule.go

Lines changed: 13 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ package controllers
22

33
import (
44
errors2 "errors"
5-
mongo2 "github.com/crawlab-team/crawlab/core/mongo"
6-
"go.mongodb.org/mongo-driver/bson"
75
"go.mongodb.org/mongo-driver/mongo"
86

97
"github.com/crawlab-team/crawlab/core/interfaces"
@@ -45,73 +43,35 @@ func GetScheduleById(_ *gin.Context, params *GetByIdParams) (response *Response[
4543

4644
func GetScheduleList(_ *gin.Context, params *GetListParams) (response *ListResponse[models.Schedule], err error) {
4745
query := ConvertToBsonMFromListParams(params)
48-
4946
sort, err := GetSortOptionFromString(params.Sort)
5047
if err != nil {
5148
return GetErrorListResponse[models.Schedule](errors.BadRequestf("invalid request parameters: %v", err))
5249
}
53-
54-
schedules, err := service.NewModelService[models.Schedule]().GetMany(query, &mongo2.FindOptions{
55-
Sort: sort,
56-
Skip: params.Size * (params.Page - 1),
57-
Limit: params.Size,
58-
})
59-
if err != nil {
60-
if !errors.Is(err, mongo.ErrNoDocuments) {
61-
return GetErrorListResponse[models.Schedule](err)
62-
}
63-
return GetListResponse[models.Schedule]([]models.Schedule{}, 0)
64-
}
65-
if len(schedules) == 0 {
66-
return GetListResponse[models.Schedule]([]models.Schedule{}, 0)
67-
}
50+
skip, limit := GetSkipLimitFromListParams(params)
6851

6952
// total count
7053
total, err := service.NewModelService[models.Schedule]().Count(query)
7154
if err != nil {
7255
return GetErrorListResponse[models.Schedule](err)
7356
}
7457

75-
// ids
76-
var ids []primitive.ObjectID
77-
var spiderIds []primitive.ObjectID
78-
for _, s := range schedules {
79-
ids = append(ids, s.Id)
80-
if !s.SpiderId.IsZero() {
81-
spiderIds = append(spiderIds, s.SpiderId)
82-
}
83-
}
84-
85-
// spider dict cache
86-
var spiders []models.Spider
87-
if len(spiderIds) > 0 {
88-
spiders, err = service.NewModelService[models.Spider]().GetMany(bson.M{"_id": bson.M{"$in": spiderIds}}, nil)
89-
if err != nil {
90-
return GetErrorListResponse[models.Schedule](err)
91-
}
92-
}
93-
dictSpider := map[primitive.ObjectID]models.Spider{}
94-
for _, p := range spiders {
95-
dictSpider[p.Id] = p
58+
// check total
59+
if total == 0 {
60+
return GetEmptyListResponse[models.Schedule]()
9661
}
9762

98-
// iterate list again
99-
var data []models.Schedule
100-
for _, s := range schedules {
101-
// spider
102-
if !s.SpiderId.IsZero() {
103-
p, ok := dictSpider[s.SpiderId]
104-
if ok {
105-
s.Spider = &p
106-
}
107-
}
63+
// aggregation pipelines
64+
pipelines := service.GetPaginationPipeline(query, sort, skip, limit)
65+
pipelines = append(pipelines, service.GetDefaultJoinPipeline[models.Spider]()...)
10866

109-
// add to list
110-
data = append(data, s)
67+
// perform query
68+
var schedules []models.Schedule
69+
err = service.GetCollection[models.Schedule]().Aggregate(pipelines, nil).All(&schedules)
70+
if err != nil {
71+
return GetErrorListResponse[models.Schedule](err)
11172
}
11273

113-
// response
114-
return GetListResponse(data, total)
74+
return GetListResponse(schedules, total)
11575
}
11676

11777
type PostScheduleParams struct {

core/controllers/spider.go

Lines changed: 18 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package controllers
22

33
import (
4-
"math"
54
"mime/multipart"
65
"os"
76
"path/filepath"
@@ -81,201 +80,44 @@ func GetSpiderById(_ *gin.Context, params *GetByIdParams) (response *Response[mo
8180
}
8281

8382
// GetSpiderList handles getting a list of spiders with optional stats
84-
func GetSpiderList(c *gin.Context, params *GetListParams) (response *ListResponse[models.Spider], err error) {
85-
// get list
86-
withStats := c.Query("stats")
87-
if withStats == "" {
88-
return getSpiderList(params)
89-
}
90-
91-
// get list with stats
92-
return getSpiderListWithStats(params)
93-
}
94-
95-
func getSpiderList(params *GetListParams) (response *ListResponse[models.Spider], err error) {
83+
func GetSpiderList(_ *gin.Context, params *GetListParams) (response *ListResponse[models.Spider], err error) {
84+
// query parameters
9685
query := ConvertToBsonMFromListParams(params)
97-
9886
sort, err := GetSortOptionFromString(params.Sort)
9987
if err != nil {
10088
return GetErrorListResponse[models.Spider](errors.BadRequestf("invalid request parameters: %v", err))
10189
}
102-
103-
spiders, err := service.NewModelService[models.Spider]().GetMany(query, &mongo2.FindOptions{
104-
Sort: sort,
105-
Skip: params.Size * (params.Page - 1),
106-
Limit: params.Size,
107-
})
108-
if err != nil {
109-
if !errors.Is(err, mongo.ErrNoDocuments) {
110-
return GetErrorListResponse[models.Spider](err)
111-
}
112-
return GetListResponse[models.Spider]([]models.Spider{}, 0)
113-
}
114-
if len(spiders) == 0 {
115-
return GetListResponse[models.Spider]([]models.Spider{}, 0)
116-
}
90+
skip, limit := GetSkipLimitFromListParams(params)
11791

11892
// total count
11993
total, err := service.NewModelService[models.Spider]().Count(query)
12094
if err != nil {
12195
return GetErrorListResponse[models.Spider](err)
12296
}
12397

124-
// ids
125-
var ids []primitive.ObjectID
126-
var gitIds []primitive.ObjectID
127-
var projectIds []primitive.ObjectID
128-
for _, s := range spiders {
129-
ids = append(ids, s.Id)
130-
if !s.GitId.IsZero() {
131-
gitIds = append(gitIds, s.GitId)
132-
}
133-
if !s.ProjectId.IsZero() {
134-
projectIds = append(projectIds, s.ProjectId)
135-
}
136-
}
137-
138-
// project dict cache
139-
var projects []models.Project
140-
if len(projectIds) > 0 {
141-
projects, err = service.NewModelService[models.Project]().GetMany(bson.M{"_id": bson.M{"$in": projectIds}}, nil)
142-
if err != nil {
143-
return GetErrorListResponse[models.Spider](err)
144-
}
145-
}
146-
dictProject := map[primitive.ObjectID]models.Project{}
147-
for _, p := range projects {
148-
dictProject[p.Id] = p
149-
}
150-
151-
// git dict cache
152-
var gits []models.Git
153-
if len(gitIds) > 0 && utils.IsPro() {
154-
gits, err = service.NewModelService[models.Git]().GetMany(bson.M{"_id": bson.M{"$in": gitIds}}, nil)
155-
if err != nil {
156-
return GetErrorListResponse[models.Spider](err)
157-
}
158-
}
159-
dictGit := map[primitive.ObjectID]models.Git{}
160-
for _, g := range gits {
161-
dictGit[g.Id] = g
162-
}
163-
164-
// iterate list again
165-
var data []models.Spider
166-
for _, s := range spiders {
167-
// project
168-
if !s.ProjectId.IsZero() {
169-
p, ok := dictProject[s.ProjectId]
170-
if ok {
171-
s.Project = &p
172-
}
173-
}
174-
175-
// git
176-
if !s.GitId.IsZero() && utils.IsPro() {
177-
g, ok := dictGit[s.GitId]
178-
if ok {
179-
s.Git = &g
180-
}
181-
}
182-
183-
// add to list
184-
data = append(data, s)
185-
}
186-
187-
// response
188-
return GetListResponse(data, total)
189-
}
190-
191-
func getSpiderListWithStats(params *GetListParams) (response *ListResponse[models.Spider], err error) {
192-
response, err = getSpiderList(params)
193-
if err != nil {
194-
return GetErrorListResponse[models.Spider](err)
98+
// check total
99+
if total == 0 {
100+
return GetEmptyListResponse[models.Spider]()
195101
}
196102

197-
// spider ids
198-
var ids []primitive.ObjectID
199-
for _, s := range response.Data {
200-
ids = append(ids, s.Id)
103+
// aggregation pipelines
104+
pipelines := service.GetPaginationPipeline(query, sort, skip, limit)
105+
pipelines = append(pipelines, service.GetJoinPipeline[models.SpiderStat]("_id", "_id", "_stat")...)
106+
pipelines = append(pipelines, service.GetJoinPipeline[models.Task]("_stat.last_task_id", "_id", "_last_task")...)
107+
pipelines = append(pipelines, service.GetJoinPipeline[models.TaskStat]("_last_task._id", "_id", "_last_task._stat")...)
108+
pipelines = append(pipelines, service.GetDefaultJoinPipeline[models.Project]()...)
109+
if utils.IsPro() {
110+
pipelines = append(pipelines, service.GetDefaultJoinPipeline[models.Git]()...)
201111
}
202112

203-
// spider stat dict
204-
spiderStats, err := service.NewModelService[models.SpiderStat]().GetMany(bson.M{"_id": bson.M{"$in": ids}}, nil)
113+
// perform query
114+
var spiders []models.Spider
115+
err = service.GetCollection[models.Spider]().Aggregate(pipelines, nil).All(&spiders)
205116
if err != nil {
206117
return GetErrorListResponse[models.Spider](err)
207118
}
208-
dictSpiderStat := map[primitive.ObjectID]models.SpiderStat{}
209-
210-
// task dict and task stat dict
211-
var lastTasks []models.Task
212-
var lastTaskIds []primitive.ObjectID
213-
for _, st := range spiderStats {
214-
if st.Tasks > 0 {
215-
taskCount := int64(st.Tasks)
216-
st.AverageWaitDuration = int64(math.Round(float64(st.WaitDuration) / float64(taskCount)))
217-
st.AverageRuntimeDuration = int64(math.Round(float64(st.RuntimeDuration) / float64(taskCount)))
218-
st.AverageTotalDuration = int64(math.Round(float64(st.TotalDuration) / float64(taskCount)))
219-
}
220-
dictSpiderStat[st.Id] = st
221-
222-
if !st.LastTaskId.IsZero() {
223-
lastTaskIds = append(lastTaskIds, st.LastTaskId)
224-
}
225-
}
226-
dictLastTask := map[primitive.ObjectID]models.Task{}
227-
dictLastTaskStat := map[primitive.ObjectID]models.TaskStat{}
228-
if len(lastTaskIds) > 0 {
229-
// task list
230-
queryTask := bson.M{
231-
"_id": bson.M{
232-
"$in": lastTaskIds,
233-
},
234-
}
235-
lastTasks, err = service.NewModelService[models.Task]().GetMany(queryTask, nil)
236-
if err != nil {
237-
return GetErrorListResponse[models.Spider](err)
238-
}
239-
240-
// task stats list
241-
taskStats, err := service.NewModelService[models.TaskStat]().GetMany(queryTask, nil)
242-
if err != nil {
243-
return GetErrorListResponse[models.Spider](err)
244-
}
245-
246-
for _, st := range taskStats {
247-
dictLastTaskStat[st.Id] = st
248-
}
249-
250-
for _, t := range lastTasks {
251-
st, ok := dictLastTaskStat[t.Id]
252-
if ok {
253-
t.Stat = &st
254-
}
255-
dictLastTask[t.SpiderId] = t
256-
}
257-
}
258-
259-
// iterate list again
260-
for i, s := range response.Data {
261-
// spider stat
262-
st, ok := dictSpiderStat[s.Id]
263-
if ok {
264-
s.Stat = &st
265-
}
266-
267-
// last task and stat
268-
if !s.Stat.LastTaskId.IsZero() {
269-
t, ok := dictLastTask[s.Stat.LastTaskId]
270-
if ok {
271-
s.Stat.LastTask = &t
272-
}
273-
}
274-
275-
response.Data[i] = s
276-
}
277119

278-
return response, nil
120+
return GetListResponse(spiders, total)
279121
}
280122

281123
// PostSpider handles creating a new spider

0 commit comments

Comments
 (0)