@@ -49,46 +49,51 @@ def load_territories(engine):
49
49
logger .info ("Populating 'territories' table - Done!" )
50
50
51
51
52
def get_new_spiders(session, territory_spider_map):
    """Return the entries of ``territory_spider_map`` that are not registered.

    Every ``QueridoDiarioSpider`` row is loaded and expanded into one
    ``(spider_name, territory_id, date_from)`` triple per linked territory.
    An entry of ``territory_spider_map`` is considered new when its triple is
    absent from that set.

    Args:
        session: Session used to query ``QueridoDiarioSpider``.
        territory_spider_map: Iterable of
            ``(spider_name, territory_id, date_from)`` sequences.

    Returns:
        A list holding the unregistered entries, unchanged and in their
        original order.
    """
    registered_keys = {
        (spider.spider_name, territory.id, spider.date_from)
        for spider in session.query(QueridoDiarioSpider).all()
        for territory in spider.territories
    }
    # Normalise each entry to a tuple before the lookup: a list entry is
    # unhashable and would make the set membership test raise TypeError.
    # NOTE(review): matching relies on the map's date_from comparing equal to
    # the value loaded from the database -- confirm both sides use one type.
    return [
        spider_info
        for spider_info in territory_spider_map
        if tuple(spider_info) not in registered_keys
    ]
65
+
66
+
52
67
def load_spiders(engine, territory_spider_map):
    """Insert the spiders missing from the 'querido_diario_spider' table.

    When the table already has rows, only the entries reported by
    ``get_new_spiders`` are inserted; otherwise every entry of
    ``territory_spider_map`` is. Each inserted ``QueridoDiarioSpider`` is
    linked to the ``Territory`` whose id matches the entry.

    Args:
        engine: Engine the session is bound to.
        territory_spider_map: Iterable of
            ``(spider_name, territory_id, date_from)`` sequences.

    Raises:
        Exception: Any error raised while querying or committing is re-raised
            after the transaction has been rolled back.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        table_is_populated = session.query(QueridoDiarioSpider).count() > 0
        new_spiders = (
            get_new_spiders(session, territory_spider_map)
            if table_is_populated
            else territory_spider_map
        )

        logger.info("Populating 'querido_diario_spider' table - Please wait!")

        territory_map = {t.id: t for t in session.query(Territory).all()}

        spiders = []
        for spider_name, territory_id, date_from in new_spiders:
            territory = territory_map.get(territory_id)
            if territory is None:
                # Entries pointing at an unknown territory are still skipped,
                # but no longer silently.
                logger.warning(
                    "Skipping spider '%s': territory '%s' not found",
                    spider_name,
                    territory_id,
                )
                continue
            # NOTE(review): one row is built per map entry, so a spider name
            # listed for several territories yields several rows -- confirm
            # this is compatible with the table's key constraints.
            spiders.append(
                QueridoDiarioSpider(
                    spider_name=spider_name,
                    date_from=date_from,
                    territories=[territory],
                )
            )

        session.add_all(spiders)
        session.commit()
        logger.info("Populating 'querido_diario_spider' table - Done!")
    except Exception:
        # Leave no half-applied transaction behind before propagating.
        session.rollback()
        raise
    finally:
        # Always hand the connection back, on success and on failure alike.
        session.close()
94
99
0 commit comments