Skip to content

Commit bc14efa

Browse files
authored
Corrige update de raspadores no banco (#1068)
#### Descrição

Ao adicionar um novo raspador no projeto, a tabela de raspadores no banco não está sendo atualizada automaticamente. Este PR verifica quais raspadores estão no projeto e não estão no banco, para adicioná-los pelo menos ao executar um novo raspador (idealmente, seria um comando à parte).
2 parents 9520f6d + e3b33d3 commit bc14efa

File tree

1 file changed

+34
-29
lines changed

1 file changed

+34
-29
lines changed

data_collection/gazette/database/models.py

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -49,46 +49,51 @@ def load_territories(engine):
4949
logger.info("Populating 'territories' table - Done!")
5050

5151

52+
def get_new_spiders(session, territory_spider_map):
53+
registered_spiders = session.query(QueridoDiarioSpider).all()
54+
registered_spiders_set = {
55+
(spider.spider_name, territory.id, spider.date_from)
56+
for spider in registered_spiders
57+
for territory in spider.territories
58+
}
59+
only_new_spiders = [
60+
spider_info
61+
for spider_info in territory_spider_map
62+
if spider_info not in registered_spiders_set
63+
]
64+
return only_new_spiders
65+
66+
5267
def load_spiders(engine, territory_spider_map):
5368
Session = sessionmaker(bind=engine)
5469
session = Session()
5570

56-
if session.query(QueridoDiarioSpider).count() > 0:
57-
return
71+
table_is_populated = session.query(QueridoDiarioSpider).count() > 0
72+
new_spiders = (
73+
get_new_spiders(session, territory_spider_map)
74+
if table_is_populated
75+
else territory_spider_map
76+
)
5877

5978
logger.info("Populating 'querido_diario_spider' table - Please wait!")
6079

61-
spiders = []
62-
territory_ids = set()
63-
for info in territory_spider_map:
64-
spider_name, territory_id, date_from = info
65-
spiders.append(
66-
QueridoDiarioSpider(spider_name=spider_name, date_from=date_from)
67-
)
68-
territory_ids.add(territory_id)
69-
70-
session.add_all(spiders)
71-
session.commit()
72-
73-
spiders = (
74-
session.query(QueridoDiarioSpider)
75-
.filter(
76-
QueridoDiarioSpider.spider_name.in_(set(s[0] for s in territory_spider_map))
77-
)
78-
.all()
79-
)
80-
spider_map = {spider.spider_name: spider for spider in spiders}
81-
82-
territories = session.query(Territory).filter(Territory.id.in_(territory_ids)).all()
80+
territories = session.query(Territory).all()
8381
territory_map = {t.id: t for t in territories}
8482

85-
for info in territory_spider_map:
86-
spider_name, territory_id, _ = info
87-
spider = spider_map.get(spider_name)
83+
spiders = []
84+
for info in new_spiders:
85+
spider_name, territory_id, date_from = info
8886
territory = territory_map.get(territory_id)
89-
if spider is not None and territory is not None:
90-
spider.territories.append(territory)
87+
if territory is not None:
88+
spiders.append(
89+
QueridoDiarioSpider(
90+
spider_name=spider_name,
91+
date_from=date_from,
92+
territories=[territory],
93+
)
94+
)
9195

96+
session.add_all(spiders)
9297
session.commit()
9398
logger.info("Populating 'querido_diario_spider' table - Done!")
9499

0 commit comments

Comments
 (0)