Skip to content

Commit

Permalink
Corrige modificação do banco em atualização de raspadores (#1151)
Browse files Browse the repository at this point in the history
#### Descrição

Ao modificar o `TERRITORY_ID` ou o `start_date` de um raspador, ocorria
um erro: o raspador era detectado como um novo raspador e a inserção do
novo registro no banco era bloqueada, pois o nome do raspador é a chave
primária da tabela.

Agora, atualizações em `TERRITORY_ID` e `start_date` podem ser
realizadas e os campos serão atualizados no banco sem a tentativa
de inserção de novo registro.
  • Loading branch information
ogecece authored May 29, 2024
2 parents e67c439 + 7e3849d commit 29b69e7
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions data_collection/gazette/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,28 @@ def load_territories(engine):
logger.info("Populating 'territories' table - Done!")


def get_new_or_modified_spiders(session, territory_spider_map):
    """Return the spider entries that are not already stored exactly as given.

    Every registered ``QueridoDiarioSpider`` row is expanded into one
    ``(spider_name, territory_id, date_from)`` tuple per associated
    territory. An entry of ``territory_spider_map`` is returned when it does
    not match any of those tuples, which covers both spiders that are not
    registered yet and registered spiders whose territory or start date
    differs from the stored values.

    Args:
        session: Session used to query the registered spiders.
        territory_spider_map: Iterable of
            ``(spider_name, territory_id, date_from)`` tuples.

    Returns:
        A list with the entries of ``territory_spider_map`` that have no
        exact match among the registered spiders, in their original order.
    """
    registered_spiders = session.query(QueridoDiarioSpider).all()
    # Set of tuples so each membership test below is a single hash lookup.
    registered_spiders_set = {
        (spider.spider_name, territory.id, spider.date_from)
        for spider in registered_spiders
        for territory in spider.territories
    }
    only_new_or_modified_spiders = [
        spider_info
        for spider_info in territory_spider_map
        if spider_info not in registered_spiders_set
    ]
    return only_new_or_modified_spiders


# Previous name of the function, kept so callers still using it keep working.
get_new_spiders = get_new_or_modified_spiders


def load_spiders(engine, territory_spider_map):
Session = sessionmaker(bind=engine)
session = Session()

table_is_populated = session.query(QueridoDiarioSpider).count() > 0
new_spiders = (
get_new_spiders(session, territory_spider_map)
spiders_to_persist = (
get_new_or_modified_spiders(session, territory_spider_map)
if table_is_populated
else territory_spider_map
)
Expand All @@ -80,20 +80,18 @@ def load_spiders(engine, territory_spider_map):
territories = session.query(Territory).all()
territory_map = {t.id: t for t in territories}

spiders = []
for info in new_spiders:
for info in spiders_to_persist:
spider_name, territory_id, date_from = info
territory = territory_map.get(territory_id)
if territory is not None:
spiders.append(
session.merge(
QueridoDiarioSpider(
spider_name=spider_name,
date_from=date_from,
territories=[territory],
)
)

session.add_all(spiders)
session.commit()
logger.info("Populating 'querido_diario_spider' table - Done!")

Expand Down

0 comments on commit 29b69e7

Please sign in to comment.