From 2c02fb0b643b468c6f9e2c6b6839a4a711aede36 Mon Sep 17 00:00:00 2001
From: trevineju <julianabtrevine@gmail.com>
Date: Wed, 8 Jan 2025 21:43:17 -0300
Subject: [PATCH] =?UTF-8?q?Atualiza=20base=20moderniza=C3=A7=C3=A3o?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data_collection/gazette/spiders/base/modernizacao.py  | 9 +++++----
 data_collection/gazette/spiders/rj/rj_belford_roxo.py | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/data_collection/gazette/spiders/base/modernizacao.py b/data_collection/gazette/spiders/base/modernizacao.py
index 00dcef415..6b25fd318 100644
--- a/data_collection/gazette/spiders/base/modernizacao.py
+++ b/data_collection/gazette/spiders/base/modernizacao.py
@@ -11,6 +11,8 @@
 class BaseModernizacaoSpider(BaseGazetteSpider):
     power = "executive_legislative"
     ver_subpath = "ver20230623"
+    filter_endpoint = "diario_oficial_get"
+    edition_endpoint = "WEB-ObterAnexo.rule"
 
     custom_settings = {
         "CONCURRENT_REQUESTS": 4,
@@ -19,7 +21,7 @@ class BaseModernizacaoSpider(BaseGazetteSpider):
 
     def start_requests(self):
         domain = self.allowed_domains[0]
-        base_url = f"https://{domain}/diario_oficial_get.php"
+        base_url = f"https://{domain}/{self.filter_endpoint}.php"
         initial_date = date(self.start_date.year, self.start_date.month, 1)
 
         for monthly_date in rrule(
@@ -29,20 +31,19 @@ def start_requests(self):
             yield scrapy.FormRequest(
                 method="GET",
                 url=base_url,
-                formdata={"mesano": month_year},
+                formdata={"mes_ano": month_year},
             )
 
     def parse(self, response):
         for gazette_data in response.json():
             raw_gazette_date = gazette_data["Data_Formatada"]
-            raw_gazette_date
             gazette_date = datetime.strptime(raw_gazette_date, "%d/%m/%Y").date()
             if not self.start_date <= gazette_date <= self.end_date:
                 continue
 
             gazette_code = gazette_data["Codigo_ANEXO"]
             gazette_url = response.urljoin(
-                f"{self.ver_subpath}/WEB-ObterAnexo.rule?sys=LAI&codigo={gazette_code}"
+                f"{self.ver_subpath}/{self.edition_endpoint}?sys=LAI&codigo={gazette_code}"
             )
 
             raw_edition_number = gazette_data["ANEXO"]
diff --git a/data_collection/gazette/spiders/rj/rj_belford_roxo.py b/data_collection/gazette/spiders/rj/rj_belford_roxo.py
index acd3c5e2c..c0ee9f505 100644
--- a/data_collection/gazette/spiders/rj/rj_belford_roxo.py
+++ b/data_collection/gazette/spiders/rj/rj_belford_roxo.py
@@ -9,3 +9,5 @@ class RjBelfordRoxoSpider(BaseModernizacaoSpider):
     allowed_domains = ["transparencia.prefeituradebelfordroxo.rj.gov.br"]
     start_date = date(2019, 1, 2)
     power = "executive"
+    edition_endpoint = "WEB-ObterAnexomaior.rule"
+    filter_endpoint = "diario_oficial_getmaior"