From 2757fac1bc44bfce95dfcf1ddf4b4a74eda5542a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Thu, 6 Oct 2022 22:55:06 +0200
Subject: [PATCH 1/4] Add alive_progress

- Known issue with my current logging configs and alive_progress

---
 main.py          | 39 ++++++++++++++++++++++++++++-----------
 requirements.txt |  1 +
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/main.py b/main.py
index 7c0752e3..f5e14179 100644
--- a/main.py
+++ b/main.py
@@ -2,8 +2,11 @@ import logging.config
 import logging
 import time
+import alive_progress
 
 import scraper
 
+alive_progress.config_handler.set_global(ctrl_c=False, dual_line=True, theme="classic", stats=False)
+
 
 def main():
     args = scraper.argparse_setup()
@@ -49,10 +52,19 @@ def scrape():
     products = [scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"])]
 
     # Scrape and save scraped data for each product (sequentially)
-    for product in products:
-        time.sleep(request_delay)
-        product.scrape_info()
-        product.save_info()
+    # for product in products:
+    #     time.sleep(request_delay)
+    #     product.scrape_info()
+    #     product.save_info()
+
+    with alive_progress.alive_bar(len(products), title="Scraping") as bar:
+        # Scrape and save scraped data for each product (sequentially)
+        for product in products:
+            bar.text = f"-> {product.url}"
+            time.sleep(request_delay)
+            product.scrape_info()
+            product.save_info()
+            bar()
 
 
 def scrape_with_threads():
@@ -67,14 +79,16 @@ def scrape_with_threads():
     # Create threads
     threads = [threading.Thread(target=product.scrape_info) for product in products]
 
-    # Start scraping on all threads
-    for thread in threads:
-        time.sleep(request_delay)
-        thread.start()
+    with alive_progress.alive_bar(len(products), title="Scraping") as bar:
+        # Start scraping on all threads
+        for thread in threads:
+            time.sleep(request_delay)
+            thread.start()
 
-    # Wait for all threads to finish
-    for thread in threads:
-        thread.join()
+        # Wait for all threads to finish
+        for thread in threads:
+            thread.join()
+            bar()
 
     # Save scraped data for each product (sequentially)
     for product in products:
@@ -82,6 +96,9 @@
 
 
 if __name__ == "__main__":
+
+    # DON'T MERGE WITH MASTER BRANCH: KNOWN ISSUE: https://github.com/rsalmei/alive-progress/issues/155
+    # alive_progress crashes with the below logging config settings
     logging.config.fileConfig(
         fname=scraper.Filemanager.logging_ini_path,
         defaults={"logfilename": scraper.Filemanager.logfile_path},

diff --git a/requirements.txt b/requirements.txt
index cf44c310..0ee0a012 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ plotly>=4.12.0
 pandas>=1.1.3
 pytest>=7.1.2
 pytest-mock>=3.8.2
+alive-progress>=2.4.1

From d1619f39424e3a54b8dbe9c3a6b5b07843622915 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Mon, 2 Jan 2023 23:30:37 +0100
Subject: [PATCH 2/4] With version 3.0 of alive-progress, there is no issue with my logging setup and alive-progress

---
 main.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/main.py b/main.py
index 8a359f77..bf83c4a6 100644
--- a/main.py
+++ b/main.py
@@ -96,9 +96,6 @@
 
 
 if __name__ == "__main__":
-
-    # DON'T MERGE WITH MASTER BRANCH: KNOWN ISSUE: https://github.com/rsalmei/alive-progress/issues/155
-    # alive_progress crashes with the below logging config settings
     logging.config.fileConfig(
         fname=scraper.Filemanager.logging_ini_path,
         defaults={"logfilename": scraper.Filemanager.logfile_path},
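Note: the following standalone sketch (not part of the patches) shows the alive_progress pattern PATCH 1 adopts — a one-time config_handler.set_global() call, then an alive_bar context manager whose yielded bar is ticked once per item. The scrape_one function and example URLs are illustrative stand-ins, not names from this repository:

    import time

    import alive_progress

    # One-time global defaults, mirroring the call added to main.py.
    alive_progress.config_handler.set_global(ctrl_c=False, dual_line=True, theme="classic", stats=False)


    def scrape_one(url: str) -> None:
        """Illustrative stand-in for Scraper.scrape_info() + save_info()."""
        time.sleep(0.5)


    urls = ["https://example.com/a", "https://example.com/b", "https://example.com/c"]

    # alive_bar is told the expected total up front; the yielded `bar` is
    # called once per finished item, and `bar.text` sets the status line.
    with alive_progress.alive_bar(len(urls), title="Scraping") as bar:
        for url in urls:
            bar.text = f"-> {url}"
            scrape_one(url)
            bar()

With dual_line=True, the bar.text status renders on its own line beneath the bar, which is why the patch can afford to put a full per-product URL there.
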
From 41510f0d22af84ba129160f7d6d2a8bfec94b8f6 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Mon, 30 Jan 2023 19:57:12 +0100
Subject: [PATCH 3/4] Update functions scrape_with_threads and start_threads_sequentially

Add alive_progress context manager to function scrape_with_threads

Add parameter to function start_threads_sequentially to pass an alive_progress bar to it

---
 main.py           | 25 +++++++++++++------------
 scraper/scrape.py |  5 ++++-
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/main.py b/main.py
index f750a1fb..d5b2515a 100644
--- a/main.py
+++ b/main.py
@@ -84,21 +84,22 @@ def scrape_with_threads() -> None:
         scraper_threads = [threading.Thread(target=product.scrape_info) for product in products]
         grouped_scraper_threads.append(scraper_threads)
 
-    # Create master threads to manage scraper threads sequentially for each domain
-    master_threads = [
-        threading.Thread(target=scraper.start_threads_sequentially, args=[scraper_threads, request_delay])
-        for scraper_threads in grouped_scraper_threads
-    ]
+    products_flatten = [product for products in grouped_products.values() for product in products]
 
-    # Start all master threads
-    for master_thread in master_threads:
-        master_thread.start()
+    with alive_progress.alive_bar(len(products_flatten), title="Scraping with threads") as progress_bar:
+        # Create master threads to manage scraper threads sequentially for each domain
+        master_threads = [
+            threading.Thread(target=scraper.start_threads_sequentially, args=[scraper_threads, request_delay, progress_bar])
+            for scraper_threads in grouped_scraper_threads
+        ]
 
-    # Wait for all master threads to finish
-    for master_thread in master_threads:
-        master_thread.join()
+        # Start all master threads
+        for master_thread in master_threads:
+            master_thread.start()
 
-    products_flatten = [product for products in grouped_products.values() for product in products]
+        # Wait for all master threads to finish
+        for master_thread in master_threads:
+            master_thread.join()
 
     # Save scraped data for each product (sequentially)
     for product in products_flatten:

diff --git a/scraper/scrape.py b/scraper/scrape.py
index 881a420f..f8d40292 100644
--- a/scraper/scrape.py
+++ b/scraper/scrape.py
@@ -74,8 +74,11 @@ def add_product_datapoint(product_data: dict, price: float) -> None:
     product_datapoints.append(new_datapoint)
 
 
-def start_threads_sequentially(threads: list[threading.Thread], request_delay: int) -> None:
+def start_threads_sequentially(threads: list[threading.Thread], request_delay: int, progress_bar=None) -> None:
     for thread in threads:
         thread.start()
         thread.join()
         time.sleep(request_delay)
+
+        if progress_bar:
+            progress_bar()

From 119f40ea41b0119cdb5bb6f9285b93bc34070994 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Mon, 30 Jan 2023 19:58:16 +0100
Subject: [PATCH 4/4] Delete commented out code

---
 main.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/main.py b/main.py
index d5b2515a..4382183b 100644
--- a/main.py
+++ b/main.py
@@ -52,12 +52,6 @@ def scrape() -> None:
     # Create instances of class "Scraper"
     products = [scraper.Scraper(category, url) for category, url in zip(products_df["category"], products_df["url"])]
 
-    # Scrape and save scraped data for each product (sequentially)
-    # for product in products:
-    #     time.sleep(request_delay)
-    #     product.scrape_info()
-    #     product.save_info()
-
     with alive_progress.alive_bar(len(products), title="Scraping") as bar:
         # Scrape and save scraped data for each product (sequentially)
         for product in products:
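Note: a self-contained sketch (not part of the patches) of the pattern PATCH 3 introduces — the bar is handed to each master thread as an optional callable and ticked once per finished scraper thread. fake_scrape, the group sizes, and the zero delay are illustrative stand-ins; start_threads_sequentially mirrors the signature added in scraper/scrape.py:

    import threading
    import time

    import alive_progress


    def start_threads_sequentially(threads: list[threading.Thread], request_delay: int, progress_bar=None) -> None:
        # Same shape as the function in scraper/scrape.py after PATCH 3: run each
        # thread to completion before the next, ticking the shared bar per thread.
        for thread in threads:
            thread.start()
            thread.join()
            time.sleep(request_delay)

            if progress_bar:
                progress_bar()


    def fake_scrape() -> None:
        """Illustrative stand-in for Scraper.scrape_info."""
        time.sleep(0.2)


    # Two "domains" with three products each, represented as thread groups.
    grouped_threads = [[threading.Thread(target=fake_scrape) for _ in range(3)] for _ in range(2)]
    total_products = sum(len(group) for group in grouped_threads)

    with alive_progress.alive_bar(total_products, title="Scraping with threads") as progress_bar:
        # One master thread per group, all sharing the same bar.
        master_threads = [
            threading.Thread(target=start_threads_sequentially, args=[group, 0, progress_bar])
            for group in grouped_threads
        ]

        for master_thread in master_threads:
            master_thread.start()

        for master_thread in master_threads:
            master_thread.join()

Since every master thread ticks the same bar object, this leans on bar() being safe to call from multiple threads — the same assumption the patch itself makes.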