From 644f29858439f070baf0944bc8be6e8bf0df197b Mon Sep 17 00:00:00 2001
From: Yeojun <52143231+Yun024@users.noreply.github.com>
Date: Tue, 4 Mar 2025 19:21:06 +0900
Subject: [PATCH 1/2] [Upload] spotify_weekly_top200_crawler

---
 merge_csv_top200.py      | 43 +++++++++++++++++++++++++++++++++
 weekly_top200_crawler.py | 52 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 merge_csv_top200.py
 create mode 100644 weekly_top200_crawler.py

diff --git a/merge_csv_top200.py b/merge_csv_top200.py
new file mode 100644
index 0000000..cbd5e37
--- /dev/null
+++ b/merge_csv_top200.py
@@ -0,0 +1,43 @@
+
+import pandas as pd
+import os
+from tqdm import tqdm
+from datetime import datetime, timedelta
+
+def merge_csv(source, destination):
+    """Merge every .csv file in `source` into one CSV written to `destination`; returns None.
+
+    Filenames must split on '-' into six fields (_, country, _, year, month, day + extension)."""
+    header = ['rank','track_id','artist_names','track_name','source','peak_rank','previous_rank','weeks_on_chart','streams', 'country_code','date']
+    combined_df = pd.DataFrame(columns=header)  # accumulator seeded with the expected column names
+    directory = source
+    for filename in tqdm(os.listdir(directory)):
+        if filename.endswith('.csv'):
+            file_path = os.path.join(directory, filename)
+            _, country, _, year, month, day = filename.split('-')
+
+            # 기존 데이터 로드
+            df = pd.read_csv(file_path)
+
+            # uri에서 track_id만 추출하여 컬럼명 변경하여 저장       
+            df['uri'] = df['uri'].str.split(':').str[-1]
+            df.rename(columns={'uri': 'track_id'}, inplace=True)  # column now holds only the text after the last ':'
+            # Tag every row with the country and date parsed from the filename.
+            # `day` still carries the file extension, so only the text before the first '.' is converted.
+            df['country_code'] = country
+            df['date'] = datetime(int(year), int(month), int(day.split('.')[0]))
+            
+            # Append this file's rows to the combined frame, renumbering the index.
+            combined_df = pd.concat([combined_df, df], ignore_index=True)
+            print(filename)
+            
+    # 수정된 데이터를 새로운 파일로 저장
+    combined_df.to_csv(destination, index=False)
+
+directory = 'C:/Users/Yeojun/s4tify/weekly_Top_Songs/'
+new_filename = "weekly_top200_combined.csv"
+# Input directory and output path are hard-coded; the merge below runs at module level (no __main__ guard).
+source = 'C:/Users/Yeojun/Downloads/'
+destination = os.path.join(directory, new_filename)
+
+merge_csv(source, destination)
\ No newline at end of file
diff --git a/weekly_top200_crawler.py b/weekly_top200_crawler.py
new file mode 100644
index 0000000..8f843ce
--- /dev/null
+++ b/weekly_top200_crawler.py
@@ -0,0 +1,52 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from webdriver_manager.chrome import ChromeDriverManager
+import time, os
+from datetime import datetime, timedelta
+from dotenv import load_dotenv
+load_dotenv("C://Users/Yeojun/.env")
+from tqdm import tqdm
+
+def get_weekly_top200_songs(countries, year=3):
+    """Click the CSV download button on each country's weekly Spotify chart page for the past `year` years.
+
+    Logs in with SPOTIFY_ID / SPOTIFY_PASS from the environment, then visits one chart page per
+    (country, week). Returns None. Raises RuntimeError before launching Chrome when either
+    credential is unset, instead of failing later inside send_keys(None).
+    """
+    spotify_id, spotify_pass = os.getenv("SPOTIFY_ID"), os.getenv("SPOTIFY_PASS")
+    if not spotify_id or not spotify_pass:
+        raise RuntimeError("SPOTIFY_ID and SPOTIFY_PASS must be set in the environment")
+
+    # One date per week, newest first, from five days ago back through `year * 365` days.
+    # NOTE(review): the chart URL takes an exact date; confirm "today - 5 days" is valid on any weekday.
+    start_date = datetime.now() - timedelta(days=5)
+    week_count = int(year * 365 // 7) + 1
+    dates = [(start_date - timedelta(weeks=i)).strftime("%Y-%m-%d") for i in range(week_count)]
+
+    options = webdriver.ChromeOptions()
+    options.add_argument("--headless")
+    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+
+    with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
+        # Poll up to 10 s per lookup so find_element does not race the page right after a click or navigation.
+        driver.implicitly_wait(10)
+        driver.get("https://charts.spotify.com/charts/view/regional-global-weekly/latest")
+        time.sleep(2)
+        driver.find_element(By.XPATH, '//*[@id="__next"]/div/div/header/div/div[2]/a/span[1]').click()
+        driver.find_element(By.XPATH, '//*[@id="login-username"]').send_keys(spotify_id)
+        driver.find_element(By.XPATH, '//*[@id="login-password"]').send_keys(spotify_pass)
+        driver.find_element(By.XPATH, '//*[@id="login-button"]').click()
+        time.sleep(3)
+
+        for country in tqdm(countries):
+            for date in dates:
+                driver.get(f"https://charts.spotify.com/charts/view/regional-{country}-weekly/{date}")
+                time.sleep(2)
+                driver.find_element(By.XPATH, '//*[@id="__next"]/div/div[3]/div/div/div[2]/span').click()
+                # Pause after the click before the next navigation replaces the page.
+                time.sleep(2)
+
+countries = ['global','kr','us']  # values substituted into the regional-{country}-weekly chart URL
+get_weekly_top200_songs(countries)  # runs at module level (no __main__ guard), with the default year=3
\ No newline at end of file

From df92ad2a497df85b0742362477380976e20d9d13 Mon Sep 17 00:00:00 2001
From: Yeojun <52143231+Yun024@users.noreply.github.com>
Date: Wed, 5 Mar 2025 10:39:48 +0900
Subject: [PATCH 2/2] [Update] change_write_encoding

---
 merge_csv_top200.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/merge_csv_top200.py b/merge_csv_top200.py
index cbd5e37..4e7db03 100644
--- a/merge_csv_top200.py
+++ b/merge_csv_top200.py
@@ -17,7 +17,7 @@ def merge_csv(source, destination):
             _, country, _, year, month, day = filename.split('-')
 
             # 기존 데이터 로드
-            df = pd.read_csv(file_path)
+            df = pd.read_csv(file_path,encoding='utf-8')
 
             # uri에서 track_id만 추출하여 컬럼명 변경하여 저장       
             df['uri'] = df['uri'].str.split(':').str[-1]
@@ -32,7 +32,7 @@ def merge_csv(source, destination):
             print(filename)
             
     # 수정된 데이터를 새로운 파일로 저장
-    combined_df.to_csv(destination, index=False)
+    combined_df.to_csv(destination, index=False,encoding='utf-8-sig')
 
 directory = 'C:/Users/Yeojun/s4tify/weekly_Top_Songs/'
 new_filename = "weekly_top200_combined.csv"
@@ -40,4 +40,4 @@ def merge_csv(source, destination):
 source = 'C:/Users/Yeojun/Downloads/'
 destination = os.path.join(directory, new_filename)
 
-merge_csv(source, destination)
\ No newline at end of file
+merge_csv(source, destination)