DEV-S4tify · Yun024 · Mar 4, 2025 · Mar 5, 2025
diff --git a/merge_csv_top200.py b/merge_csv_top200.py
@@ -0,0 +1,90 @@
+"""Merge downloaded Spotify weekly chart CSVs into a single combined CSV."""
+
+import os
+import re
+from datetime import datetime, timedelta
+
+import pandas as pd
+from tqdm import tqdm
+
+# Column order of the combined CSV; columns not listed here are appended after these.
+HEADER = [
+    'rank', 'track_id', 'artist_names', 'track_name', 'source', 'peak_rank',
+    'previous_rank', 'weeks_on_chart', 'streams', 'country_code', 'date',
+]
+
+# Expected name shape: "<prefix>-<country>-<period>-<year>-<month>-<day>.<ext>",
+# e.g. "regional-kr-weekly-2025-02-27.csv". Anything else is not a chart export.
+_CHART_FILENAME = re.compile(
+    r'[^-]*-(?P<country>[^-]*)-[^-]*-(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)\.[^-]*'
+)
+
+
+def merge_csv(source, destination):
+    """Combine every chart CSV in *source* into one CSV written to *destination*.
+
+    Each input file contributes its rows plus two columns taken from its file
+    name: ``country_code`` and ``date``. The ``uri`` column is reduced to its
+    last ``:``-separated segment and renamed to ``track_id``.
+
+    CSV files whose names do not match the chart naming pattern are skipped
+    (with a message) instead of aborting the run, because *source* may be a
+    shared folder such as a browser download directory.
+
+    Args:
+        source: Directory containing the downloaded chart CSV files.
+        destination: Path of the combined CSV; its parent directory is created
+            if it does not exist. Written as UTF-8 with BOM.
+
+    Raises:
+        KeyError: If a chart file has no ``uri`` column.
+        ValueError: If a file name encodes an impossible calendar date.
+    """
+    frames = []
+
+    # Sorted so the row order of the output does not depend on the filesystem.
+    for filename in tqdm(sorted(os.listdir(source))):
+        if not filename.endswith('.csv'):
+            continue
+
+        match = _CHART_FILENAME.fullmatch(filename)
+        if match is None:
+            tqdm.write(f"skipped (not a chart export): {filename}")
+            continue
+
+        df = pd.read_csv(os.path.join(source, filename), encoding='utf-8')
+
+        # Keep only the track id from the URI (its last ':'-separated segment).
+        df['uri'] = df['uri'].str.split(':').str[-1]
+        df = df.rename(columns={'uri': 'track_id'})
+
+        # Country and chart date come from the file name, not the file content.
+        df['country_code'] = match['country']
+        df['date'] = datetime(int(match['year']), int(match['month']), int(match['day']))
+
+        frames.append(df)
+        # tqdm.write keeps the progress bar intact, unlike a bare print().
+        tqdm.write(filename)
+
+    if frames:
+        # One concat at the end instead of one per file: concatenating inside
+        # the loop re-copies all accumulated rows on every iteration.
+        combined_df = pd.concat(frames, ignore_index=True)
+        extra = [col for col in combined_df.columns if col not in HEADER]
+        combined_df = combined_df.reindex(columns=HEADER + extra)
+    else:
+        # No chart files found: still emit a header-only CSV.
+        combined_df = pd.DataFrame(columns=HEADER)
+
+    os.makedirs(os.path.dirname(destination) or '.', exist_ok=True)
+    combined_df.to_csv(destination, index=False, encoding='utf-8-sig', date_format='%Y-%m-%d')
+
+
+if __name__ == "__main__":
+    directory = 'C:/Users/Yeojun/s4tify/weekly_Top_Songs/'
+    new_filename = "weekly_top200_combined.csv"
+
+    source = 'C:/Users/Yeojun/Downloads/'
+    destination = os.path.join(directory, new_filename)
+
+    merge_csv(source, destination)
diff --git a/weekly_top200_crawler.py b/weekly_top200_crawler.py
@@ -0,0 +1,104 @@
+"""Download Spotify weekly top-200 chart CSVs with a Selenium-driven Chrome."""
+
+import os
+import time
+from datetime import datetime, timedelta
+
+from dotenv import load_dotenv
+from selenium import webdriver
+from selenium.common.exceptions import TimeoutException
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+from tqdm import tqdm
+from webdriver_manager.chrome import ChromeDriverManager
+
+load_dotenv("C://Users/Yeojun/.env")
+
+# Longest time, in seconds, to wait for a page element before giving up.
+ELEMENT_TIMEOUT = 15
+
+
+def _weekly_dates(year):
+    """Return "YYYY-MM-DD" strings, newest first, one per week over *year* years."""
+    # NOTE(review): the 5-day offset presumably lets the newest chart be
+    # published before it is requested - confirm against the chart schedule.
+    current_date = datetime.now() - timedelta(days=5)
+    end_date = current_date - timedelta(days=year * 365)
+
+    dates = []
+    while current_date >= end_date:
+        dates.append(current_date.strftime("%Y-%m-%d"))
+        current_date -= timedelta(weeks=1)
+    return dates
+
+
+def _wait_clickable(driver, xpath):
+    """Block until the element at *xpath* is clickable and return it."""
+    return WebDriverWait(driver, ELEMENT_TIMEOUT).until(
+        EC.element_to_be_clickable((By.XPATH, xpath))
+    )
+
+
+def get_weekly_top200_songs(countries, year=3):
+    """Log in to Spotify Charts and click the CSV download button of every weekly chart.
+
+    One chart page is opened per (country, week) pair, newest week first.
+
+    Args:
+        countries: Region codes used in the chart URL, e.g. ``'global'``, ``'kr'``.
+        year: How many years back to go (counted as 365 days each).
+
+    Raises:
+        RuntimeError: If ``SPOTIFY_ID`` or ``SPOTIFY_PASS`` is not set.
+        selenium.common.exceptions.TimeoutException: If a login-page element
+            does not become clickable within ``ELEMENT_TIMEOUT`` seconds.
+    """
+    # Check credentials before launching a browser: os.getenv returns None for
+    # an unset variable, which is not valid input for send_keys.
+    spotify_id = os.getenv("SPOTIFY_ID")
+    spotify_pass = os.getenv("SPOTIFY_PASS")
+    if not spotify_id or not spotify_pass:
+        raise RuntimeError("SPOTIFY_ID and SPOTIFY_PASS must be set in the environment")
+
+    dates = _weekly_dates(year)
+
+    options = webdriver.ChromeOptions()
+    options.add_argument("--headless")
+    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+
+    with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
+        driver.get("https://charts.spotify.com/charts/view/regional-global-weekly/latest")
+
+        # Explicit waits replace "sleep, then find": they return as soon as the
+        # element is usable and tolerate pages that load slower than a fixed delay.
+        _wait_clickable(driver, '//*[@id="__next"]/div/div/header/div/div[2]/a/span[1]').click()
+
+        _wait_clickable(driver, '//*[@id="login-username"]').send_keys(spotify_id)
+        _wait_clickable(driver, '//*[@id="login-password"]').send_keys(spotify_pass)
+        _wait_clickable(driver, '//*[@id="login-button"]').click()
+        # No element is known here that signals a finished login, so keep the fixed pause.
+        time.sleep(3)
+
+        for country in tqdm(countries):
+            for date in dates:
+                driver.get(f"https://charts.spotify.com/charts/view/regional-{country}-weekly/{date}")
+
+                try:
+                    csv_download_button = _wait_clickable(
+                        driver, '//*[@id="__next"]/div/div[3]/div/div/div[2]/span'
+                    )
+                except TimeoutException:
+                    # One unavailable chart should not abort the remaining downloads.
+                    tqdm.write(f"no download button for {country} {date}, skipped")
+                    continue
+                csv_download_button.click()
+
+                # Give the browser time to start the download before navigating away.
+                time.sleep(2)
+
+
+if __name__ == "__main__":
+    countries = ['global', 'kr', 'us']
+    get_weekly_top200_songs(countries)