Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 73 additions & 65 deletions airflow/dags/Bugs_DAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,94 +2,97 @@
import json
from datetime import datetime, timedelta

import pandas as pd
from bugs import BugsChartPeriod, BugsChartType, ChartData
import requests
from plugins.bugs import BugsChartPeriod, BugsChartType, ChartData
from plugins.get_artist_data import get_artist_genre, search_artist_id
from scripts.get_access_token import get_token

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook

"""
your-s3-bucket-name을 실제 S3 버킷명으로 바꾸고,
✅ Snowflake 연결 정보 및 테이블명을 맞게 설정
"""

# Date stamp (YYYYMMDD) embedded in every local file name and S3 key below
TODAY = datetime.now().strftime("%Y%m%d")

# Local file paths (used by the older file-based flow)
JSON_PATH = f"/opt/airflow/data/bugs_chart_{TODAY}.json"
CSV_PATH = f"/opt/airflow/data/bugs_chart_{TODAY}.csv"

# S3 settings
S3_BUCKET = "de5-s4tify"
S3_JSON_KEY = f"raw_data/bugs_chart/bugs_chart_{TODAY}.json"
S3_CSV_KEY = f"raw_data/bugs_chart/bugs_chart_{TODAY}.csv"

"""
# Snowflake 설정
SNOWFLAKE_CONN_ID = "S4tify_SnowFlake"
SNOWFLAKE_TABLE = "raw_data"
"""
# NOTE(review): S3_CSV_KEY is re-assigned here and overrides the value set above;
# the earlier assignment is dead. LOCAL_FILE_PATH is only used by save_csv_locally.
S3_CSV_KEY = f"raw_data/bugs_chart_with_genre_{TODAY}.csv"
LOCAL_FILE_PATH = f"/opt/airflow/data/bugs_chart_with_genre_{TODAY}.csv"


# 1. Bugs 차트 데이터 가져오기 및 JSON 저장
# 1. Bugs 차트 데이터 가져오기 및 JSON 변환
def fetch_bugs_chart():
    """Fetch the realtime Bugs chart and return it as a plain dict.

    For each chart entry, look up the artist's Spotify ID and genre list,
    then return ``{"date": ..., "entries": [...]}``.  The dict is returned
    (not written to disk) so Airflow pushes it to XCom for the downstream
    ``convert_json_to_csv`` task.
    """
    chart = ChartData(
        chartType=BugsChartType.All,
        chartPeriod=BugsChartPeriod.Realtime,
        fetch=True)
    chart_data = {"date": chart.date.strftime(
        "%Y-%m-%d %H:%M:%S"), "entries": []}
    for entry in chart.entries:
        print(f"📊 차트 데이터 처리: {entry.rank}. {entry.title} - {entry.artist}")

        # Enrich each chart row with genre data from the Spotify helpers.
        artist_id = search_artist_id(entry.artist)
        genre = get_artist_genre(artist_id)

        chart_data["entries"].append(
            {
                "rank": entry.rank,
                "title": entry.title,
                "artist": entry.artist,
                "lastPos": entry.lastPos,
                "peakPos": entry.peakPos,
                "image": entry.image,
                # genre is a comma-separated string (or falsy); store a list
                "genres": genre.split(", ") if genre else [],
            }
        )
    return chart_data


# 2. JSON → CSV 변환
def convert_json_to_csv():
    """Read the chart JSON from JSON_PATH and write it as CSV to CSV_PATH.

    NOTE(review): shadowed by the ``convert_json_to_csv(**kwargs)`` defined
    later in this file, so this file-based version is dead code as rendered.
    """
    with open(JSON_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)
    fields = ["rank", "title", "artist", "lastPos", "peakPos", "image"]
    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csvfile:
        # NOTE(review): DictWriter raises ValueError for keys outside
        # `fields` (e.g. a "genres" key) — confirm entries match this schema.
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        for entry in data["entries"]:
            writer.writerow(entry)
    print(f"✅ CSV 변환 완료: {CSV_PATH}")


# 3. AWS S3 업로드
def upload_to_s3():
def convert_json_to_csv(**kwargs):
    """Render the chart dict pulled from XCom as a CSV string.

    Pulls the dict returned by the ``fetch_bugs_chart`` task and produces
    CSV text (header row + one row per chart entry).  The string is
    returned so Airflow pushes it to XCom for the S3 upload task.
    """
    # Local imports keep this task self-contained for Airflow workers.
    import csv
    import io

    ti = kwargs["ti"]
    data = ti.xcom_pull(task_ids="fetch_bugs_chart")

    buf = io.StringIO()
    # csv.writer quotes fields containing commas/quotes/newlines — required
    # because titles, artists and the JSON-encoded genre list routinely
    # contain commas, which a naive ",".join would corrupt into extra columns.
    writer = csv.writer(buf, lineterminator="\n")
    writer.writerow(["rank", "title", "artist",
                     "lastPos", "peakPos", "image", "genre"])
    for entry in data["entries"]:
        writer.writerow(
            [
                entry["rank"],
                entry["title"],
                entry["artist"],
                entry["lastPos"],
                entry["peakPos"],
                entry["image"],
                # Keep the genre list as a JSON string inside one CSV field.
                json.dumps(entry["genres"], ensure_ascii=False),
            ]
        )
    # Drop the trailing newline to match the previous "\n".join row shape.
    return buf.getvalue().rstrip("\n")


# 3. Persist the CSV locally (test helper; kept separate so it is easy to drop)
def save_csv_locally(csv_string):
    """Write *csv_string* to LOCAL_FILE_PATH for local inspection."""
    with open(LOCAL_FILE_PATH, "w", encoding="utf-8") as out_file:
        out_file.write(csv_string)


# 4. AWS S3 upload
def upload_to_s3(**kwargs):
    """Upload the CSV string pulled from XCom directly to S3.

    Pulls the CSV produced by the ``convert_json_to_csv`` task and writes
    it to ``s3://{S3_BUCKET}/{S3_CSV_KEY}`` via ``load_string`` — no local
    file is involved.  (The old ``load_file(filename=CSV_PATH, ...)`` call
    was removed: nothing in the XCom-based flow writes CSV_PATH, so it
    would fail and then be overwritten at the same key anyway.)
    """
    ti = kwargs["ti"]
    csv_string = ti.xcom_pull(task_ids="convert_json_to_csv")
    # save_csv_locally(csv_string)  # optional local copy for debugging
    s3_hook = S3Hook(aws_conn_id="S4tify_S3")
    s3_hook.load_string(
        csv_string,
        key=S3_CSV_KEY,
        bucket_name=S3_BUCKET,
        replace=True)
    print(f"✅ S3 업로드 완료: {S3_CSV_KEY}")


"""# 4. Snowflake 업로드
def upload_to_snowflake():
snowflake_hook = SnowflakeHook(snowflake_conn_id=SNOWFLAKE_CONN_ID)
df = pd.read_csv(CSV_PATH)
snowflake_hook.run(f"DELETE FROM {SNOWFLAKE_TABLE} WHERE DATE = '{TODAY}';")
snowflake_hook.insert_rows(table=SNOWFLAKE_TABLE, rows=df.values.tolist(), target_fields=df.columns.tolist())
print(f"✅ Snowflake 업로드 완료: {SNOWFLAKE_TABLE}")
"""
# DAG 설정
default_args = {
"owner": "airflow",
Expand All @@ -106,28 +109,33 @@ def upload_to_snowflake():
catchup=False,
) as dag:

get_spotify_token_task = PythonOperator(
task_id="get_spotify_token",
python_callable=get_token, # ✅ 먼저 실행해서 Variable 갱신
provide_context=True,
)

fetch_bugs_chart_task = PythonOperator(
task_id="fetch_bugs_chart",
python_callable=fetch_bugs_chart,
provide_context=True,
)

convert_json_to_csv_task = PythonOperator(
task_id="convert_json_to_csv",
python_callable=convert_json_to_csv,
provide_context=True,
)

upload_s3_task = PythonOperator(
task_id="upload_to_s3",
python_callable=upload_to_s3,
provide_context=True,
)
"""
upload_snowflake_task = PythonOperator(
task_id="upload_to_snowflake",
python_callable=upload_to_snowflake,
)
"""

# DAG 실행 순서
(
fetch_bugs_chart_task >> convert_json_to_csv_task >> upload_s3_task
) # upload_snowflake_task
get_spotify_token_task
>> fetch_bugs_chart_task
>> convert_json_to_csv_task
>> upload_s3_task
)
Loading