Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 73 additions & 65 deletions airflow/dags/Bugs_DAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,94 +2,97 @@
import json
from datetime import datetime, timedelta

import pandas as pd
from bugs import BugsChartPeriod, BugsChartType, ChartData
import requests
from plugins.bugs import BugsChartPeriod, BugsChartType, ChartData
from plugins.get_artist_data import get_artist_genre, search_artist_id
from scripts.get_access_token import get_token

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook

"""
your-s3-bucket-name을 실제 S3 버킷명으로 바꾸고,
✅ Snowflake 연결 정보 및 테이블명을 맞게 설정
"""

# Date stamp (YYYYMMDD) embedded in every local file name and S3 key below
TODAY = datetime.now().strftime("%Y%m%d")

# Local file paths (used by the older file-based flow)
JSON_PATH = f"/opt/airflow/data/bugs_chart_{TODAY}.json"
CSV_PATH = f"/opt/airflow/data/bugs_chart_{TODAY}.csv"

# S3 settings
S3_BUCKET = "de5-s4tify"
S3_JSON_KEY = f"raw_data/bugs_chart/bugs_chart_{TODAY}.json"
S3_CSV_KEY = f"raw_data/bugs_chart/bugs_chart_{TODAY}.csv"

"""
# Snowflake 설정
SNOWFLAKE_CONN_ID = "S4tify_SnowFlake"
SNOWFLAKE_TABLE = "raw_data"
"""
# NOTE(review): S3_CSV_KEY is re-assigned here and overrides the value set above;
# the earlier assignment is dead. LOCAL_FILE_PATH is only used by save_csv_locally.
S3_CSV_KEY = f"raw_data/bugs_chart_with_genre_{TODAY}.csv"
LOCAL_FILE_PATH = f"/opt/airflow/data/bugs_chart_with_genre_{TODAY}.csv"


# 1. Bugs 차트 데이터 가져오기 및 JSON 저장
# 1. Bugs 차트 데이터 가져오기 및 JSON 변환
def fetch_bugs_chart():
    """Fetch the realtime Bugs chart and return it as a plain dict.

    For each chart entry, look up the artist's Spotify ID and genre list,
    then return ``{"date": ..., "entries": [...]}``.  The dict is returned
    (not written to disk) so Airflow pushes it to XCom for the downstream
    ``convert_json_to_csv`` task.
    """
    chart = ChartData(
        chartType=BugsChartType.All,
        chartPeriod=BugsChartPeriod.Realtime,
        fetch=True)
    chart_data = {"date": chart.date.strftime(
        "%Y-%m-%d %H:%M:%S"), "entries": []}
    for entry in chart.entries:
        print(f"📊 차트 데이터 처리: {entry.rank}. {entry.title} - {entry.artist}")

        # Enrich each chart row with genre data from the Spotify helpers.
        artist_id = search_artist_id(entry.artist)
        genre = get_artist_genre(artist_id)

        chart_data["entries"].append(
            {
                "rank": entry.rank,
                "title": entry.title,
                "artist": entry.artist,
                "lastPos": entry.lastPos,
                "peakPos": entry.peakPos,
                "image": entry.image,
                # genre is a comma-separated string (or falsy); store a list
                "genres": genre.split(", ") if genre else [],
            }
        )
    return chart_data


# 2. JSON → CSV 변환
def convert_json_to_csv():
    """Read the chart JSON from JSON_PATH and write it as CSV to CSV_PATH.

    NOTE(review): shadowed by the ``convert_json_to_csv(**kwargs)`` defined
    later in this file, so this file-based version is dead code as rendered.
    """
    with open(JSON_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)
    fields = ["rank", "title", "artist", "lastPos", "peakPos", "image"]
    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csvfile:
        # NOTE(review): DictWriter raises ValueError for keys outside
        # `fields` (e.g. a "genres" key) — confirm entries match this schema.
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        for entry in data["entries"]:
            writer.writerow(entry)
    print(f"✅ CSV 변환 완료: {CSV_PATH}")


# 3. AWS S3 업로드
def upload_to_s3():
def convert_json_to_csv(**kwargs):
    """Render the chart dict pulled from XCom as a CSV string.

    Pulls the dict returned by the ``fetch_bugs_chart`` task and produces
    CSV text (header row + one row per chart entry).  The string is
    returned so Airflow pushes it to XCom for the S3 upload task.
    """
    # Local imports keep this task self-contained for Airflow workers.
    import csv
    import io

    ti = kwargs["ti"]
    data = ti.xcom_pull(task_ids="fetch_bugs_chart")

    buf = io.StringIO()
    # csv.writer quotes fields containing commas/quotes/newlines — required
    # because titles, artists and the JSON-encoded genre list routinely
    # contain commas, which a naive ",".join would corrupt into extra columns.
    writer = csv.writer(buf, lineterminator="\n")
    writer.writerow(["rank", "title", "artist",
                     "lastPos", "peakPos", "image", "genre"])
    for entry in data["entries"]:
        writer.writerow(
            [
                entry["rank"],
                entry["title"],
                entry["artist"],
                entry["lastPos"],
                entry["peakPos"],
                entry["image"],
                # Keep the genre list as a JSON string inside one CSV field.
                json.dumps(entry["genres"], ensure_ascii=False),
            ]
        )
    # Drop the trailing newline to match the previous "\n".join row shape.
    return buf.getvalue().rstrip("\n")


# 3. Persist the CSV locally (test helper; kept separate so it is easy to drop)
def save_csv_locally(csv_string):
    """Write *csv_string* to LOCAL_FILE_PATH for local inspection."""
    with open(LOCAL_FILE_PATH, "w", encoding="utf-8") as out_file:
        out_file.write(csv_string)


# 4. AWS S3 upload
def upload_to_s3(**kwargs):
    """Upload the CSV string pulled from XCom directly to S3.

    Pulls the CSV produced by the ``convert_json_to_csv`` task and writes
    it to ``s3://{S3_BUCKET}/{S3_CSV_KEY}`` via ``load_string`` — no local
    file is involved.  (The old ``load_file(filename=CSV_PATH, ...)`` call
    was removed: nothing in the XCom-based flow writes CSV_PATH, so it
    would fail and then be overwritten at the same key anyway.)
    """
    ti = kwargs["ti"]
    csv_string = ti.xcom_pull(task_ids="convert_json_to_csv")
    # save_csv_locally(csv_string)  # optional local copy for debugging
    s3_hook = S3Hook(aws_conn_id="S4tify_S3")
    s3_hook.load_string(
        csv_string,
        key=S3_CSV_KEY,
        bucket_name=S3_BUCKET,
        replace=True)
    print(f"✅ S3 업로드 완료: {S3_CSV_KEY}")


"""# 4. Snowflake 업로드
def upload_to_snowflake():
snowflake_hook = SnowflakeHook(snowflake_conn_id=SNOWFLAKE_CONN_ID)
df = pd.read_csv(CSV_PATH)
snowflake_hook.run(f"DELETE FROM {SNOWFLAKE_TABLE} WHERE DATE = '{TODAY}';")
snowflake_hook.insert_rows(table=SNOWFLAKE_TABLE, rows=df.values.tolist(), target_fields=df.columns.tolist())
print(f"✅ Snowflake 업로드 완료: {SNOWFLAKE_TABLE}")
"""
# DAG 설정
default_args = {
"owner": "airflow",
Expand All @@ -106,28 +109,33 @@ def upload_to_snowflake():
catchup=False,
) as dag:

get_spotify_token_task = PythonOperator(
task_id="get_spotify_token",
python_callable=get_token, # ✅ 먼저 실행해서 Variable 갱신
provide_context=True,
)

fetch_bugs_chart_task = PythonOperator(
task_id="fetch_bugs_chart",
python_callable=fetch_bugs_chart,
provide_context=True,
)

convert_json_to_csv_task = PythonOperator(
task_id="convert_json_to_csv",
python_callable=convert_json_to_csv,
provide_context=True,
)

upload_s3_task = PythonOperator(
task_id="upload_to_s3",
python_callable=upload_to_s3,
provide_context=True,
)
"""
upload_snowflake_task = PythonOperator(
task_id="upload_to_snowflake",
python_callable=upload_to_snowflake,
)
"""

# DAG 실행 순서
(
fetch_bugs_chart_task >> convert_json_to_csv_task >> upload_s3_task
) # upload_snowflake_task
get_spotify_token_task
>> fetch_bugs_chart_task
>> convert_json_to_csv_task
>> upload_s3_task
)
Loading