diff --git a/ML/model/weights.pt b/ML/model/weights.pt index 4dcd733..f911979 100644 Binary files a/ML/model/weights.pt and b/ML/model/weights.pt differ diff --git a/ML/src/makexml/MakeScore.py b/ML/src/makexml/MakeScore.py index 681e584..37f382b 100644 --- a/ML/src/makexml/MakeScore.py +++ b/ML/src/makexml/MakeScore.py @@ -1,6 +1,5 @@ from fractions import Fraction -from music21 import chord, stream, note, meter, key, clef, metadata -from music21 import interval +from music21 import chord, stream, note, meter, key, clef, metadata, interval, bar from .ScoreInfo import ScoreInfo from .ScoreIterator import ScoreIterator from .MeasureIterator import MeasureIterator @@ -8,13 +7,16 @@ from .StafflineUtils import StafflineUtils from .IntervalPreset import IntervalPreset from .MakeTestData import MakeTestData -from ML.src.FilePath import BASE_DIR +from .TextProcesser import TextProcesser +from ..FilePath import BASE_DIR import random import string +from PIL import Image import cv2 from ultralytics import YOLO import pandas as pd import os +import numpy as np class MakeScore: # 학습된 모델 위치와 그걸 기반으로 한 모델 객체 @@ -51,10 +53,73 @@ def convert_origin_to_png(origin): png_list = [] return png_list """ + + #추가한 함수 + #staff_line이 겹쳐 탐지된 경우, y좌표 비슷한 줄끼리 병합하여 + #하나의 줄로 만든다. x1=0, x2=image_width로 강제 확장 + @staticmethod + def merge_staff_lines(df: pd.DataFrame, image_width: int, y_threshold: int = 10) -> pd.DataFrame: + staff_lines = df[df["class_name"] == "staff_line"].copy().reset_index(drop=True) + others = df[df["class_name"] != "staff_line"].copy() + + merged = [] + used = [False] * len(staff_lines) + + for i in range(len(staff_lines)): + if used[i]: + continue + + y1_i = staff_lines.loc[i, "y1"] + y2_i = staff_lines.loc[i, "y2"] + conf_i = staff_lines.loc[i, "confidence"] + class_id_i = staff_lines.loc[i, "class_id"] + + group = [staff_lines.loc[i]] + used[i] = True + + for j in range(i + 1, len(staff_lines)): + if used[j]: + continue + y1_j = staff_lines.loc[j, "y1"] + y2_j = staff_lines.loc[j, "y2"] + + if abs(y1_i - y1_j) < y_threshold and abs(y2_i - y2_j) < y_threshold: + group.append(staff_lines.loc[j]) + used[j] = True + + y1_avg = float(np.mean([g["y1"] for g in group])) + y2_avg = float(np.mean([g["y2"] for g in group])) + x1 = 0 + x2 = image_width + conf = max([g["confidence"] for g in group]) + + merged.append({ + "class_id": class_id_i, + "class_name": "staff_line", + "confidence": conf, + "x1": x1, + "y1": y1_avg, + "x2": x2, + "y2": y2_avg, + "x_center": (x1 + x2) / 2, + "y_center": (y1_avg + y2_avg) / 2, + "width": x2 - x1, + "height": y2_avg - y1_avg + }) + + df_merged = pd.DataFrame(merged) + result_df = pd.concat([others, df_merged], ignore_index=True) + result_df = result_df.sort_values(by=["class_name", "x_center", "y_center"]).reset_index(drop=True) + return result_df + + # 모델의 예측 결과를 pandas dataframe으로 변환시켜주는 함수 def convert_result_to_df(result): rows = [] - boxes = result.boxes + boxes = result.boxes + + image_width = result.orig_img.shape[1] + for i in range(len(boxes)): class_id = int(boxes.cls[i]) class_name = result.names[class_id] @@ -75,14 +140,15 @@ def convert_result_to_df(result): rows.append(row) df = pd.DataFrame(rows) - + # 중복되는 staff_line 제거 + df = MakeScore.merge_staff_lines(df, image_width=image_width) # 클래스 이름으로 정렬하고 좌표기준으로 정렬 df_sorted = df.sort_values(by=["class_name", "x_center", "y_center"]) return df_sorted - # 이미지 리스트를 모델을 통해 탑지하는 함수 + # 이미지 리스트를 모델을 통해 탐지하는 함수 @staticmethod def detect_object(img_list): detection_results = [] @@ -165,13 +231,35 @@ def convert_df_to_score(object_dfs, vis_list): # 저장된 dataframe에서 보표에 대한 정보만 들고옴 staff_df = object_df[object_df["class_name"] == "staff_line"].copy() staff_df = staff_df.sort_values(by="y1").reset_index(drop=True) + + # 해당 페이지의 탐지결과에서 가사 영역만 가진 dataframe과 코드 영역만 가진 dataframe + lyrics_df = object_df[object_df["class_name"] == "lyrics"].copy() + harmony_df = object_df[object_df["class_name"] == "harmony"].copy() + # 들고온 보표의 개수만큼 반복문 for staff_index in range(len(staff_df)): row = staff_df.iloc[staff_index] sx1, sy1, sx2, sy2 = int(row["x1"]), int(row["y1"]), int(row["x2"]), int(row["y2"]) + # 해당 보표의 가사만 골라내기 + if staff_index < len(staff_df) - 1: # 마지막 보표가 아닌 경우 + next_row = staff_df.iloc[staff_index+1] + cur_lyrics_df = lyrics_df[ + (lyrics_df["y_center"] > row["y2"]) & + (lyrics_df["y_center"] < next_row["y1"]) + ].copy() + else: # 마지막 보표인 경우 + cur_lyrics_df = lyrics_df[ + (lyrics_df["y_center"] > row["y2"]) + ].copy() + + # 박스쳐진 staff_line에 선이 5개가 안들어가있는 경우가 있어서 y좌표에 약간의 padding을 적용 + y_padding = int(row["height"] * 0.05) + y1_pad = max(0, sy1 - y_padding) + y2_pad = min(vis.shape[0], sy2 + y_padding) + # 이미지에서 잘라냄 - staff_crop = vis[sy1:sy2, sx1:sx2] + staff_crop = vis[y1_pad:y2_pad, sx1:sx2] # OpenCV로 5줄 찾음 staff_lines = StafflineUtils.extract_5lines(staff_crop) @@ -208,6 +296,7 @@ def convert_df_to_score(object_dfs, vis_list): m.append(clef.TrebleClef()) else: m.append(clef.BassClef()) + elif "keysig" in cls: # 조표 keysig = cls.split("_")[1] print("keysig_index: ", cls) @@ -220,6 +309,9 @@ def convert_df_to_score(object_dfs, vis_list): measiter.cur_keysig = keysig_index measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig) print(measiter.interval_list) + for el in m.getElementsByClass(key.KeySignature): + m.remove(el) + m.insert(0, key.KeySignature(keysig_index)) m.append(key.KeySignature(keysig_index)) elif "timesig" in cls: # 박자표 @@ -234,18 +326,20 @@ def convert_df_to_score(object_dfs, vis_list): r = note.Rest() r.duration.quarterLength = MakeScore.REST_DURATION_MAP[cls] m.append(r) - print(cls) + #print(cls) elif cls in MakeScore.NOTE_DURATION_MAP: # 음표 duration = MakeScore.NOTE_DURATION_MAP[cls] + c = chord.Chord() # 점 음표 확인 note_box = (row["x1"], row["y1"], row["x2"], row["y2"]) - print(note_box) + #print(note_box) if Pitch.is_dotted_note(note_box, cur_staff_df): duration *= 1.5 print("dot",cls) else: print(cls) + # pitch 계산 head_df = Pitch.find_note_head(cur_staff_note_head, row["x1"], pitch_y_top, row["x2"], pitch_y_bottom) print("음표탐지시도 완료") @@ -257,27 +351,61 @@ def convert_df_to_score(object_dfs, vis_list): n = Pitch.find_pitch_from_y(cur_staff_df, head, staff_lines_global, measiter) if isinstance(n, note.Note): pitches.append(n) - if pitches: - #midi_list = [note_obj.pitch.midi for note_obj in pitches] - - c = chord.Chord([n.pitch for n in pitches]) + if pitches: + c.pitches = [n.pitch for n in pitches] c.duration.quarterLength = duration for i, note_obj in enumerate(pitches): if hasattr(note_obj, "accidental") and note_obj.accidental is not None and note_obj.accidental.displayStatus: c.notes[i].accidental = note_obj.accidental c.notes[i].accidental.displayStatus = True - #c = chord.Chord(pitches) # pitch 리스트로 코드 생성 - #c.duration.quarterLength = duration # 미리 계산한 duration 할당 + # 가사 확인 + lyrics_list = TextProcesser.find_text_list(cur_lyrics_df, row["x1"], row["x2"]) + + lyrics_data = [] + for _, lyric in lyrics_list.iterrows(): + x1, y1, x2, y2 = int(lyric["x1"]), int(lyric["y1"]), int(lyric["x2"]), int(lyric["y2"]) + pad_x = lyric["width"] * 0 + pad_y = lyric["height"] * 0 + y_max, x_max = vis.shape[:2] + crop_x1 = max(x1-pad_x,0) + crop_x2 = min(x2+pad_x,x_max) + crop_y1 = max(y1-pad_y,0) + crop_y2 = min(y2+pad_y,y_max) + lyrics_crop = vis[int(crop_y1):int(crop_y2), int(crop_x1):int(crop_x2)] + crop_pil = Image.fromarray(cv2.cvtColor(lyrics_crop, cv2.COLOR_BGR2RGB)) + # OCR 수행 + text = TextProcesser.detect_text(crop_pil) + # 결과 저장 + lyrics_data.append(text) + for i, lyric in enumerate(lyrics_data): + print(f"탐지된 가사: {lyric}") + lyric_obj = note.Lyric() + lyric_obj.text = lyric + lyric_obj.number = i + 1 + #c.notes[0].lyrics.append(lyric_obj) + c.addLyric(lyric) m.append(c) print(c) + elif cls in ["measure", "measure_double", "measure_final"]: + if cls == "measure_double": + m.rightBarline = bar.Barline("light-light") + elif cls == "measure_final": + m.rightBarline = bar.Barline("light-heavy") + + part.append(m) + measurenum += 1 + m = stream.Measure(number=measurenum) + measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig) + + """ elif cls in ["measure", "double_measure"]: part.append(m) measurenum += 1 m = stream.Measure(number=measurenum) measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig) - + """ part.append(m) @@ -289,20 +417,30 @@ def convert_df_to_score(object_dfs, vis_list): # 키를 변환하는 함수 # Score 객체와 변환할 값을 정수로 받아서 키를 변환 - # 현재는 -2, -1, 1, 2 만 받음 + # 범위는 -7 ~ +7까지지 @staticmethod - def change_key(score, diff): # -2 -1 1 1 3 4 - if diff > 2 or diff < -2: + def change_key(score, diff): + if diff > 7 or diff < -7: return score if diff == 0: return score else: change = { + -7: "-P5", + -6: "-D5", + -5: "-P4", + -4: "-M3", + -3: "-m3", -2: "-M2", -1: "-m2", 1: "m2", - 2: "M2" + 2: "M2", + 3: "m3", + 4: "M3", + 5: "P4", + 6: "D5", + 7: "P5" } interval_str = change[diff] intv = interval.Interval(interval_str) diff --git a/ML/src/makexml/Pitch.py b/ML/src/makexml/Pitch.py index 24e21ab..5235d8f 100644 --- a/ML/src/makexml/Pitch.py +++ b/ML/src/makexml/Pitch.py @@ -39,67 +39,66 @@ def find_pitch_from_y(staff_df, head, staff_lines, measiter, margin_ratio=0.35): # 기준 생성 margin = gap * margin_ratio positions = [ - (18, L1-5*gap+margin, L1-5*gap-margin), - (17, L1-4*gap+margin, L1-4*gap-margin), - (16, L1-3*gap+margin, L1-3*gap-margin), - (15, L1-2*gap+margin, L1-2*gap-margin), - (14, L1-1*gap+margin, L1-1*gap-margin), - (13, L1+margin, L1-margin), - (12, L1+gap+margin, L1+gap-margin), - (11, L2+margin, L2-margin), - (10, L2+gap+margin, L2+gap-margin), - (9, L3+margin, L3-margin), - (8, L3+gap+margin, L3+gap-margin), - (7, L4+margin, L4-margin), - (6, L4+gap+margin, L4+gap-margin), - (5, L5+margin, L5-margin), - (4, L5+1*gap+margin, L5+1*gap-margin), - (3, L5+2*gap+margin, L5+2*gap-margin), - (2, L5+3*gap+margin, L5+3*gap-margin), - (1, L5+4*gap+margin, L5+4*gap-margin), - (0, L5+5*gap+margin, L5+5*gap-margin), + (18, L1-5*gap), + (17, L1-4*gap), + (16, L1-3*gap), + (15, L1-2*gap), + (14, L1-1*gap), + (13, L1), + (12, L1+gap), + (11, L2), + (10, L2+gap), + (9, L3), + (8, L3+gap), + (7, L4), + (6, L4+gap), + (5, L5), + (4, L5+1*gap), + (3, L5+2*gap), + (2, L5+3*gap), + (1, L5+4*gap), + (0, L5+5*gap), ] + # 가장 가까운 위치 계산 + pitch_centers = [(pitch, position) for pitch, position in positions] + pitch_idx, closest_y = min(pitch_centers, key=lambda p: abs(p[1] - y_center)) + + n = note.Note() + + # 임시표 처리 accidental_df = staff_df[staff_df["class_name"].isin(ACCIDENTAL_CLASSES.keys())].copy() - for pitch, low, high in positions: - #print(low, y_center, high) - if low > y_center > high: - n = note.Note() - - # 임시표 처리 - for _, acc in accidental_df.iterrows(): - ax_target = acc["x2"] - acc["width"] * 0.2 - ay_center = acc["y_center"] - - # y조건: 임시표가 음표 머리 영역 y 안에 있어야 함 - if not (y1 <= ay_center <= y2): - continue - - # x조건: 임시표의 x1, x2,의 5/4 지점이 음표 머리 영역 안에 있어야됨. - head_width = x2 - x1 - threshold_x = x1 - 0.2 * head_width - if x1 <= ax_target <= x2: - # pitch 보정 - adjust = ACCIDENTAL_CLASSES[acc["class_name"]] - if adjust == 1: - interval_list[pitch] += 1 - n.pitch.midi = interval_list[pitch] - n.accidental = pitch.Accidental('sharp') - elif adjust == -1: - interval_list[pitch] -= 1 - n.pitch.midi = interval_list[pitch] - n.accidental = pitch.Accidental('flat') - else: - temp_interval = IntervalPreset.get_interval_list(measiter.cur_clef, 0) - interval_list[pitch] = temp_interval[pitch] - n.pitch.midi = interval_list[pitch] - n.accidental = pitch.Accidental('natural') - return n - n.pitch.midi = interval_list[pitch] - n.accidental = None + for _, acc in accidental_df.iterrows(): + ax_target = acc["x2"] - acc["width"] * 0.2 + ay_center = acc["y_center"] + + # y조건: 임시표가 음표 머리 영역 y 안에 있어야 함 + if not (y1 <= ay_center <= y2): + continue + + # x조건: 임시표가 음표 머리 옆에 있을 경우 + head_width = x2 - x1 + if x1 <= ax_target <= x2: + adjust = ACCIDENTAL_CLASSES[acc["class_name"]] + if adjust == 1: + interval_list[pitch_idx] += 1 + n.pitch.midi = interval_list[pitch_idx] + n.accidental = note.Accidental('sharp') + elif adjust == -1: + interval_list[pitch_idx] -= 1 + n.pitch.midi = interval_list[pitch_idx] + n.accidental = note.Accidental('flat') + else: + temp_interval = IntervalPreset.get_interval_list(measiter.cur_clef, 0) + interval_list[pitch_idx] = temp_interval[pitch_idx] + n.pitch.midi = interval_list[pitch_idx] + n.accidental = note.Accidental('natural') return n - return None # 범위 밖이면 None - + n.pitch.midi = interval_list[pitch_idx] + n.accidental = None + return n + + # 음표 영역 안에 dot_note_head의 중심좌표가 있는지 확인하는 함수 @staticmethod def is_dotted_note(note_box, staff_df): @@ -127,7 +126,7 @@ def find_note_head(head_fd, x1, y1, x2, y2): (head_fd["x_center"] >= x1) & (head_fd["x_center"] <= x2) & (head_fd["y_center"] >= y1) & (head_fd["y_center"] <= y2) ].copy() - print(hits) + #print(hits) if hits.empty: return pd.DataFrame(columns=head_fd.columns) x_base = hits["x_center"].min() diff --git a/ML/src/makexml/TextProcesser.py b/ML/src/makexml/TextProcesser.py new file mode 100644 index 0000000..68f93ce --- /dev/null +++ b/ML/src/makexml/TextProcesser.py @@ -0,0 +1,70 @@ +import easyocr +import json +import numpy as np +from music21 import converter, note +from collections import defaultdict +from PIL import Image + +class TextProcesser: + # easyOCR reader 객체 + reader = easyocr.Reader(['ko','en'], gpu=False) + # 특정 보표의 텍스트(가사&코드)만 담긴 DataFrame과 특정 음표의 x좌표들을 받으면 해당 음표에 해당하는 가사를 추출 + + # 특정 음표에 해당하는 가사들을 추출 + @staticmethod + def find_text_list(text_df, x1, x2): + # 전달된 가사 데이터프레임에서 x 범위 내에 있는 것만 추출 + text_list = text_df[ + (text_df["x_center"] >= x1 ) & + (text_df["x_center"] < x2) + ].copy() + + # 추출 후 y좌표순으로 정렬 + text_list = text_list.sort_values(by="y_center").reset_index(drop=True) + return text_list + + # 전달받은 이미지의 텍스트를 추출 + @staticmethod + def detect_text(img): + if not isinstance(img, np.ndarray): + img = np.array(img) + + results = TextProcesser.reader.readtext(img, detail=0) + + if results : + return "".join(results).strip() + return "" + + # 전달받은 musicxml에서 가사를 추출하여 json으로 변환하기 + # 현재는 mxl을 score로 변환하고 추출하는 방식인데 추후 파라미터를 score로 받을 수도 있음 + @staticmethod + def get_lyrics_json_from_mxl(mxl_path): + score = converter.parse(mxl_path) + notes = list(score.recurse().notes) + + # 절 별로 가사 수집 + lyrics_by_verse = defaultdict(list) + + # 절 번호 저장용 + all_verse_numbers = set() + # 먼저 모든 절 번호 수집 + for n in notes: + for lyric in n.lyrics: + number = int(lyric.number) if lyric.number else 1 + all_verse_numbers.add(number) + + max_verse = max(all_verse_numbers) if all_verse_numbers else 1 + verse_list = list(range(1, max_verse + 1)) + + # 음표 순서대로 각 절의 가사 채우기 + for n in notes: + # 현재 음표에서 실제 있는 절들만 수집 + verse_to_text = {int(lyric.number) if lyric.number else 1: lyric.text.strip() for lyric in n.lyrics} + + for verse in verse_list: + lyrics_by_verse[verse].append(verse_to_text.get(verse, "")) # 없는 절은 빈칸 + + #print(lyrics_by_verse) + return dict(lyrics_by_verse) + + diff --git a/convert_result/b247644d.pdf b/convert_result/b247644d.pdf new file mode 100644 index 0000000..916ce43 Binary files /dev/null and b/convert_result/b247644d.pdf differ diff --git a/convert_result/b247644d.xml b/convert_result/b247644d.xml new file mode 100644 index 0000000..7d67957 --- /dev/null +++ b/convert_result/b247644d.xml @@ -0,0 +1,728 @@ + + + + + b247644d + + b247644d + + Music21 + + 2025-05-19 + music21 v.9.5.0 + + + + + 7 + 40 + + + + + + + + + + + + + 10080 + + 4 + + + + G + 2 + + + + + E + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 5040 + eighth + up + begin + + single + + + + + + E + 4 + + 5040 + eighth + up + end + + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 5040 + eighth + + single + + + + + + B + 4 + + 5040 + eighth + + single + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + + + + + + B + 4 + + 5040 + eighth + + single + + + + + + + B + 4 + + 5040 + eighth + + + + B + 4 + + 5040 + eighth + + single + + + + + + G + 1 + 4 + + 5040 + eighth + + single + + + + + + 5040 + eighth + + + + B + 4 + + 5040 + eighth + + single + + + + + + B + 4 + + 5040 + eighth + + + + + + + G + 1 + 4 + + 5040 + eighth + + single + + + + + + 5040 + eighth + + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + 10080 + quarter + + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 20160 + half + + single + + + + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + 10080 + quarter + + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + single + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + single + + + + + + + + + B + 4 + + 5040 + eighth + down + begin + + single + + + + + + B + 4 + + 5040 + eighth + down + end + + single + + + + + + B + 4 + + 5040 + eighth + down + begin + + single + + + + + + C + 1 + 5 + + 5040 + eighth + down + end + + single + + + + + + + C + 1 + 5 + + 5040 + eighth + + + + B + 4 + + 10080 + quarter + + single + + + + + + 10080 + quarter + + + + + + + E + 5 + + 10080 + quarter + + single + + + + + + B + 4 + + 10080 + quarter + + + + + + + E + 5 + + 10080 + quarter + + single + 9 + + + + + B + 4 + + 10080 + quarter + + + + + + + + + + G + 1 + 4 + + 10080 + quarter + + single + + + + + + F + 1 + 4 + + 10080 + quarter + + single + + + + + + E + 4 + + 10080 + quarter + + + + + + + 10080 + quarter + + + + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c82833d..f95b202 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/run.py b/run.py index 2f398dd..ea4a56b 100644 --- a/run.py +++ b/run.py @@ -4,7 +4,6 @@ app = create_app() - if __name__ == '__main__': with app.app_context(): db.create_all() diff --git a/src/models/transform.py b/src/models/transform.py index a260273..c4f4339 100644 --- a/src/models/transform.py +++ b/src/models/transform.py @@ -8,6 +8,15 @@ class TransformTranspose(db.Model): score_id = db.Column(db.Integer, db.ForeignKey('scores.id'), nullable=False) pdf_path = db.Column(db.String(512), nullable=False) created_at = db.Column(db.DateTime, default=datetime.utcnow) + +class TransformLyrics(db.Model): + __tablename__ = 'transform_lyrics' + + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + score_id = db.Column(db.Integer, db.ForeignKey('scores.id'), nullable=False) + lyrics_text = db.Column(db.Text, nullable=False) + created_at = db.Column(db.DateTime, default=datetime.utcnow) + class TransformMelody(db.Model): diff --git a/src/routes/transform.py b/src/routes/transform.py index 01ddcdf..de4bba2 100644 --- a/src/routes/transform.py +++ b/src/routes/transform.py @@ -140,6 +140,62 @@ def transform_transpose_route(score_id): 'message': 'Transpose completed successfully' }), 201 +@transform_bp.route('/score//lyrics', methods=['POST']) +def lyrics_extract_route(score_id): + """ + 가사 추출 API + --- + tags: + - transform + summary: 업로드된 악보에서 가사를 추출하여 텍스트 파일로 저장하고 결과 ID를 반환합니다 + parameters: + - in: path + name: score_id + required: true + schema: + type: integer + description: 가사를 추출할 대상 악보의 ID + responses: + 200: + description: 가사 추출 완료 + schema: + type: object + properties: + result_id: + type: integer + example: 301 + text_path: + type: string + example: "convert_result/301.txt" + message: + type: string + example: "Lyrics extracted successfully" + 404: + description: 악보 ID를 찾을 수 없음 + schema: + type: object + properties: + error: + type: string + example: "Score not found" + """ + score = Score.query.get(score_id) + if not score: + return jsonify({'error': 'Score not found'}), 404 + + from src.services.transform_service import extract_lyrics + result_id = extract_lyrics(score) + + result = Result.query.get(result_id) + text_path = result.text_path if result else f"convert_result/{result_id}.txt" + + return jsonify({ + 'result_id': result_id, + 'text_path': text_path, + 'message': 'Lyrics extracted successfully' + }), 200 + + @transform_bp.route('/score//melody', methods=['POST']) def melody_extract_route(score_id): diff --git a/src/services/transform_service.py b/src/services/transform_service.py index dd4f886..d201d7f 100644 --- a/src/services/transform_service.py +++ b/src/services/transform_service.py @@ -4,7 +4,7 @@ import subprocess import cv2 -from music21 import midi, stream +from music21 import midi, stream, note from src.models.db import db from src.models.score import Score from src.models.result import Result # ✅ 통합된 Result 모델 @@ -20,6 +20,7 @@ TIMIDITY_CMD = "timidity" MSCORE_CMD = os.path.join("squashfs-root", "mscore4portable") + def perform_transpose(score: Score, shift: int) -> int: """ 키 변경을 수행하고 결과 PDF를 생성해 Result 테이블에 저장 @@ -53,6 +54,7 @@ def perform_transpose(score: Score, shift: int) -> int: return result.id + def extract_melody(score: Score, start_measure: int, end_measure: int) -> int: """ 악보에서 특정 마디 범위의 멜로디를 추출하여 MP3 파일로 저장 후 Result 테이블에 저장 @@ -95,3 +97,45 @@ def extract_melody(score: Score, start_measure: int, end_measure: int) -> int: db.session.commit() return result.id + + +def extract_lyrics(score: Score) -> int: + """ + 악보에서 가사를 추출하여 텍스트로 저장하고 Result 테이블에 저장 + """ + image_path = os.path.join('uploaded_scores', score.original_filename) + img = cv2.imread(image_path, cv2.IMREAD_COLOR) + if img is None: + raise RuntimeError("이미지를 불러올 수 없습니다") + + img_list = [img] + score_obj = MakeScore.make_score(img_list) + + # 가사 추출 + lyrics = [] + for el in score_obj.recurse(): + if isinstance(el, note.Note) and el.lyric: + lyrics.append(el.lyric.strip()) + + lyrics_text = "\n".join(filter(None, lyrics)).strip() + if not lyrics_text: + raise ValueError("추출된 가사가 없습니다") + + result_id = str(uuid.uuid4()) + convert_dir = 'convert_result' + os.makedirs(convert_dir, exist_ok=True) + + text_path = os.path.join(convert_dir, f"{result_id}.txt") + with open(text_path, 'w', encoding='utf-8') as f: + f.write(lyrics_text) + + result = Result( + score_id=score.id, + type='lyrics', + text_path=text_path, # 다운로드용 + text_content=lyrics_text # ✅ API 조회용 + ) + db.session.add(result) + db.session.commit() + + return result.id