diff --git a/ML/model/weights.pt b/ML/model/weights.pt
index 4dcd733..f911979 100644
Binary files a/ML/model/weights.pt and b/ML/model/weights.pt differ
diff --git a/ML/src/makexml/MakeScore.py b/ML/src/makexml/MakeScore.py
index 681e584..37f382b 100644
--- a/ML/src/makexml/MakeScore.py
+++ b/ML/src/makexml/MakeScore.py
@@ -1,6 +1,5 @@
from fractions import Fraction
-from music21 import chord, stream, note, meter, key, clef, metadata
-from music21 import interval
+from music21 import chord, stream, note, meter, key, clef, metadata, interval, bar
from .ScoreInfo import ScoreInfo
from .ScoreIterator import ScoreIterator
from .MeasureIterator import MeasureIterator
@@ -8,13 +7,16 @@
from .StafflineUtils import StafflineUtils
from .IntervalPreset import IntervalPreset
from .MakeTestData import MakeTestData
-from ML.src.FilePath import BASE_DIR
+from .TextProcesser import TextProcesser
+from ..FilePath import BASE_DIR
import random
import string
+from PIL import Image
import cv2
from ultralytics import YOLO
import pandas as pd
import os
+import numpy as np
class MakeScore:
# 학습된 모델 위치와 그걸 기반으로 한 모델 객체
@@ -51,10 +53,73 @@ def convert_origin_to_png(origin):
png_list = []
return png_list
"""
+
+    # Added helper.
+    # The detector can report one staff as several overlapping "staff_line"
+    # boxes. Boxes whose y-coordinates are close are merged into a single row,
+    # and the merged box is forced to span the full image width
+    # (x1 = 0, x2 = image_width).
+    @staticmethod
+    def merge_staff_lines(df: pd.DataFrame, image_width: int, y_threshold: int = 10) -> pd.DataFrame:
+        """Collapse near-duplicate ``staff_line`` detections into one row each.
+
+        Args:
+            df: Detection table with at least the columns ``class_id``,
+                ``class_name``, ``confidence``, ``x1``, ``y1``, ``x2``, ``y2``,
+                ``x_center`` and ``y_center``.
+            image_width: Width of the source image; every merged staff line is
+                stretched to ``[0, image_width]``.
+            y_threshold: Two staff lines are merged when both their ``y1`` and
+                their ``y2`` differ by strictly less than this value.
+
+        Returns:
+            A new DataFrame holding the non-staff rows plus one row per merged
+            staff line, sorted by ``class_name``, ``x_center``, ``y_center``
+            with a fresh index. An empty input is returned as a copy.
+        """
+        # Nothing was detected: there is no "class_name" column to filter on.
+        if df.empty or "class_name" not in df.columns:
+            return df.copy()
+
+        is_staff = df["class_name"] == "staff_line"
+        others = df[~is_staff]
+        staff_rows = df[is_staff].to_dict("records")
+
+        merged = []
+        used = [False] * len(staff_rows)
+
+        for i, anchor in enumerate(staff_rows):
+            if used[i]:
+                continue
+            used[i] = True
+            group = [anchor]
+
+            # Candidates are compared against the first row of the group only,
+            # so grouping is not transitive.
+            for j in range(i + 1, len(staff_rows)):
+                if used[j]:
+                    continue
+                candidate = staff_rows[j]
+                if (abs(anchor["y1"] - candidate["y1"]) < y_threshold
+                        and abs(anchor["y2"] - candidate["y2"]) < y_threshold):
+                    group.append(candidate)
+                    used[j] = True
+
+            y1_avg = float(np.mean([g["y1"] for g in group]))
+            y2_avg = float(np.mean([g["y2"] for g in group]))
+            x1 = 0
+            x2 = image_width
+
+            merged.append({
+                # The class id of the group's first row is kept.
+                "class_id": anchor["class_id"],
+                "class_name": "staff_line",
+                # The merged box keeps the best confidence of its group.
+                "confidence": max(g["confidence"] for g in group),
+                "x1": x1,
+                "y1": y1_avg,
+                "x2": x2,
+                "y2": y2_avg,
+                "x_center": (x1 + x2) / 2,
+                "y_center": (y1_avg + y2_avg) / 2,
+                "width": x2 - x1,
+                "height": y2_avg - y1_avg,
+            })
+
+        if merged:
+            result_df = pd.concat([others, pd.DataFrame(merged)], ignore_index=True)
+        else:
+            # No staff lines at all: skip concatenating an empty frame.
+            result_df = others.reset_index(drop=True)
+
+        return result_df.sort_values(by=["class_name", "x_center", "y_center"]).reset_index(drop=True)
+
+
# 모델의 예측 결과를 pandas dataframe으로 변환시켜주는 함수
def convert_result_to_df(result):
rows = []
- boxes = result.boxes
+ boxes = result.boxes
+
+ image_width = result.orig_img.shape[1]
+
for i in range(len(boxes)):
class_id = int(boxes.cls[i])
class_name = result.names[class_id]
@@ -75,14 +140,15 @@ def convert_result_to_df(result):
rows.append(row)
df = pd.DataFrame(rows)
-
+ # 중복되는 staff_line 제거
+ df = MakeScore.merge_staff_lines(df, image_width=image_width)
# 클래스 이름으로 정렬하고 좌표기준으로 정렬
df_sorted = df.sort_values(by=["class_name", "x_center", "y_center"])
return df_sorted
- # 이미지 리스트를 모델을 통해 탑지하는 함수
+ # 이미지 리스트를 모델을 통해 탐지하는 함수
@staticmethod
def detect_object(img_list):
detection_results = []
@@ -165,13 +231,35 @@ def convert_df_to_score(object_dfs, vis_list):
# 저장된 dataframe에서 보표에 대한 정보만 들고옴
staff_df = object_df[object_df["class_name"] == "staff_line"].copy()
staff_df = staff_df.sort_values(by="y1").reset_index(drop=True)
+
+ # 해당 페이지의 탐지결과에서 가사 영역만 가진 dataframe과 코드 영역만 가진 dataframe
+ lyrics_df = object_df[object_df["class_name"] == "lyrics"].copy()
+ harmony_df = object_df[object_df["class_name"] == "harmony"].copy()
+
# 들고온 보표의 개수만큼 반복문
for staff_index in range(len(staff_df)):
row = staff_df.iloc[staff_index]
sx1, sy1, sx2, sy2 = int(row["x1"]), int(row["y1"]), int(row["x2"]), int(row["y2"])
+ # 해당 보표의 가사만 골라내기
+ if staff_index < len(staff_df) - 1: # 마지막 보표가 아닌 경우
+ next_row = staff_df.iloc[staff_index+1]
+ cur_lyrics_df = lyrics_df[
+ (lyrics_df["y_center"] > row["y2"]) &
+ (lyrics_df["y_center"] < next_row["y1"])
+ ].copy()
+ else: # 마지막 보표인 경우
+ cur_lyrics_df = lyrics_df[
+ (lyrics_df["y_center"] > row["y2"])
+ ].copy()
+
+ # 박스쳐진 staff_line에 선이 5개가 안들어가있는 경우가 있어서 y좌표에 약간의 padding을 적용
+ y_padding = int(row["height"] * 0.05)
+ y1_pad = max(0, sy1 - y_padding)
+ y2_pad = min(vis.shape[0], sy2 + y_padding)
+
# 이미지에서 잘라냄
- staff_crop = vis[sy1:sy2, sx1:sx2]
+ staff_crop = vis[y1_pad:y2_pad, sx1:sx2]
# OpenCV로 5줄 찾음
staff_lines = StafflineUtils.extract_5lines(staff_crop)
@@ -208,6 +296,7 @@ def convert_df_to_score(object_dfs, vis_list):
m.append(clef.TrebleClef())
else:
m.append(clef.BassClef())
+
elif "keysig" in cls: # 조표
keysig = cls.split("_")[1]
print("keysig_index: ", cls)
@@ -220,6 +309,9 @@ def convert_df_to_score(object_dfs, vis_list):
measiter.cur_keysig = keysig_index
measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig)
print(measiter.interval_list)
+ for el in m.getElementsByClass(key.KeySignature):
+ m.remove(el)
+ m.insert(0, key.KeySignature(keysig_index))
m.append(key.KeySignature(keysig_index))
elif "timesig" in cls: # 박자표
@@ -234,18 +326,20 @@ def convert_df_to_score(object_dfs, vis_list):
r = note.Rest()
r.duration.quarterLength = MakeScore.REST_DURATION_MAP[cls]
m.append(r)
- print(cls)
+ #print(cls)
elif cls in MakeScore.NOTE_DURATION_MAP: # 음표
duration = MakeScore.NOTE_DURATION_MAP[cls]
+ c = chord.Chord()
# 점 음표 확인
note_box = (row["x1"], row["y1"], row["x2"], row["y2"])
- print(note_box)
+ #print(note_box)
if Pitch.is_dotted_note(note_box, cur_staff_df):
duration *= 1.5
print("dot",cls)
else:
print(cls)
+
# pitch 계산
head_df = Pitch.find_note_head(cur_staff_note_head, row["x1"], pitch_y_top, row["x2"], pitch_y_bottom)
print("음표탐지시도 완료")
@@ -257,27 +351,61 @@ def convert_df_to_score(object_dfs, vis_list):
n = Pitch.find_pitch_from_y(cur_staff_df, head, staff_lines_global, measiter)
if isinstance(n, note.Note):
pitches.append(n)
- if pitches:
- #midi_list = [note_obj.pitch.midi for note_obj in pitches]
-
- c = chord.Chord([n.pitch for n in pitches])
+ if pitches:
+ c.pitches = [n.pitch for n in pitches]
c.duration.quarterLength = duration
for i, note_obj in enumerate(pitches):
if hasattr(note_obj, "accidental") and note_obj.accidental is not None and note_obj.accidental.displayStatus:
c.notes[i].accidental = note_obj.accidental
c.notes[i].accidental.displayStatus = True
- #c = chord.Chord(pitches) # pitch 리스트로 코드 생성
- #c.duration.quarterLength = duration # 미리 계산한 duration 할당
+ # 가사 확인
+ lyrics_list = TextProcesser.find_text_list(cur_lyrics_df, row["x1"], row["x2"])
+
+ lyrics_data = []
+ for _, lyric in lyrics_list.iterrows():
+ x1, y1, x2, y2 = int(lyric["x1"]), int(lyric["y1"]), int(lyric["x2"]), int(lyric["y2"])
+ pad_x = lyric["width"] * 0
+ pad_y = lyric["height"] * 0
+ y_max, x_max = vis.shape[:2]
+ crop_x1 = max(x1-pad_x,0)
+ crop_x2 = min(x2+pad_x,x_max)
+ crop_y1 = max(y1-pad_y,0)
+ crop_y2 = min(y2+pad_y,y_max)
+ lyrics_crop = vis[int(crop_y1):int(crop_y2), int(crop_x1):int(crop_x2)]
+ crop_pil = Image.fromarray(cv2.cvtColor(lyrics_crop, cv2.COLOR_BGR2RGB))
+ # OCR 수행
+ text = TextProcesser.detect_text(crop_pil)
+ # 결과 저장
+ lyrics_data.append(text)
+ for i, lyric in enumerate(lyrics_data):
+ print(f"탐지된 가사: {lyric}")
+ lyric_obj = note.Lyric()
+ lyric_obj.text = lyric
+ lyric_obj.number = i + 1
+ #c.notes[0].lyrics.append(lyric_obj)
+ c.addLyric(lyric)
m.append(c)
print(c)
+ elif cls in ["measure", "measure_double", "measure_final"]:
+ if cls == "measure_double":
+ m.rightBarline = bar.Barline("light-light")
+ elif cls == "measure_final":
+ m.rightBarline = bar.Barline("light-heavy")
+
+ part.append(m)
+ measurenum += 1
+ m = stream.Measure(number=measurenum)
+ measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig)
+
+ """
elif cls in ["measure", "double_measure"]:
part.append(m)
measurenum += 1
m = stream.Measure(number=measurenum)
measiter.interval_list = IntervalPreset.get_interval_list(measiter.cur_clef, measiter.cur_keysig)
-
+ """
part.append(m)
@@ -289,20 +417,30 @@ def convert_df_to_score(object_dfs, vis_list):
# 키를 변환하는 함수
# Score 객체와 변환할 값을 정수로 받아서 키를 변환
- # 현재는 -2, -1, 1, 2 만 받음
+    # Accepted range: -7 to +7 (semitones)
@staticmethod
- def change_key(score, diff): # -2 -1 1 1 3 4
- if diff > 2 or diff < -2:
+ def change_key(score, diff):
+ if diff > 7 or diff < -7:
return score
if diff == 0:
return score
else:
change = {
+ -7: "-P5",
+ -6: "-D5",
+ -5: "-P4",
+ -4: "-M3",
+ -3: "-m3",
-2: "-M2",
-1: "-m2",
1: "m2",
- 2: "M2"
+ 2: "M2",
+ 3: "m3",
+ 4: "M3",
+ 5: "P4",
+ 6: "D5",
+ 7: "P5"
}
interval_str = change[diff]
intv = interval.Interval(interval_str)
diff --git a/ML/src/makexml/Pitch.py b/ML/src/makexml/Pitch.py
index 24e21ab..5235d8f 100644
--- a/ML/src/makexml/Pitch.py
+++ b/ML/src/makexml/Pitch.py
@@ -39,67 +39,66 @@ def find_pitch_from_y(staff_df, head, staff_lines, measiter, margin_ratio=0.35):
# 기준 생성
margin = gap * margin_ratio
positions = [
- (18, L1-5*gap+margin, L1-5*gap-margin),
- (17, L1-4*gap+margin, L1-4*gap-margin),
- (16, L1-3*gap+margin, L1-3*gap-margin),
- (15, L1-2*gap+margin, L1-2*gap-margin),
- (14, L1-1*gap+margin, L1-1*gap-margin),
- (13, L1+margin, L1-margin),
- (12, L1+gap+margin, L1+gap-margin),
- (11, L2+margin, L2-margin),
- (10, L2+gap+margin, L2+gap-margin),
- (9, L3+margin, L3-margin),
- (8, L3+gap+margin, L3+gap-margin),
- (7, L4+margin, L4-margin),
- (6, L4+gap+margin, L4+gap-margin),
- (5, L5+margin, L5-margin),
- (4, L5+1*gap+margin, L5+1*gap-margin),
- (3, L5+2*gap+margin, L5+2*gap-margin),
- (2, L5+3*gap+margin, L5+3*gap-margin),
- (1, L5+4*gap+margin, L5+4*gap-margin),
- (0, L5+5*gap+margin, L5+5*gap-margin),
+ (18, L1-5*gap),
+ (17, L1-4*gap),
+ (16, L1-3*gap),
+ (15, L1-2*gap),
+ (14, L1-1*gap),
+ (13, L1),
+ (12, L1+gap),
+ (11, L2),
+ (10, L2+gap),
+ (9, L3),
+ (8, L3+gap),
+ (7, L4),
+ (6, L4+gap),
+ (5, L5),
+ (4, L5+1*gap),
+ (3, L5+2*gap),
+ (2, L5+3*gap),
+ (1, L5+4*gap),
+ (0, L5+5*gap),
]
+ # 가장 가까운 위치 계산
+ pitch_centers = [(pitch, position) for pitch, position in positions]
+ pitch_idx, closest_y = min(pitch_centers, key=lambda p: abs(p[1] - y_center))
+
+ n = note.Note()
+
+ # 임시표 처리
accidental_df = staff_df[staff_df["class_name"].isin(ACCIDENTAL_CLASSES.keys())].copy()
- for pitch, low, high in positions:
- #print(low, y_center, high)
- if low > y_center > high:
- n = note.Note()
-
- # 임시표 처리
- for _, acc in accidental_df.iterrows():
- ax_target = acc["x2"] - acc["width"] * 0.2
- ay_center = acc["y_center"]
-
- # y조건: 임시표가 음표 머리 영역 y 안에 있어야 함
- if not (y1 <= ay_center <= y2):
- continue
-
- # x조건: 임시표의 x1, x2,의 5/4 지점이 음표 머리 영역 안에 있어야됨.
- head_width = x2 - x1
- threshold_x = x1 - 0.2 * head_width
- if x1 <= ax_target <= x2:
- # pitch 보정
- adjust = ACCIDENTAL_CLASSES[acc["class_name"]]
- if adjust == 1:
- interval_list[pitch] += 1
- n.pitch.midi = interval_list[pitch]
- n.accidental = pitch.Accidental('sharp')
- elif adjust == -1:
- interval_list[pitch] -= 1
- n.pitch.midi = interval_list[pitch]
- n.accidental = pitch.Accidental('flat')
- else:
- temp_interval = IntervalPreset.get_interval_list(measiter.cur_clef, 0)
- interval_list[pitch] = temp_interval[pitch]
- n.pitch.midi = interval_list[pitch]
- n.accidental = pitch.Accidental('natural')
- return n
- n.pitch.midi = interval_list[pitch]
- n.accidental = None
+ for _, acc in accidental_df.iterrows():
+ ax_target = acc["x2"] - acc["width"] * 0.2
+ ay_center = acc["y_center"]
+
+ # y조건: 임시표가 음표 머리 영역 y 안에 있어야 함
+ if not (y1 <= ay_center <= y2):
+ continue
+
+ # x조건: 임시표가 음표 머리 옆에 있을 경우
+ head_width = x2 - x1
+ if x1 <= ax_target <= x2:
+ adjust = ACCIDENTAL_CLASSES[acc["class_name"]]
+ if adjust == 1:
+ interval_list[pitch_idx] += 1
+ n.pitch.midi = interval_list[pitch_idx]
+ n.accidental = note.Accidental('sharp')
+ elif adjust == -1:
+ interval_list[pitch_idx] -= 1
+ n.pitch.midi = interval_list[pitch_idx]
+ n.accidental = note.Accidental('flat')
+ else:
+ temp_interval = IntervalPreset.get_interval_list(measiter.cur_clef, 0)
+ interval_list[pitch_idx] = temp_interval[pitch_idx]
+ n.pitch.midi = interval_list[pitch_idx]
+ n.accidental = note.Accidental('natural')
return n
- return None # 범위 밖이면 None
-
+ n.pitch.midi = interval_list[pitch_idx]
+ n.accidental = None
+ return n
+
+
# 음표 영역 안에 dot_note_head의 중심좌표가 있는지 확인하는 함수
@staticmethod
def is_dotted_note(note_box, staff_df):
@@ -127,7 +126,7 @@ def find_note_head(head_fd, x1, y1, x2, y2):
(head_fd["x_center"] >= x1) & (head_fd["x_center"] <= x2) &
(head_fd["y_center"] >= y1) & (head_fd["y_center"] <= y2)
].copy()
- print(hits)
+ #print(hits)
if hits.empty:
return pd.DataFrame(columns=head_fd.columns)
x_base = hits["x_center"].min()
diff --git a/ML/src/makexml/TextProcesser.py b/ML/src/makexml/TextProcesser.py
new file mode 100644
index 0000000..68f93ce
--- /dev/null
+++ b/ML/src/makexml/TextProcesser.py
@@ -0,0 +1,70 @@
+import easyocr
+import json
+import numpy as np
+from music21 import converter, note
+from collections import defaultdict
+from PIL import Image
+
+class TextProcesser:
+    """OCR helpers for the text (lyric / chord) regions detected on a score page."""
+
+    # Shared easyOCR reader for Korean and English, CPU only.
+    # It is created once, when the class body is executed.
+    reader = easyocr.Reader(['ko','en'], gpu=False)
+
+    # Return the text boxes that belong to one note.
+    @staticmethod
+    def find_text_list(text_df, x1, x2):
+        """Select rows of ``text_df`` whose ``x_center`` lies in ``[x1, x2)``.
+
+        The selection is returned as a copy, ordered top to bottom by
+        ``y_center`` and re-indexed from zero.
+        """
+        centers = text_df["x_center"]
+        inside = (centers >= x1) & (centers < x2)
+        selected = text_df[inside].copy()
+        return selected.sort_values(by="y_center").reset_index(drop=True)
+
+    # Run OCR on a single image crop.
+    @staticmethod
+    def detect_text(img):
+        """Return the text read from ``img``, or ``""`` when nothing is found.
+
+        Anything that is not already a NumPy array is converted with
+        ``np.array`` first. All fragments returned by the reader are
+        concatenated without a separator and stripped.
+        """
+        pixels = img if isinstance(img, np.ndarray) else np.array(img)
+        fragments = TextProcesser.reader.readtext(pixels, detail=0)
+        if not fragments:
+            return ""
+        return "".join(fragments).strip()
+
+    # Collect the lyrics of a MusicXML file, grouped by verse.
+    # The file is parsed into a score first; a later version may take the
+    # score object directly.
+    @staticmethod
+    def get_lyrics_json_from_mxl(mxl_path):
+        """Parse ``mxl_path`` and return ``{verse_number: [text per note, ...]}``.
+
+        Every verse from 1 up to the highest verse number found gets one list
+        entry per note, in note order. Notes without a lyric for a verse
+        contribute an empty string. A lyric with a falsy number counts as
+        verse 1.
+        """
+        def verse_of(lyric):
+            return int(lyric.number) if lyric.number else 1
+
+        all_notes = list(converter.parse(mxl_path).recurse().notes)
+
+        # First pass: find the highest verse number used anywhere.
+        highest = max(
+            (verse_of(lyric) for n in all_notes for lyric in n.lyrics),
+            default=1,
+        )
+
+        # Second pass: fill every verse slot for every note.
+        lyrics_by_verse = {}
+        for n in all_notes:
+            texts = {}
+            for lyric in n.lyrics:
+                texts[verse_of(lyric)] = lyric.text.strip()
+            for verse in range(1, highest + 1):
+                lyrics_by_verse.setdefault(verse, []).append(texts.get(verse, ""))
+
+        return lyrics_by_verse
+
+
diff --git a/convert_result/b247644d.pdf b/convert_result/b247644d.pdf
new file mode 100644
index 0000000..916ce43
Binary files /dev/null and b/convert_result/b247644d.pdf differ
diff --git a/convert_result/b247644d.xml b/convert_result/b247644d.xml
new file mode 100644
index 0000000..7d67957
--- /dev/null
+++ b/convert_result/b247644d.xml
@@ -0,0 +1,728 @@
+
+
+
+
+ b247644d
+
+ b247644d
+
+ Music21
+
+ 2025-05-19
+ music21 v.9.5.0
+
+
+
+
+ 7
+ 40
+
+
+
+
+
+
+
+
+
+
+
+
+ 10080
+
+ 4
+
+
+
+ G
+ 2
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 곰
+
+
+
+
+ E
+ 4
+
+ 5040
+ eighth
+ up
+ begin
+
+ single
+ 세
+
+
+
+
+ E
+ 4
+
+ 5040
+ eighth
+ up
+ end
+
+
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 리
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 가
+
+
+
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 한
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+ single
+ 집
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+ single
+ 에
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 잎
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+ single
+ 아
+
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+ single
+ 빠
+
+
+
+
+ G
+ 1
+ 4
+
+ 5040
+ eighth
+
+ single
+ 곰
+
+
+
+
+ 5040
+ eighth
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+ single
+ 엄
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+
+
+
+
+
+
+ G
+ 1
+ 4
+
+ 5040
+ eighth
+
+ single
+ 곰
+
+
+
+
+ 5040
+ eighth
+
+
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 애
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 기
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 곰
+
+
+
+
+ 10080
+ quarter
+
+
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 아
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 빠
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 곰
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 은
+
+
+
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 등
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 등
+
+
+
+
+ B
+ 4
+
+ 20160
+ half
+
+ single
+ 해
+
+
+
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 엄
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 곰
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 은
+
+
+
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 날
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 씨
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 해
+
+
+
+
+ 10080
+ quarter
+
+
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 애
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 기
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 곰
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+ single
+ 은
+
+
+
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+ down
+ begin
+
+ single
+ 너
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+ down
+ end
+
+ single
+ 무
+
+
+
+
+ B
+ 4
+
+ 5040
+ eighth
+ down
+ begin
+
+ single
+ 귀
+
+
+
+
+ C
+ 1
+ 5
+
+ 5040
+ eighth
+ down
+ end
+
+ single
+ 여
+
+
+
+
+
+ C
+ 1
+ 5
+
+ 5040
+ eighth
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+ single
+ 위
+
+
+
+
+ 10080
+ quarter
+
+
+
+
+
+
+ E
+ 5
+
+ 10080
+ quarter
+
+ single
+ 으
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+
+
+
+
+
+ E
+ 5
+
+ 10080
+ quarter
+
+ single
+ 9
+
+
+
+
+ B
+ 4
+
+ 10080
+ quarter
+
+
+
+
+
+
+
+
+
+ G
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 잘
+
+
+
+
+ F
+ 1
+ 4
+
+ 10080
+ quarter
+
+ single
+ 한
+
+
+
+
+ E
+ 4
+
+ 10080
+ quarter
+
+
+
+
+
+
+ 10080
+ quarter
+
+
+
+
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c82833d..f95b202 100644
Binary files a/requirements.txt and b/requirements.txt differ
diff --git a/run.py b/run.py
index 2f398dd..ea4a56b 100644
--- a/run.py
+++ b/run.py
@@ -4,7 +4,6 @@
app = create_app()
-
if __name__ == '__main__':
with app.app_context():
db.create_all()
diff --git a/src/models/transform.py b/src/models/transform.py
index a260273..c4f4339 100644
--- a/src/models/transform.py
+++ b/src/models/transform.py
@@ -8,6 +8,15 @@ class TransformTranspose(db.Model):
score_id = db.Column(db.Integer, db.ForeignKey('scores.id'), nullable=False)
pdf_path = db.Column(db.String(512), nullable=False)
created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
+class TransformLyrics(db.Model):
+    # ORM model mapped to the 'transform_lyrics' table: one row of lyrics text
+    # linked to a score.
+    __tablename__ = 'transform_lyrics'
+
+    # Auto-incrementing integer primary key.
+    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
+    # Required foreign key to scores.id.
+    score_id = db.Column(db.Integer, db.ForeignKey('scores.id'), nullable=False)
+    # Required lyrics text.
+    lyrics_text = db.Column(db.Text, nullable=False)
+    # Defaults to datetime.utcnow, passed as a callable.
+    created_at = db.Column(db.DateTime, default=datetime.utcnow)
+
class TransformMelody(db.Model):
diff --git a/src/routes/transform.py b/src/routes/transform.py
index 01ddcdf..de4bba2 100644
--- a/src/routes/transform.py
+++ b/src/routes/transform.py
@@ -140,6 +140,62 @@ def transform_transpose_route(score_id):
'message': 'Transpose completed successfully'
}), 201
+# The URL rule must declare the <int:score_id> variable: the view function
+# takes score_id as a required argument, so a rule without it cannot be served.
+# The docstring below is the Swagger (YAML) spec for this endpoint and is kept
+# in its original wording.
+@transform_bp.route('/score/<int:score_id>/lyrics', methods=['POST'])
+def lyrics_extract_route(score_id):
+    """
+    가사 추출 API
+    ---
+    tags:
+      - transform
+    summary: 업로드된 악보에서 가사를 추출하여 텍스트 파일로 저장하고 결과 ID를 반환합니다
+    parameters:
+      - in: path
+        name: score_id
+        required: true
+        schema:
+          type: integer
+        description: 가사를 추출할 대상 악보의 ID
+    responses:
+      200:
+        description: 가사 추출 완료
+        schema:
+          type: object
+          properties:
+            result_id:
+              type: integer
+              example: 301
+            text_path:
+              type: string
+              example: "convert_result/301.txt"
+            message:
+              type: string
+              example: "Lyrics extracted successfully"
+      404:
+        description: 악보 ID를 찾을 수 없음
+        schema:
+          type: object
+          properties:
+            error:
+              type: string
+              example: "Score not found"
+    """
+    score = Score.query.get(score_id)
+    if not score:
+        return jsonify({'error': 'Score not found'}), 404
+
+    # Imported inside the view, as in the original code.
+    from src.services.transform_service import extract_lyrics
+    result_id = extract_lyrics(score)
+
+    # Prefer the path stored on the Result row; fall back to the conventional
+    # location when the row cannot be loaded.
+    result = Result.query.get(result_id)
+    text_path = result.text_path if result else f"convert_result/{result_id}.txt"
+
+    return jsonify({
+        'result_id': result_id,
+        'text_path': text_path,
+        'message': 'Lyrics extracted successfully'
+    }), 200
+
+
@transform_bp.route('/score//melody', methods=['POST'])
def melody_extract_route(score_id):
diff --git a/src/services/transform_service.py b/src/services/transform_service.py
index dd4f886..d201d7f 100644
--- a/src/services/transform_service.py
+++ b/src/services/transform_service.py
@@ -4,7 +4,7 @@
import subprocess
import cv2
-from music21 import midi, stream
+from music21 import midi, stream, note
from src.models.db import db
from src.models.score import Score
from src.models.result import Result # ✅ 통합된 Result 모델
@@ -20,6 +20,7 @@
TIMIDITY_CMD = "timidity"
MSCORE_CMD = os.path.join("squashfs-root", "mscore4portable")
+
def perform_transpose(score: Score, shift: int) -> int:
"""
키 변경을 수행하고 결과 PDF를 생성해 Result 테이블에 저장
@@ -53,6 +54,7 @@ def perform_transpose(score: Score, shift: int) -> int:
return result.id
+
def extract_melody(score: Score, start_measure: int, end_measure: int) -> int:
"""
악보에서 특정 마디 범위의 멜로디를 추출하여 MP3 파일로 저장 후 Result 테이블에 저장
@@ -95,3 +97,45 @@ def extract_melody(score: Score, start_measure: int, end_measure: int) -> int:
db.session.commit()
return result.id
+
+
+def extract_lyrics(score: Score) -> int:
+    """
+    Extract the lyrics of an uploaded score image, save them as a UTF-8 text
+    file under ``convert_result/`` and record the outcome in the Result table.
+
+    Args:
+        score: Score row; its ``original_filename`` is looked up in
+            ``uploaded_scores/``.
+
+    Returns:
+        The id of the newly created Result row.
+
+    Raises:
+        RuntimeError: The image file could not be read.
+        ValueError: No lyric text was found in the recognised score.
+    """
+    image_path = os.path.join('uploaded_scores', score.original_filename)
+    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
+    if img is None:
+        raise RuntimeError("이미지를 불러올 수 없습니다")
+
+    score_obj = MakeScore.make_score([img])
+
+    # Collect lyrics from every pitched element. The score builder attaches
+    # lyrics to chord.Chord objects, which are not note.Note instances, so the
+    # check uses their common base class note.NotRest.
+    lyrics = [
+        el.lyric.strip()
+        for el in score_obj.recurse()
+        if isinstance(el, note.NotRest) and el.lyric
+    ]
+
+    lyrics_text = "\n".join(filter(None, lyrics)).strip()
+    if not lyrics_text:
+        raise ValueError("추출된 가사가 없습니다")
+
+    # The uuid only names the output file; the value returned to the caller
+    # is the database id of the Result row.
+    file_id = str(uuid.uuid4())
+    convert_dir = 'convert_result'
+    os.makedirs(convert_dir, exist_ok=True)
+
+    text_path = os.path.join(convert_dir, f"{file_id}.txt")
+    with open(text_path, 'w', encoding='utf-8') as f:
+        f.write(lyrics_text)
+
+    result = Result(
+        score_id=score.id,
+        type='lyrics',
+        text_path=text_path,       # for download
+        text_content=lyrics_text   # for API lookup
+    )
+    db.session.add(result)
+    db.session.commit()
+
+    return result.id