From d7057a3495686d983cd498a67c7272b71e1e80ad Mon Sep 17 00:00:00 2001 From: junhanjeong Date: Thu, 27 Nov 2025 20:42:08 +0900 Subject: [PATCH 1/4] =?UTF-8?q?debug:=20json=5Futils=EC=97=90=20debugging?= =?UTF-8?q?=EC=9A=A9=20=EC=BD=94=EB=93=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/json_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/json_utils.py b/src/json_utils.py index cbbab8d..84c0817 100644 --- a/src/json_utils.py +++ b/src/json_utils.py @@ -98,5 +98,8 @@ def extract_json_fragment(text: str) -> Any: last_error = err continue + print("CLEANED:", cleaned) + print("FRAGMENT:", fragment) + print("FRAGMENT repr:", repr(fragment)) print("Failed JSON text:", text) # 디버깅용 출력 raise ValueError(f"JSON 파싱에 실패했습니다: {last_error}") \ No newline at end of file From d8820315a249a26498eebba2517f891699473331 Mon Sep 17 00:00:00 2001 From: junhanjeong Date: Thu, 27 Nov 2025 20:58:30 +0900 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20openai=20response=20=EC=9D=B8?= =?UTF-8?q?=EB=8D=B1=EC=8A=A41=20->=20-1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm_client.py b/src/llm_client.py index 6bb7012..e3896c5 100644 --- a/src/llm_client.py +++ b/src/llm_client.py @@ -82,4 +82,4 @@ def _generate_response_bedrock_claude(self, system_instruction: str, message: st messages=[{"role": "user", "content": [{"text": message}]}] ) - return response['output']['message']['content'][0]['text'] if not selected_model.startswith("openai") else response['output']['message']['content'][1]['text'] \ No newline at end of file + return response['output']['message']['content'][0]['text'] if not selected_model.startswith("openai") else response['output']['message']['content'][-1]['text'] \ No newline at end of file From 45991f3b8d84b0c9be3331595b233c10b363945b Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Thu, 27 Nov 2025 22:00:22 +0900 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20url=20=EB=AC=B8=EC=9E=90=EC=97=B4=20?= =?UTF-8?q?=EB=8C=80=EC=8B=A0=20url=EC=97=90=20=EB=8C=80=ED=95=9C=20?= =?UTF-8?q?=ED=95=B4=EC=8B=9C=EA=B0=92=EC=9D=84=20=EC=82=AC=EC=9A=A9?= =?UTF-8?q?=ED=95=98=EB=8F=84=EB=A1=9D=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lambda_function.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index 42de60e..4071612 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -1,4 +1,5 @@ import json +import hashlib from trafilatura import extract import boto3 @@ -50,22 +51,23 @@ def lambda_handler(event, context): # url에서 쿼리 파라미터(?), 해시(#) 제거 url = url.split('?')[0].split('#')[0] + url_hashed = hashlib.sha256(url.encode('utf-8')).hexdigest() s3 = boto3.client("s3") bucket = 'inha-capstone-20-tos-content' dynamodb = boto3.resource('dynamodb') table = dynamodb.Table('inha-capstone-20-tos-analysis') - key = url + key = url_hashed # try: S3에서 해당 url을 key로 갖는 객체 가져오기 try: - response = s3.get_object(Bucket=bucket, Key=url) + response = s3.get_object(Bucket=bucket, Key=key) saved_tos_content = response['Body'].read().decode('utf-8') # 기존 tos_content와 비교 if saved_tos_content == tos_content: # 동일하면 DynamoDB에서 이전 분석 결과를 가져와 return - db_response = table.get_item(Key={'url': url}) + db_response = table.get_item(Key={'url_hashed': url_hashed}) if 'Item' in db_response: evaluation_result = db_response['Item'] print("캐시 존재, 이전 분석 결과 반환") @@ -140,7 +142,7 @@ def lambda_handler(event, context): # DynamoDB에 분석 결과 저장 table.put_item(Item={ - 'url': url, + 'url_hashed': url_hashed, 'overall_evaluation': evaluation_result.get("overall_evaluation"), 'evaluation_for_each_clause': evaluation_result.get("evaluation_for_each_clause") }) From f1d1b8ed0f518b954bbb84631c10d083219b0550 Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Thu, 27 Nov 2025 22:07:00 +0900 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20dynamodb=EC=9D=98=20schema=EC=97=90?= =?UTF-8?q?=20=EB=A7=9E=EB=8A=94=20key=20=EC=9D=B4=EB=A6=84=20=EC=82=AC?= =?UTF-8?q?=EC=9A=A9=20=EA=B8=89=ED=95=98=EA=B2=8C=20=EC=88=98=EC=A0=95?= =?UTF-8?q?=ED=95=98=EB=8A=90=EB=9D=BC=20=EC=98=A4=EB=A5=98=EA=B0=80=20?= =?UTF-8?q?=EC=9E=88=EC=97=88=EC=8A=B5=EB=8B=88=EB=8B=A4.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lambda_function.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index 4071612..ae3c482 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -67,7 +67,7 @@ def lambda_handler(event, context): # 기존 tos_content와 비교 if saved_tos_content == tos_content: # 동일하면 DynamoDB에서 이전 분석 결과를 가져와 return - db_response = table.get_item(Key={'url_hashed': url_hashed}) + db_response = table.get_item(Key={'url': key}) if 'Item' in db_response: evaluation_result = db_response['Item'] print("캐시 존재, 이전 분석 결과 반환") @@ -142,7 +142,7 @@ def lambda_handler(event, context): # DynamoDB에 분석 결과 저장 table.put_item(Item={ - 'url_hashed': url_hashed, + 'url': key, 'overall_evaluation': evaluation_result.get("overall_evaluation"), 'evaluation_for_each_clause': evaluation_result.get("evaluation_for_each_clause") })