diff --git a/apps/pre-processing-service/app/api/endpoints/product.py b/apps/pre-processing-service/app/api/endpoints/product.py index 32a4dcbe..2812ef79 100644 --- a/apps/pre-processing-service/app/api/endpoints/product.py +++ b/apps/pre-processing-service/app/api/endpoints/product.py @@ -6,6 +6,7 @@ CustomException, ) from ...service.crawl_service import CrawlService +from ...service.s3_upload_service import S3UploadService from ...service.search_service import SearchService from ...service.match_service import MatchService from ...service.similarity_service import SimilarityService @@ -60,11 +61,11 @@ async def match(request: RequestSadaguMatch): ) async def similarity(request: RequestSadaguSimilarity): """ - 매칭된 상품들 중 키워드와의 유사도를 계산하여 최적의 상품을 선택합니다. + 매칭된 상품들 중 키워드와의 유사도를 계산하여 상위 10개 상품을 선택합니다. """ try: similarity_service = SimilarityService() - response_data = similarity_service.select_product_by_similarity(request) + response_data = similarity_service.select_top_products_by_similarity(request) if not response_data: raise CustomException( @@ -99,3 +100,24 @@ async def crawl(body: RequestSadaguCrawl): raise HTTPException(status_code=e.status_code, detail=e.detail) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/s3-upload", response_model=ResponseS3Upload, summary="S3 이미지 업로드") +async def s3_upload(request: RequestS3Upload): + """ + 크롤링 완료 후 별도로 호출하여 이미지들을 S3 저장소에 업로드합니다. + """ + try: + s3_upload_service = S3UploadService() + response_data = await s3_upload_service.upload_crawled_products_to_s3(request) + + if not response_data: + raise CustomException( + 500, "S3 이미지 업로드에 실패했습니다.", "S3_UPLOAD_FAILED" + ) + + return response_data + except InvalidItemDataException as e: + raise HTTPException(status_code=e.status_code, detail=e.detail) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/apps/pre-processing-service/app/model/schemas.py b/apps/pre-processing-service/app/model/schemas.py index 36bef959..ebf19478 100644 --- a/apps/pre-processing-service/app/model/schemas.py +++ b/apps/pre-processing-service/app/model/schemas.py @@ -110,8 +110,10 @@ class SadaguSimilarityData(BaseModel): keyword: str = Field( ..., title="분석 키워드", description="유사도 분석에 사용된 키워드" ) - selected_product: Optional[Dict] = Field( - None, title="선택된 상품", description="유사도 분석 결과 선택된 상품" + top_products: List[Dict] = Field( + default_factory=list, + title="선택된 상품들", + description="유사도 분석 결과 선택된 상위 상품 목록", ) reason: Optional[str] = Field( None, title="선택 이유", description="상품 선택 근거 및 점수 정보" @@ -129,16 +131,23 @@ class ResponseSadaguSimilarity(ResponseBase[SadaguSimilarityData]): class RequestSadaguCrawl(RequestBase): - product_url: HttpUrl = Field( + product_urls: List[HttpUrl] = Field( ..., title="상품 URL", description="크롤링할 상품 페이지의 URL" ) # 응답 데이터 모델 class SadaguCrawlData(BaseModel): - product_url: str = Field(..., title="상품 URL", description="크롤링된 상품 URL") - product_detail: Optional[Dict] = Field( - None, title="상품 상세정보", description="크롤링된 상품의 상세 정보" + crawled_products: List[Dict] = Field( + ..., + title="크롤링된 상품들", + description="크롤링된 상품들의 상세 정보 목록 (URL 포함)", + ) + success_count: int = Field( + ..., title="성공 개수", description="성공적으로 크롤링된 상품 개수" + ) + fail_count: int = Field( + ..., title="실패 개수", description="크롤링에 실패한 상품 개수" ) crawled_at: Optional[str] = Field( None, title="크롤링 시간", description="크롤링 완료 시간" @@ -152,6 +161,81 @@ class ResponseSadaguCrawl(ResponseBase[SadaguCrawlData]): pass +# ============== S3 이미지 업로드 ============== + + 
+class RequestS3Upload(RequestBase): + keyword: str = Field( + ..., title="검색 키워드", description="폴더명 생성용 키워드" + ) # 추가 + crawled_products: List[Dict] = Field( + ..., + title="크롤링된 상품 데이터", + description="이전 단계에서 크롤링된 상품들의 데이터", + ) + base_folder: Optional[str] = Field( + "product", title="기본 폴더", description="S3 내 기본 저장 폴더 경로" + ) + + +# S3 업로드된 이미지 정보 +class S3ImageInfo(BaseModel): + index: int = Field(..., title="이미지 순번", description="상품 내 이미지 순번") + original_url: str = Field( + ..., title="원본 URL", description="크롤링된 원본 이미지 URL" + ) + s3_url: str = Field(..., title="S3 URL", description="S3에서 접근 가능한 URL") + + +# 상품별 S3 업로드 결과 +class ProductS3UploadResult(BaseModel): + product_index: int = Field(..., title="상품 순번", description="크롤링 순번") + product_title: str = Field(..., title="상품 제목", description="상품명") + status: str = Field(..., title="업로드 상태", description="completed/skipped/error") + uploaded_images: List[S3ImageInfo] = Field( + default_factory=list, title="업로드 성공 이미지" + ) + success_count: int = Field( + ..., title="성공 개수", description="업로드 성공한 이미지 수" + ) + fail_count: int = Field( + ..., title="실패 개수", description="업로드 실패한 이미지 수" + ) + + +# S3 업로드 요약 정보 +class S3UploadSummary(BaseModel): + total_products: int = Field( + ..., title="총 상품 수", description="처리 대상 상품 총 개수" + ) + total_success_images: int = Field( + ..., title="성공 이미지 수", description="업로드 성공한 이미지 총 개수" + ) + total_fail_images: int = Field( + ..., title="실패 이미지 수", description="업로드 실패한 이미지 총 개수" + ) + + +# 응답 데이터 모델 +class S3UploadData(BaseModel): + upload_results: List[ProductS3UploadResult] = Field( + ..., title="업로드 결과", description="각 상품의 S3 업로드 결과" + ) + summary: S3UploadSummary = Field( + ..., title="업로드 요약", description="전체 업로드 결과 요약" + ) + uploaded_at: str = Field( + ..., title="업로드 완료 시간", description="S3 업로드 완료 시간" + ) + + +# 최종 응답 모델 +class ResponseS3Upload(ResponseBase[S3UploadData]): + """S3 이미지 업로드 API 응답""" + + pass + + # ============== 블로그 콘텐츠 생성 ============== diff --git a/apps/pre-processing-service/app/service/crawl_service.py b/apps/pre-processing-service/app/service/crawl_service.py index af8f91bc..e8785f64 100644 --- a/apps/pre-processing-service/app/service/crawl_service.py +++ b/apps/pre-processing-service/app/service/crawl_service.py @@ -1,4 +1,5 @@ import time +import asyncio from app.service.crawlers.detail_crawler import DetailCrawler from app.errors.CustomException import InvalidItemDataException from app.model.schemas import RequestSadaguCrawl @@ -12,45 +13,133 @@ def __init__(self): async def crawl_product_detail(self, request: RequestSadaguCrawl) -> dict: """ - 선택된 상품의 상세 정보를 크롤링하는 비즈니스 로직입니다. (5단계) - 상품 URL을 입력받아 상세 정보를 크롤링하여 딕셔너리로 반환합니다. + 선택된 상품들의 상세 정보를 크롤링하는 비즈니스 로직입니다. (5단계) + 여러 상품 URL을 입력받아 순차적으로 상세 정보를 크롤링하여 딕셔너리로 반환합니다. 
""" - crawler = DetailCrawler(use_selenium=True) + product_urls = [str(url) for url in request.product_urls] + + logger.info(f"상품 상세 크롤링 서비스 시작: 총 {len(product_urls)}개 상품") + + crawled_products = [] + success_count = 0 + fail_count = 0 try: - logger.info( - f"상품 상세 크롤링 서비스 시작: product_url={request.product_url}" + # 각 상품을 순차적으로 크롤링 (안정성 확보) + for i, product_url in enumerate(product_urls, 1): + logger.info(f"상품 {i}/{len(product_urls)} 크롤링 시작: {product_url}") + + crawler = DetailCrawler(use_selenium=True) + + try: + # 상세 정보 크롤링 실행 + product_detail = await crawler.crawl_detail(product_url) + + if product_detail: + product_title = product_detail.get("title", "Unknown")[:50] + logger.success( + f"상품 {i} 크롤링 성공: title='{product_title}', price={product_detail.get('price', 0)}" + ) + + # 성공한 상품 추가 + crawled_products.append( + { + "index": i, + "url": product_url, + "product_detail": product_detail, + "status": "success", + "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + ) + success_count += 1 + else: + logger.error(f"상품 {i} 크롤링 실패: 상세 정보 없음") + crawled_products.append( + { + "index": i, + "url": product_url, + "product_detail": None, + "status": "failed", + "error": "상세 정보 없음", + "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + ) + fail_count += 1 + + except Exception as e: + logger.error( + f"상품 {i} 크롤링 오류: url={product_url}, error='{e}'" + ) + crawled_products.append( + { + "index": i, + "url": product_url, + "product_detail": None, + "status": "failed", + "error": str(e), + "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + ) + fail_count += 1 + + finally: + # 각 크롤러 개별 정리 + await crawler.close() + + # 상품간 간격 (서버 부하 방지) + if i < len(product_urls): + await asyncio.sleep(1) + + logger.success( + f"전체 크롤링 완료: 총 {len(product_urls)}개, 성공 {success_count}개, 실패 {fail_count}개" ) - # 상세 정보 크롤링 실행 - product_detail = await crawler.crawl_detail( - product_url=str(request.product_url) + # 응답 데이터 구성 + data = { + "crawled_products": crawled_products, + "success_count": success_count, + "fail_count": fail_count, + "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + + logger.info( + f"상품 상세 크롤링 서비스 완료: success_rate={success_count}/{len(product_urls)}" ) + return Response.ok(data) + + except Exception as e: + logger.error(f"배치 크롤링 서비스 오류: error='{e}'") + raise InvalidItemDataException() + + # 기존 단일 크롤링 메서드도 유지 (하위 호환성) + async def crawl_single_product_detail(self, product_url: str) -> dict: + """ + 단일 상품 크롤링 (하위 호환성용) + """ + crawler = DetailCrawler(use_selenium=True) + + try: + logger.info(f"단일 상품 크롤링 시작: {product_url}") + + product_detail = await crawler.crawl_detail(product_url) if not product_detail: - logger.error(f"상품 상세 정보 크롤링 실패: url={request.product_url}") + logger.error(f"상품 상세 정보 크롤링 실패: url={product_url}") raise InvalidItemDataException() product_title = product_detail.get("title", "Unknown")[:50] - logger.success( - f"크롤링 완료: title='{product_title}', price={product_detail.get('price', 0)}, options_count={len(product_detail.get('options', []))}" - ) + logger.success(f"크롤링 완료: title='{product_title}'") - # 응답 데이터 구성 data = { - "product_url": str(request.product_url), + "product_url": product_url, "product_detail": product_detail, "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"), } - logger.info(f"상품 상세 크롤링 서비스 완료: status=success") return Response.ok(data) except Exception as e: - logger.error( - f"크롤링 서비스 오류: product_url={request.product_url}, error='{e}'" - ) + logger.error(f"단일 크롤링 오류: url={product_url}, error='{e}'") raise InvalidItemDataException() finally: await 
crawler.close() - logger.debug("크롤러 리소스 정리 완료") diff --git a/apps/pre-processing-service/app/service/crawlers/search_crawler.py b/apps/pre-processing-service/app/service/crawlers/search_crawler.py index a0d46e02..1bc36fc5 100644 --- a/apps/pre-processing-service/app/service/crawlers/search_crawler.py +++ b/apps/pre-processing-service/app/service/crawlers/search_crawler.py @@ -49,7 +49,7 @@ async def search_products_selenium(self, keyword: str) -> list[dict]: logger.info( f"Selenium으로 발견한 상품 링크: {len(unique_products)}개 (중복 제거 전: {len(product_links)}개)" ) - return unique_products[:20] + return unique_products[:40] except Exception as e: logger.error(f"Selenium 검색 오류: keyword='{keyword}', error='{e}'") @@ -88,7 +88,7 @@ async def search_products_httpx(self, keyword: str) -> list[dict]: product_links.append({"url": full_url, "title": title}) logger.info(f"httpx로 발견한 상품 링크: {len(product_links)}개") - return product_links[:20] + return product_links[:40] except Exception as e: logger.error(f"httpx 검색 오류: keyword='{keyword}', error='{e}'") diff --git a/apps/pre-processing-service/app/service/s3_upload_service.py b/apps/pre-processing-service/app/service/s3_upload_service.py new file mode 100644 index 00000000..1c024a63 --- /dev/null +++ b/apps/pre-processing-service/app/service/s3_upload_service.py @@ -0,0 +1,125 @@ +import time +import asyncio +import aiohttp +from typing import List, Dict +from loguru import logger +from app.errors.CustomException import InvalidItemDataException +from app.model.schemas import RequestS3Upload +from app.utils.s3_upload_util import S3UploadUtil +from app.utils.response import Response + + +class S3UploadService: + """6단계: 크롤링된 상품 이미지들과 데이터를 S3에 업로드하는 서비스""" + + def __init__(self): + self.s3_util = S3UploadUtil() + + async def upload_crawled_products_to_s3(self, request: RequestS3Upload) -> dict: + """ + 크롤링된 상품들의 이미지와 데이터를 S3에 업로드하는 비즈니스 로직 (6단계) + """ + keyword = request.keyword # 키워드 추가 + crawled_products = request.crawled_products + base_folder = ( + request.base_folder or "product" + ) # 🔸 기본값 변경: product-images → product + + logger.info( + f"S3 업로드 서비스 시작: keyword='{keyword}', {len(crawled_products)}개 상품" + ) + + upload_results = [] + total_success_images = 0 + total_fail_images = 0 + + try: + # HTTP 세션을 사용한 이미지 다운로드 + async with aiohttp.ClientSession() as session: + + # 각 상품별로 순차 업로드 + for product_info in crawled_products: + product_index = product_info.get("index", 0) + product_detail = product_info.get("product_detail") + + logger.info( + f"상품 {product_index}/{len(crawled_products)} S3 업로드 시작" + ) + + # 크롤링 실패한 상품은 스킵 + if not product_detail or product_info.get("status") != "success": + logger.warning( + f"상품 {product_index}: 크롤링 실패로 인한 업로드 스킵" + ) + upload_results.append( + { + "product_index": product_index, + "product_title": "Unknown", + "status": "skipped", + "folder_s3_url": None, + "uploaded_images": [], + "success_count": 0, + "fail_count": 0, + } + ) + continue + + try: + # 상품 이미지 + 데이터 업로드 (키워드 전달 추가!) 
+ # 🔸 전체 크롤링 데이터를 전달 (product_detail이 아닌 product_info 전체) + upload_result = await self.s3_util.upload_single_product_images( + session, + product_info, + product_index, + keyword, + base_folder, # product_detail → product_info + ) + + upload_results.append(upload_result) + total_success_images += upload_result["success_count"] + total_fail_images += upload_result["fail_count"] + + logger.success( + f"상품 {product_index} S3 업로드 완료: 성공 {upload_result['success_count']}개, " + f"실패 {upload_result['fail_count']}개" + ) + + except Exception as e: + logger.error(f"상품 {product_index} S3 업로드 오류: {e}") + upload_results.append( + { + "product_index": product_index, + "product_title": product_detail.get("title", "Unknown"), + "status": "error", + "folder_s3_url": None, + "uploaded_images": [], + "success_count": 0, + "fail_count": 0, + } + ) + + # 상품간 간격 (서버 부하 방지) + if product_index < len(crawled_products): + await asyncio.sleep(1) + + logger.success( + f"S3 업로드 서비스 완료: 총 성공 이미지 {total_success_images}개, 총 실패 이미지 {total_fail_images}개" + ) + + # 간소화된 응답 데이터 구성 + data = { + "upload_results": upload_results, + "summary": { + "total_products": len(crawled_products), + "total_success_images": total_success_images, + "total_fail_images": total_fail_images, + }, + "uploaded_at": time.strftime("%Y-%m-%d %H:%M:%S"), + } + + message = f"S3 업로드 완료: {total_success_images}개 이미지 업로드 성공, 상품 데이터 JSON 파일 포함" + return Response.ok(data, message) + + except Exception as e: + logger.error(f"S3 업로드 서비스 전체 오류: {e}") + raise InvalidItemDataException() diff --git a/apps/pre-processing-service/app/service/search_service.py b/apps/pre-processing-service/app/service/search_service.py index 171bd57f..070f6cc2 100644 --- a/apps/pre-processing-service/app/service/search_service.py +++ b/apps/pre-processing-service/app/service/search_service.py @@ -77,9 +77,9 @@ async def search_products(self, request: RequestSadaguSearch) -> dict: logger.debug(f"상품 {i + 1}: 제목 추출 실패, 제외") continue - # 최대 20개까지만 처리 - if len(enriched_results) >= 20: - logger.info("최대 20개 상품 수집 완료") + # 최대 40개까지 처리 + if len(enriched_results) >= 40: + logger.info("최대 40개 상품 수집 완료") break except Exception as e: diff --git a/apps/pre-processing-service/app/service/similarity_service.py b/apps/pre-processing-service/app/service/similarity_service.py index 516b0c63..cf943279 100644 --- a/apps/pre-processing-service/app/service/similarity_service.py +++ b/apps/pre-processing-service/app/service/similarity_service.py @@ -9,16 +9,19 @@ class SimilarityService: def __init__(self): pass - def select_product_by_similarity(self, request: RequestSadaguSimilarity) -> dict: + def select_top_products_by_similarity( + self, request: RequestSadaguSimilarity + ) -> dict: """ - BERT 기반 유사도 분석 후 상품 선택 - 4단계 + 형태소 분석 후 Top 10 선택 (10개 이하면 유사도 분석 생략) """ keyword = request.keyword candidates = request.matched_products fallback_products = request.search_results or [] + top_count = 10 # Top 10 개수 설정 logger.info( - f"유사도 분석 서비스 시작: keyword='{keyword}', matched_count={len(candidates) if candidates else 0}, fallback_count={len(fallback_products)}" + f"상품 선택 서비스 시작 (Top {top_count}): keyword='{keyword}', matched_count={len(candidates) if candidates else 0}, fallback_count={len(fallback_products)}" ) # 매칭된 상품이 없으면 전체 검색 결과로 폴백 @@ -30,130 +33,151 @@ def select_product_by_similarity(self, request: RequestSadaguSimilarity) -> dict data = { "keyword": keyword, - "selected_product": None, + "top_products": [], "reason": "매칭된 상품과 검색 결과가 모두 없음", } return Response.ok(data, "매칭된 상품과 검색 결과가 모두 없습니다.") - 
logger.info("매칭된 상품 없음 → 전체 검색 결과에서 유사도 분석") + logger.info("매칭된 상품 없음 → 전체 검색 결과에서 유사도 분석 진행") candidates = fallback_products analysis_mode = "fallback_similarity_only" + skip_similarity = False else: analysis_mode = "matched_products" + # 형태소 분석 결과가 10개 이하면 유사도 분석 생략 + skip_similarity = len(candidates) <= top_count try: - analyzer = SimilarityAnalyzerONNX() - - logger.info( - f"키워드 '{keyword}'와 {len(candidates)}개 상품의 유사도 분석 시작... (모드: {analysis_mode})" - ) - - # 한 개만 있으면 바로 선택 - if len(candidates) == 1: - selected_product = candidates[0] - - logger.info("단일 후보 상품 - 유사도 검증 진행") - # 유사도 계산 - similarity = analyzer.calculate_similarity( - keyword, selected_product["title"] + # 형태소 분석 결과가 10개 이하면 유사도 분석 생략하고 바로 반환 + if skip_similarity and analysis_mode == "matched_products": + logger.info( + f"형태소 분석 결과가 {len(candidates)}개로 {top_count}개 이하 - 유사도 분석 생략" ) - # 폴백 모드에서는 임계값 검증 - if analysis_mode == "fallback_similarity_only": - similarity_threshold = 0.3 - if similarity < similarity_threshold: - logger.warning( - f"단일 상품 유사도 미달: similarity={similarity:.4f} < threshold={similarity_threshold}" - ) - data = { - "keyword": keyword, - "selected_product": None, - "reason": f"단일 상품 유사도({similarity:.4f}) < 기준({similarity_threshold})", - } - return Response.ok( - data, "단일 상품 유사도 미달 되어 상품이 존재하지않습니다." - ) - - selected_product["similarity_info"] = { - "similarity_score": float(similarity), - "analysis_type": "single_candidate", - "analysis_mode": analysis_mode, - } + # 매칭 스코어 기준으로 정렬된 상태 유지 (이미 match_service에서 정렬됨) + top_products = [] + for i, product in enumerate(candidates): + enhanced_product = product.copy() + enhanced_product["rank"] = i + 1 + enhanced_product["selection_info"] = { + "selection_type": "match_only", + "match_score": product.get("match_info", {}).get( + "match_score", 0.0 + ), + "reason": "형태소 분석만으로 선택 (유사도 분석 생략)", + "total_candidates": len(candidates), + } + top_products.append(enhanced_product) logger.success( - f"단일 상품 선택 완료: title='{selected_product['title'][:30]}', similarity={similarity:.4f}" + f"형태소 분석만으로 상품 선택 완료: keyword='{keyword}', selected_count={len(top_products)}" ) + data = { "keyword": keyword, - "selected_product": selected_product, - "reason": f"단일 상품 - 유사도: {similarity:.4f} ({analysis_mode})", + "top_products": top_products, + "reason": f"형태소 분석 결과 {len(candidates)}개 - 유사도 분석 생략", } return Response.ok(data) - # 여러 개가 있으면 유사도 비교 - logger.info("여러 상품 중 최고 유사도로 선택...") + # 유사도 분석 필요한 경우 (매칭 결과가 10개 초과이거나 폴백 모드) + analyzer = SimilarityAnalyzerONNX() + + logger.info( + f"키워드 '{keyword}'와 {len(candidates)}개 상품의 유사도 분석 시작... 
(모드: {analysis_mode})" + ) - # 제목만 추출해서 배치 분석 + # 모든 후보에 대해 유사도 계산 titles = [product["title"] for product in candidates] similarity_results = analyzer.analyze_similarity_batch(keyword, titles) - # 결과 출력 - logger.info("유사도 분석 결과:") - for i, result in enumerate(similarity_results[:5]): # 상위 5개만 로그 - logger.info( - f" {i+1}위: {result['title'][:40]} | 유사도: {result['similarity']:.4f}" - ) + # 유사도 정보 추가 및 Top 10 선택 + enhanced_products = [] + similarity_threshold = ( + 0.3 if analysis_mode == "fallback_similarity_only" else 0.0 + ) - # 최고 유사도 선택 - best_result = similarity_results[0] - selected_product = candidates[best_result["index"]].copy() + for i, result in enumerate(similarity_results): + product = candidates[result["index"]].copy() - # 폴백 모드에서는 임계값 검증 - similarity_threshold = 0.3 - if ( - analysis_mode == "fallback_similarity_only" - and best_result["similarity"] < similarity_threshold - ): - logger.warning( - f"최고 유사도 미달: similarity={best_result['similarity']:.4f} < threshold={similarity_threshold}" - ) - data = { - "keyword": keyword, - "selected_product": None, - "reason": f"최고 유사도({best_result['similarity']:.4f}) < 기준({similarity_threshold})", + # 폴백 모드에서는 임계값 검증 + if ( + analysis_mode == "fallback_similarity_only" + and result["similarity"] < similarity_threshold + ): + logger.debug( + f"상품 {i + 1} 유사도 미달로 제외: similarity={result['similarity']:.4f} < threshold={similarity_threshold}" + ) + continue + + product["similarity_info"] = { + "similarity_score": result["similarity"], + "analysis_type": "batch_similarity", + "analysis_mode": analysis_mode, } - return Response.ok(data, "최고 유사도가 기준보다 미달 되었습니다.") - - # 유사도 정보 추가 - selected_product["similarity_info"] = { - "similarity_score": best_result["similarity"], - "analysis_type": "multi_candidate_bert", - "analysis_mode": analysis_mode, - "rank": 1, - "total_candidates": len(candidates), - } - # 매칭 모드에서는 종합 점수도 계산 - if analysis_mode == "matched_products" and "match_info" in selected_product: - match_score = selected_product["match_info"]["match_score"] - similarity_score = best_result["similarity"] - # 가중치: 매칭 40%, 유사도 60% - final_score = match_score * 0.4 + similarity_score * 0.6 - selected_product["final_score"] = final_score - reason = f"종합점수({final_score:.4f}) = 매칭({match_score:.4f})*0.4 + 유사도({similarity_score:.4f})*0.6" - logger.info( - f"종합 점수 계산: match_score={match_score:.4f}, similarity_score={similarity_score:.4f}, final_score={final_score:.4f}" + # 매칭 모드에서는 종합 점수 계산 + if analysis_mode == "matched_products" and "match_info" in product: + match_score = product["match_info"]["match_score"] + similarity_score = result["similarity"] + # 가중치: 매칭 40%, 유사도 60% + final_score = match_score * 0.4 + similarity_score * 0.6 + product["final_score"] = final_score + product["selection_info"] = { + "selection_type": "match_and_similarity", + "match_score": match_score, + "similarity_score": similarity_score, + "final_score": final_score, + "reason": f"종합점수({final_score:.4f}) = 매칭({match_score:.4f})*0.4 + 유사도({similarity_score:.4f})*0.6", + } + else: + product["selection_info"] = { + "selection_type": "similarity_only", + "similarity_score": result["similarity"], + "reason": f"유사도({result['similarity']:.4f}) 기준 선택 ({analysis_mode})", + } + + enhanced_products.append(product) + + # 종합 점수 또는 유사도 기준으로 재정렬 + if analysis_mode == "matched_products": + enhanced_products.sort( + key=lambda x: x.get( + "final_score", x["similarity_info"]["similarity_score"] + ), + reverse=True, ) else: - reason = f"유사도({best_result['similarity']:.4f}) 기준 선택 
({analysis_mode})" + enhanced_products.sort( + key=lambda x: x["similarity_info"]["similarity_score"], reverse=True + ) + + # Top 10 선택 + top_products = enhanced_products[:top_count] + + # 순위 정보 추가 + for i, product in enumerate(top_products): + product["rank"] = i + 1 logger.success( - f"상품 선택 완료: title='{selected_product['title'][:30]}', {reason}" + f"유사도 분석 완료: keyword='{keyword}', total_analyzed={len(candidates)}, valid_results={len(enhanced_products)}, top_selected={len(top_products)}" ) + + if top_products: + best_product = top_products[0] + if "final_score" in best_product: + logger.info( + f"1위 상품: title='{best_product['title'][:30]}', final_score={best_product['final_score']:.4f}" + ) + else: + logger.info( + f"1위 상품: title='{best_product['title'][:30]}', similarity={best_product['similarity_info']['similarity_score']:.4f}" + ) + data = { "keyword": keyword, - "selected_product": selected_product, - "reason": reason, + "top_products": top_products, + "reason": f"유사도 분석 후 상위 {len(top_products)}개 선택 ({analysis_mode})", } return Response.ok(data) diff --git a/apps/pre-processing-service/app/utils/s3_upload_util.py b/apps/pre-processing-service/app/utils/s3_upload_util.py new file mode 100644 index 00000000..0aaa5ace --- /dev/null +++ b/apps/pre-processing-service/app/utils/s3_upload_util.py @@ -0,0 +1,281 @@ +import os +import json +import boto3 +import aiohttp +import asyncio +from datetime import datetime +from urllib.parse import urlparse +from typing import Dict, Optional +from loguru import logger + + +class S3UploadUtil: + """S3 업로드 전용 유틸리티 클래스""" + + def __init__(self): + # 환경변수에서 AWS 설정 읽기 + self.aws_access_key = os.getenv("AWS_ACCESS_KEY_ID") + self.aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY") + self.bucket_name = os.getenv("S3_BUCKET_NAME", "icebang4-dev-bucket") + self.region = os.getenv("AWS_REGION", "ap-northeast-2") + + if not self.aws_access_key or not self.aws_secret_key: + raise ValueError( + "AWS_ACCESS_KEY_ID와 AWS_SECRET_ACCESS_KEY 환경변수가 필요합니다" + ) + + self.base_url = f"https://{self.bucket_name}.s3.{self.region}.amazonaws.com" + + # S3 클라이언트 초기화 + self.s3_client = boto3.client( + "s3", + aws_access_key_id=self.aws_access_key, + aws_secret_access_key=self.aws_secret_key, + region_name=self.region, + ) + + logger.info( + f"S3 클라이언트 초기화 완료: bucket={self.bucket_name}, region={self.region}" + ) + + async def download_image( + self, session: aiohttp.ClientSession, image_url: str + ) -> Optional[bytes]: + """이미지 URL에서 이미지 데이터 다운로드""" + try: + logger.debug(f"이미지 다운로드 시작: {image_url}") + + async with session.get( + image_url, timeout=aiohttp.ClientTimeout(total=30) + ) as response: + if response.status == 200: + image_data = await response.read() + logger.debug(f"이미지 다운로드 완료: {len(image_data)} bytes") + return image_data + else: + logger.warning( + f"이미지 다운로드 실패: {image_url}, status={response.status}" + ) + return None + + except Exception as e: + logger.error(f"이미지 다운로드 오류: {image_url}, error={e}") + return None + + def get_file_extension(self, image_url: str) -> str: + """URL에서 파일 확장자 추출""" + parsed = urlparse(image_url) + path = parsed.path.lower() + + # 일반적인 이미지 확장자 확인 + for ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]: + if ext in path: + return ext + + # 기본값 + return ".jpg" + + def get_content_type(self, file_extension: str) -> str: + """파일 확장자에 따른 Content-Type 반환""" + content_types = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", + } + return content_types.get(file_extension, 
"image/jpeg") + + def upload_to_s3( + self, data: bytes, s3_key: str, content_type: str = "image/jpeg" + ) -> bool: + """S3에 데이터 업로드 (이미지 또는 JSON)""" + try: + logger.debug(f"S3 업로드 시작: key={s3_key}") + + self.s3_client.put_object( + Bucket=self.bucket_name, + Key=s3_key, + Body=data, + ContentType=content_type, + ) + + logger.debug(f"S3 업로드 완료: key={s3_key}") + return True + + except Exception as e: + logger.error(f"S3 업로드 오류: key={s3_key}, error={e}") + return False + + def upload_json_to_s3(self, json_data: Dict, s3_key: str) -> bool: + """JSON 데이터를 S3에 업로드""" + try: + json_str = json.dumps(json_data, ensure_ascii=False, indent=2) + json_bytes = json_str.encode("utf-8") + + return self.upload_to_s3(json_bytes, s3_key, "application/json") + + except Exception as e: + logger.error(f"JSON S3 업로드 오류: key={s3_key}, error={e}") + return False + + def generate_product_folder_name(self, product_index: int, keyword: str) -> str: + """상품별 폴더명 생성 (시간_키워드_번호)""" + # 키워드에서 특수문자 제거 + safe_keyword = ( + keyword.replace("/", "-") + .replace("\\", "-") + .replace(" ", "_") + .replace(":", "-") + .replace("*", "-") + .replace("?", "-") + .replace('"', "-") + .replace("<", "-") + .replace(">", "-") + .replace("|", "-")[:20] # 길이 제한 + ) + + # 날짜 형식: 20250922 + date_str = datetime.now().strftime("%Y%m%d") + + # 폴더명: 20250922_키워드_1 + folder_name = f"{date_str}_{safe_keyword}_{product_index}" + + return folder_name + + def generate_s3_key( + self, + base_folder: str, + folder_name: str, + file_name: str, + ) -> str: + """S3 키 생성""" + # 최종 S3 키: product/20250922_산리오_1/image_001.jpg 또는 product_data.json + s3_key = f"{base_folder}/{folder_name}/{file_name}" + return s3_key + + def get_s3_url(self, s3_key: str) -> str: + """S3 키에서 접근 가능한 URL 생성""" + return f"{self.base_url}/{s3_key}" + + async def upload_single_product_images( + self, + session: aiohttp.ClientSession, + product_info: Dict, # 🔸 이름 변경: product_data → product_info (전체 크롤링 데이터) + product_index: int, + keyword: str, # 키워드 파라미터 추가 + base_folder: str = "product", # 🔸 기본 폴더 변경: product-images → product + ) -> Dict: + """단일 상품의 모든 데이터(이미지 + JSON)를 S3에 업로드""" + + # 🔸 전체 크롤링 데이터에서 필요한 정보 추출 + product_detail = product_info.get("product_detail", {}) + product_title = product_detail.get("title", "Unknown") + product_images = product_detail.get("product_images", []) + + uploaded_images = [] + + logger.info( + f"상품 {product_index} 업로드 시작: {len(product_images)}개 이미지, keyword='{keyword}'" + ) + + # 키워드 기반 폴더명 한 번만 생성 + folder_name = self.generate_product_folder_name(product_index, keyword) + + fail_count = 0 + folder_s3_url = f"{self.base_url}/{base_folder}/{folder_name}" + + # 🆕 1. 먼저 상품 데이터 JSON 파일 업로드 + try: + # 전체 크롤링 데이터를 JSON으로 저장 (S3 업로드 메타데이터 추가) + product_data_with_meta = { + **product_info, # 전체 크롤링 데이터 (index, url, product_detail, status, crawled_at 포함) + "s3_upload_keyword": keyword, # 추가 메타데이터 + "s3_uploaded_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + } + + json_s3_key = self.generate_s3_key( + base_folder, folder_name, "product_data.json" + ) + + if self.upload_json_to_s3(product_data_with_meta, json_s3_key): + logger.success(f"상품 {product_index} JSON 데이터 업로드 완료") + else: + logger.error(f"상품 {product_index} JSON 데이터 업로드 실패") + + except Exception as e: + logger.error(f"상품 {product_index} JSON 업로드 오류: {e}") + + # 2. 
이미지 업로드 (기존 로직) + if not product_images: + logger.warning(f"상품 {product_index}: 업로드할 이미지가 없음") + return { + "product_index": product_index, + "product_title": product_title, + "status": "no_images", + "folder_s3_url": folder_s3_url, + "uploaded_images": uploaded_images, + "success_count": 0, + "fail_count": 0, + } + + # 각 이미지 업로드 + for img_idx, img_info in enumerate(product_images, 1): + original_url = img_info.get("original_url", "") + + if not original_url: + logger.warning(f"상품 {product_index}, 이미지 {img_idx}: URL이 없음") + fail_count += 1 + continue + + try: + # 이미지 다운로드 + image_data = await self.download_image(session, original_url) + + if not image_data: + fail_count += 1 + continue + + # S3 키 생성 (키워드 기반 폴더명 사용) + file_extension = self.get_file_extension(original_url) + image_file_name = f"image_{img_idx:03d}{file_extension}" + s3_key = self.generate_s3_key(base_folder, folder_name, image_file_name) + + # S3 업로드 + content_type = self.get_content_type(file_extension) + + if self.upload_to_s3(image_data, s3_key, content_type): + s3_url = self.get_s3_url(s3_key) + uploaded_images.append( + { + "index": img_idx, + "original_url": original_url, + "s3_url": s3_url, + } + ) + + logger.debug(f"상품 {product_index}, 이미지 {img_idx} 업로드 완료") + else: + fail_count += 1 + + except Exception as e: + logger.error(f"상품 {product_index}, 이미지 {img_idx} 처리 오류: {e}") + fail_count += 1 + + # 이미지 간 간격 (서버 부하 방지) + await asyncio.sleep(0.5) + + logger.success( + f"상품 {product_index} 업로드 완료: 성공 {len(uploaded_images)}개, 실패 {fail_count}개, folder='{folder_name}'" + ) + + return { + "product_index": product_index, + "product_title": product_title, + "status": "completed", + "folder_s3_url": folder_s3_url, # 🔸 폴더 전체를 가리킴 (이미지 + JSON 포함) + "json_s3_url": f"{folder_s3_url}/product_data.json", # 🆕 JSON 파일 직접 링크 + "uploaded_images": uploaded_images, + "success_count": len(uploaded_images), + "fail_count": fail_count, + } diff --git a/apps/pre-processing-service/poetry.lock b/apps/pre-processing-service/poetry.lock index ca5c20ab..f02855bc 100644 --- a/apps/pre-processing-service/poetry.lock +++ b/apps/pre-processing-service/poetry.lock @@ -321,6 +321,46 @@ d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "boto3" +version = "1.40.35" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "boto3-1.40.35-py3-none-any.whl", hash = "sha256:f4c1b01dd61e7733b453bca38b004ce030e26ee36e7a3d4a9e45a730b67bc38d"}, + {file = "boto3-1.40.35.tar.gz", hash = "sha256:d718df3591c829bcca4c498abb7b09d64d1eecc4e5a2b6cef14b476501211b8a"}, +] + +[package.dependencies] +botocore = ">=1.40.35,<1.41.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.14.0,<0.15.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.40.35" +description = "Low-level, data-driven core of boto 3." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "botocore-1.40.35-py3-none-any.whl", hash = "sha256:c545de2cbbce161f54ca589fbb677bae14cdbfac7d5f1a27f6a620cb057c26f4"}, + {file = "botocore-1.40.35.tar.gz", hash = "sha256:67e062752ff579c8cc25f30f9c3a84c72d692516a41a9ee1cf17735767ca78be"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.27.6)"] + [[package]] name = "bs4" version = "0.0.2" @@ -1320,6 +1360,18 @@ files = [ {file = "jiter-0.11.0.tar.gz", hash = "sha256:1d9637eaf8c1d6a63d6562f2a6e5ab3af946c66037eb1b894e8fad75422266e4"}, ] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "joblib" version = "1.5.2" @@ -1693,14 +1745,14 @@ sympy = "*" [[package]] name = "openai" -version = "1.108.0" +version = "1.108.1" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "openai-1.108.0-py3-none-any.whl", hash = "sha256:31f2e58230e2703f13ddbb50c285f39dacf7fca64ab19882fd8a7a0b2bccd781"}, - {file = "openai-1.108.0.tar.gz", hash = "sha256:e859c64e4202d7f5956f19280eee92bb281f211c41cdd5be9e63bf51a024ff72"}, + {file = "openai-1.108.1-py3-none-any.whl", hash = "sha256:952fc027e300b2ac23be92b064eac136a2bc58274cec16f5d2906c361340d59b"}, + {file = "openai-1.108.1.tar.gz", hash = "sha256:6648468c1aec4eacfa554001e933a9fa075f57bacfc27588c2e34456cee9fef9"}, ] [package.dependencies] @@ -1793,14 +1845,14 @@ testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "poetry-core" -version = "2.2.0" +version = "2.2.1" description = "Poetry PEP 517 Build Backend" optional = false python-versions = "<4.0,>=3.9" groups = ["main"] files = [ - {file = "poetry_core-2.2.0-py3-none-any.whl", hash = "sha256:0edea81d07e88cbd407369eef753c722da8ff1338f554788dc04636e756318fc"}, - {file = "poetry_core-2.2.0.tar.gz", hash = "sha256:b4033b71b99717a942030e074fec7e3082e5fde7a8ed10f02cd2413bdf940b1f"}, + {file = "poetry_core-2.2.1-py3-none-any.whl", hash = "sha256:bdfce710edc10bfcf9ab35041605c480829be4ab23f5bc01202cfe5db8f125ab"}, + {file = "poetry_core-2.2.1.tar.gz", hash = "sha256:97e50d8593c8729d3f49364b428583e044087ee3def1e010c6496db76bd65ac5"}, ] [[package]] @@ -2288,14 +2340,14 @@ rsa = ["cryptography"] [[package]] name = "pyparsing" -version = "3.2.4" +version = "3.2.5" description = "pyparsing - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyparsing-3.2.4-py3-none-any.whl", hash = "sha256:91d0fcde680d42cd031daf3a6ba20da3107e08a75de50da58360e7d94ab24d36"}, - {file = "pyparsing-3.2.4.tar.gz", hash = "sha256:fff89494f45559d0f2ce46613b419f632bbb6afbdaed49696d322bcf98a58e99"}, + {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"}, + {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"}, ] [package.extras] 
@@ -2364,6 +2416,21 @@ pygments = ">=2.7.2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-dotenv" version = "1.1.1" @@ -2638,6 +2705,24 @@ files = [ [package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "s3transfer" +version = "0.14.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456"}, + {file = "s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125"}, +] + +[package.dependencies] +botocore = ">=1.37.4,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] + [[package]] name = "safetensors" version = "0.6.2" @@ -2832,6 +2917,18 @@ typing_extensions = ">=4.14.0,<4.15.0" urllib3 = {version = ">=2.5.0,<3.0", extras = ["socks"]} websocket-client = ">=1.8.0,<1.9.0" +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3015,31 +3112,31 @@ files = [ [[package]] name = "tokenizers" -version = "0.22.0" +version = "0.22.1" description = "" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "tokenizers-0.22.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:eaa9620122a3fb99b943f864af95ed14c8dfc0f47afa3b404ac8c16b3f2bb484"}, - {file = "tokenizers-0.22.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:71784b9ab5bf0ff3075bceeb198149d2c5e068549c0d18fe32d06ba0deb63f79"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec5b71f668a8076802b0241a42387d48289f25435b86b769ae1837cad4172a17"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ea8562fa7498850d02a16178105b58803ea825b50dc9094d60549a7ed63654bb"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4136e1558a9ef2e2f1de1555dcd573e1cbc4a320c1a06c4107a3d46dc8ac6e4b"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf5954de3962a5fd9781dc12048d24a1a6f1f5df038c6e95db328cd22964206"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8337ca75d0731fc4860e6204cc24bb36a67d9736142aa06ed320943b50b1e7ed"}, - {file = "tokenizers-0.22.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a89264e26f63c449d8cded9061adea7b5de53ba2346fc7e87311f7e4117c1cc8"}, - {file = "tokenizers-0.22.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:790bad50a1b59d4c21592f9c3cf5e5cf9c3c7ce7e1a23a739f13e01fb1be377a"}, - {file = "tokenizers-0.22.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:76cf6757c73a10ef10bf06fa937c0ec7393d90432f543f49adc8cab3fb6f26cb"}, - {file = "tokenizers-0.22.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:1626cb186e143720c62c6c6b5371e62bbc10af60481388c0da89bc903f37ea0c"}, - {file = "tokenizers-0.22.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:da589a61cbfea18ae267723d6b029b84598dc8ca78db9951d8f5beff72d8507c"}, - {file = "tokenizers-0.22.0-cp39-abi3-win32.whl", hash = "sha256:dbf9d6851bddae3e046fedfb166f47743c1c7bd11c640f0691dd35ef0bcad3be"}, - {file = "tokenizers-0.22.0-cp39-abi3-win_amd64.whl", hash = "sha256:c78174859eeaee96021f248a56c801e36bfb6bd5b067f2e95aa82445ca324f00"}, - {file = "tokenizers-0.22.0.tar.gz", hash = "sha256:2e33b98525be8453f355927f3cab312c36cd3e44f4d7e9e97da2fa94d0a49dcb"}, + {file = "tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73"}, + {file = "tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390"}, + {file = "tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82"}, + {file = "tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138"}, + {file = "tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9"}, ] [package.dependencies] -huggingface-hub = ">=0.16.4,<1.0" +huggingface-hub = ">=0.16.4,<2.0" [package.extras] dev = ["tokenizers[testing]"] @@ -3070,14 +3167,14 @@ telegram = 
["requests"] [[package]] name = "transformers" -version = "4.56.1" +version = "4.56.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" groups = ["main"] files = [ - {file = "transformers-4.56.1-py3-none-any.whl", hash = "sha256:1697af6addfb6ddbce9618b763f4b52d5a756f6da4899ffd1b4febf58b779248"}, - {file = "transformers-4.56.1.tar.gz", hash = "sha256:0d88b1089a563996fc5f2c34502f10516cad3ea1aa89f179f522b54c8311fe74"}, + {file = "transformers-4.56.2-py3-none-any.whl", hash = "sha256:79c03d0e85b26cb573c109ff9eafa96f3c8d4febfd8a0774e8bba32702dd6dde"}, + {file = "transformers-4.56.2.tar.gz", hash = "sha256:5e7c623e2d7494105c726dd10f6f90c2c99a55ebe86eef7233765abd0cb1c529"}, ] [package.dependencies] @@ -3094,23 +3191,23 @@ tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.26.0)"] -all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"] chat-template = ["jinja2 (>=3.1.0)"] codecarbon = ["codecarbon (>=2.8.1)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", 
"datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.22.0,<=0.23.0)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score 
(!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.22.0,<=0.23.0)", "urllib3 
(<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<=0.9)", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] -hf-xet = ["hf_xet"] +hf-xet = ["hf-xet"] hub-kernels = ["kernels (>=0.6.1,<=0.9)"] integrations = ["kernels (>=0.6.1,<=0.9)", "optuna", "ray[tune] (>=2.7.0)", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] mistral-common = ["mistral-common[opencv] (>=1.6.3)"] modelcreation = ["cookiecutter (==1.7.3)"] natten = ["natten (>=0.14.6,<0.15.0)"] @@ -3129,7 +3226,7 @@ serving = ["accelerate (>=0.26.0)", "fastapi", "openai (>=1.98.0)", "pydantic (> sigopt = ["sigopt"] sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.11.2)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", 
"timeout-decorator"] tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] @@ -3139,7 +3236,7 @@ tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] torch = ["accelerate (>=0.26.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "tqdm (>=4.27)"] video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] @@ -3429,4 +3526,4 @@ propcache = ">=0.2.1" [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.14" -content-hash = "42274fd00aedabf70dc419acd06e2f25b5c69b58b7bf76eef2ea7a9df6470b2c" +content-hash = "fe9799a3d3a101e05d75d5e193c6e9e4ef17a7581cb273f41101e12129f80a2f" diff --git a/apps/pre-processing-service/pyproject.toml b/apps/pre-processing-service/pyproject.toml index 84a957b9..8cb11c0f 100644 --- a/apps/pre-processing-service/pyproject.toml +++ b/apps/pre-processing-service/pyproject.toml @@ -38,6 +38,7 @@ openai = "^1.107.3" aiohttp = "^3.12.15" prometheus-client = "^0.23.1" prometheus-fastapi-instrumentator = "^7.1.0" +boto3 = "^1.40.35" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"]