diff --git a/apps/pre-processing-service/app/test/test_extraction_html.py b/apps/pre-processing-service/app/test/test_extraction_html.py
new file mode 100644
index 00000000..a023972c
--- /dev/null
+++ b/apps/pre-processing-service/app/test/test_extraction_html.py
@@ -0,0 +1,487 @@
+# if __name__ == "__main__":
+#     from app.utils.crawling_util import CrawlingUtil
+#     from app.utils.llm_extractor import LLMExtractor
+#     from selenium.webdriver.common.by import By
+#     from selenium.webdriver.support import expected_conditions as EC
+#     from selenium.common.exceptions import TimeoutException
+#     from selenium.webdriver.common.keys import Keys
+#     from selenium.webdriver.common.action_chains import ActionChains
+#     import pyperclip
+#     import time
+#     import json
+#
+#     crawling_util = CrawlingUtil()
+#     llm_extractor = LLMExtractor()
+#
+#     start_time = time.time()
+#     driver = crawling_util.get_driver()
+#     wait_driver = crawling_util.get_wait()
+#
+#     # ========== 로그인 부분 ==========
+#     driver.get("https://nid.naver.com/nidlogin.login")
+#     time.sleep(5)
+#     html = driver.page_source
+#
+#     print(f"원본 HTML 길이: {len(html)}")
+#     html_list = preprocess_html(html)
+#
+#     result_html = 0
+#
+#     for html in html_list:
+#         result_html += len(html)
+#
+#     print(f"전처리된 HTML 총 길이: {result_html}, 분할된 청크 수: {len(html_list)}")
+#
+#     result = []
+#
+#     for idx, html in enumerate(html_list):
+#         print(f"전처리된 HTML 길이: {len(html)}, List {idx}번 ")
+#         prompt = llm_extractor.login_extraction_prompt("아이디, 비밀번호를 입력할 수 있는 요소, 로그인 버튼을 클릭할 수 있는 요소", html)
+#
+#         response = llm_extractor.client.chat.completions.create(
+#             model=llm_extractor.model,
+#             messages=[{"role": "system", "content": prompt}],
+#             temperature=0,
+#             response_format={"type": "json_object"}
+#         )
+#
+#         result_json = response.choices[0].message.content
+#
+#         result.append(result_json)
+#
+#     parse_result = [json.loads(item) for item in result]
+#     print(json.dumps(parse_result, indent=4, ensure_ascii=False))
+#
+#     # 로그인
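+#     naver_id = "YOUR_NAVER_ID"  # placeholder: never commit real credentials
+#     naver_password = "YOUR_NAVER_PASSWORD"  # placeholder: load from env or a secret store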
+#
+#     # 모든 결과에서 요소들을 수집 (개선된 방식)
+#     all_elements = {}
+#
+#     for item in parse_result:
+#         if not item.get("found"):
+#             print("요소를 찾지 못했습니다.")
+#             continue
+#
+#         elements = item.get("elements", [])
+#         for element in elements:
+#             for key, value in element.items():
+#                 # ID 관련 요소
+#                 if "id" in key.lower():
+#                     if "css_selector" in key:
+#                         all_elements["id_css"] = value
+#                     elif "xpath" in key:
+#                         all_elements["id_xpath"] = value
+#
+#                 # Password 관련 요소
+#                 elif "password" in key.lower() or "pw" in key.lower():
+#                     if "css_selector" in key:
+#                         all_elements["pw_css"] = value
+#                     elif "xpath" in key:
+#                         all_elements["pw_xpath"] = value
+#
+#                 # Login 관련 요소
+#                 elif "login" in key.lower():
+#                     if "css_selector" in key:
+#                         all_elements["login_css"] = value
+#                     elif "xpath" in key:
+#                         all_elements["login_xpath"] = value
+#
+#     print(f"수집된 요소들: {all_elements}")
+#
+#     # 아이디 입력
+#     id_input = None
+#     if all_elements.get("id_css"):
+#         try:
+#             id_input = wait_driver.until(
+#                 EC.presence_of_element_located((By.CSS_SELECTOR, all_elements["id_css"]))
+#             )
+#             print(f"아이디 요소 발견 (CSS): {all_elements['id_css']}")
+#             time.sleep(2)
+#         except TimeoutException:
+#             print(f"아이디 요소를 찾지 못했습니다 (CSS): {all_elements['id_css']}")
+#
+#     if not id_input and all_elements.get("id_xpath"):
+#         try:
+#             id_input = wait_driver.until(
+#                 EC.presence_of_element_located((By.XPATH, all_elements["id_xpath"]))
+#             )
+#             print(f"아이디 요소 발견 (XPath): {all_elements['id_xpath']}")
+#             time.sleep(2)
+#         except TimeoutException:
+#             print(f"아이디 요소를 찾지 못했습니다 (XPath): {all_elements['id_xpath']}")
+#
+#     if id_input:
+#         id_input.click()
+#         time.sleep(1)
+#         pyperclip.copy(naver_id)
+#         time.sleep(1)
+#         id_input.send_keys(Keys.COMMAND, "v")
+#         time.sleep(1)
+#
+#     # 비밀번호 입력
+#     password_input = None
+#     if all_elements.get("pw_css"):
+#         try:
+#             password_input = wait_driver.until(
+#                 EC.presence_of_element_located((By.CSS_SELECTOR, all_elements["pw_css"]))
+#             )
+#             print(f"비밀번호 요소 발견 (CSS): {all_elements['pw_css']}")
+#             time.sleep(2)
+#         except TimeoutException:
+#             print(f"비밀번호 요소를 찾지 못했습니다 (CSS): {all_elements['pw_css']}")
+#
+#     if not password_input and all_elements.get("pw_xpath"):
+#         try:
+#             password_input = wait_driver.until(
+#                 EC.presence_of_element_located((By.XPATH, all_elements["pw_xpath"]))
+#             )
+#             print(f"비밀번호 요소 발견 (XPath): {all_elements['pw_xpath']}")
+#             time.sleep(2)
+#         except TimeoutException:
+#             print(f"비밀번호 요소를 찾지 못했습니다 (XPath): {all_elements['pw_xpath']}")
+#
+#     if password_input:
+#         password_input.click()
+#         time.sleep(1)
+#         pyperclip.copy(naver_password)
+#         time.sleep(1)
+#         password_input.send_keys(Keys.COMMAND, "v")
+#         time.sleep(1)
+#
+#     # 로그인 버튼 클릭
+#     login_button = None
+#     if all_elements.get("login_css"):
+#         try:
+#             login_selector = all_elements["login_css"].replace('\\', '')
+#             login_button = wait_driver.until(
+#                 EC.element_to_be_clickable((By.CSS_SELECTOR, login_selector))
+#             )
+#             print(f"로그인 버튼 요소 발견 (CSS): {login_selector}")
+#         except TimeoutException:
+#             print(f"로그인 버튼 요소를 찾지 못했습니다 (CSS): {all_elements['login_css']}")
+#
+#     if not login_button and all_elements.get("login_xpath"):
+#         try:
+#             login_button = wait_driver.until(
+#                 EC.element_to_be_clickable((By.XPATH, all_elements["login_xpath"]))
+#             )
+#             print(f"로그인 버튼 요소 발견 (XPath): {all_elements['login_xpath']}")
+#         except TimeoutException:
+#             print(f"로그인 버튼 요소를 찾지 못했습니다 (XPath): {all_elements['login_xpath']}")
+#
+#     if login_button:
+#         login_button.click()
+#         print("로그인 버튼 클릭 완료")
+#
+#     # 로그인 완료 대기
+#     time.sleep(5)
+#     print("로그인 완료, 블로그 포스팅 시작...")
+#
+#     # ========== 블로그 포스팅 부분 (도움말 닫기 버튼 추가) ==========
+#     try:
+#         # 네이버 블로그 글쓰기 페이지로 이동
+#         post_content_url = f"https://blog.naver.com/PostWriteForm.naver?blogId={naver_id}&Redirect=Write&redirect=Write&widgetTypeCall=true&noTrackingCode=true&directAccess=false"
+#         driver.get(post_content_url)
+#         print("블로그 글쓰기 페이지로 이동 완료. 10초 대기...")
+#         time.sleep(10)
+#
+#         blog_html = driver.page_source
+#         print(f"HTML 길이: {len(blog_html)}")
+#         blog_html_list = preprocess_html(blog_html)
+#         blog_result_html = sum(len(html) for html in blog_html_list)
+#         print(f"전처리된 HTML 총 길이: {blog_result_html}, 분할된 청크 수: {len(blog_html_list)}")
+#
+#         # 테스트용 제목, 내용, 태그
+#         test_title = "LLM 기반 자동화 포스팅"
+#         test_content = "이 포스트는 LLM이 iframe 내부의 HTML을 분석하여 자동으로 작성한 글입니다."
+#         test_tags = ["LLM", "자동화", "네이버블로그"]
+#
+#         # 3. LLM을 사용해 iframe 내부의 블로그 요소들 추출
+#         blog_result = []
+#
+#         for idx, html in enumerate(blog_html_list):
+#             print(f"HTML 청크 {idx + 1}/{len(blog_html_list)} 분석 중...")
+#             prompt = llm_extractor.naver_post_extraction_prompt(html)
+#             response = llm_extractor.client.chat.completions.create(
+#                 model=llm_extractor.model,
+#                 messages=[{"role": "system", "content": prompt}],
+#                 temperature=0,
+#                 response_format={"type": "json_object"}
+#             )
+#             blog_result.append(response.choices[0].message.content)
+#
+#         blog_parse_result = [json.loads(item) for item in blog_result]
+#         print("\n>> 블로그 요소 추출 결과:")
+#         print(json.dumps(blog_parse_result, indent=4, ensure_ascii=False))
+#
+#         # 4. 추출된 요소 정보 취합
+#         blog_elements = {}
+#         for item in blog_parse_result:
+#             if not item.get("found"): continue
+#             for element in item.get("elements", []):
+#                 for key, value in element.items():
+#                     if "title" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["title_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["title_xpath"] = value
+#                     elif "content" in key.lower() or "body" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["content_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["content_xpath"] = value
+#                     elif "help_close" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["help_close_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["help_close_xpath"] = value
+#                     elif "first_publish" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["first_publish_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["first_publish_xpath"] = value
+#                     elif "tag_input" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["tag_input_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["tag_input_xpath"] = value
+#                     elif "final_publish" in key.lower():
+#                         if "css_selector" in key:
+#                             blog_elements["final_publish_css"] = value
+#                         elif "xpath" in key:
+#                             blog_elements["final_publish_xpath"] = value
+#
+#         print(f"\n>> 수집된 블로그 요소들: {blog_elements}")
+#
+#         # 5. 도움말 닫기 버튼 클릭 (발행 버튼이 가려지지 않도록)
+#         help_close_button = None
+#         help_close_css = blog_elements.get("help_close_css")
+#         if help_close_css:
+#             try:
+#                 help_close_button = wait_driver.until(EC.element_to_be_clickable((By.CSS_SELECTOR, help_close_css)))
+#                 print(f"✅ 도움말 닫기 버튼 발견 (CSS): {help_close_css}")
+#             except TimeoutException:
+#                 print(f"⚠️ 도움말 닫기 버튼을 찾지 못했습니다 (CSS): {help_close_css}")
+#
+#         if not help_close_button:
+#             help_close_xpath = blog_elements.get("help_close_xpath")
+#             if help_close_xpath:
+#                 try:
+#                     help_close_button = wait_driver.until(EC.element_to_be_clickable((By.XPATH, help_close_xpath)))
+#                     print(f"✅ 도움말 닫기 버튼 발견 (XPath): {help_close_xpath}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 도움말 닫기 버튼을 찾지 못했습니다 (XPath): {help_close_xpath}")
+#
+#         if help_close_button:
+#             try:
+#                 help_close_button.click()
+#                 print("✅ 도움말 닫기 버튼 클릭 완료")
+#                 time.sleep(1)  # 닫히는 시간 대기
+#             except Exception as e:
+#                 print(f"⚠️ 도움말 닫기 버튼 클릭 실패: {str(e)}")
+#                 # JavaScript로 강제 클릭 시도
+#                 try:
+#                     driver.execute_script("arguments[0].click();", help_close_button)
+#                     print("✅ 도움말 닫기 버튼 JavaScript 클릭 완료")
+#                     time.sleep(1)
+#                 except Exception as js_e:
+#                     print(f"❌ 도움말 닫기 버튼 JavaScript 클릭도 실패: {str(js_e)}")
+#         else:
+#             print("⚠️ 도움말 닫기 버튼을 찾지 못했습니다. se-utils 요소 직접 제거를 시도합니다.")
+#             # 직접 se-utils 요소 제거
+#             try:
+#                 driver.execute_script("""
+#                     var element = document.querySelector('.se-utils');
+#                     if (element) {
+#                         element.style.display = 'none';
+#                         console.log('se-utils 요소를 숨겼습니다.');
+#                     }
+#                 """)
+#                 print("✅ se-utils 요소를 직접 숨김 처리했습니다.")
+#             except Exception as e:
+#                 print(f"⚠️ se-utils 요소 숨김 처리 실패: {str(e)}")
+#
+#         # 6. 제목 및 본문 입력 (CSS, XPath 순차 시도)
+#         # 제목 입력
+#         title_input = None
+#         title_css = blog_elements.get("title_css")
+#         if title_css:
+#             try:
+#                 title_input = wait_driver.until(EC.element_to_be_clickable((By.CSS_SELECTOR, title_css)))
+#                 print(f"✅ 제목 요소 발견 (CSS): {title_css}")
+#             except TimeoutException:
+#                 print(f"⚠️ 제목 요소를 찾지 못했습니다 (CSS): {title_css}")
+#
+#         if not title_input:
+#             title_xpath = blog_elements.get("title_xpath")
+#             if title_xpath:
+#                 try:
+#                     title_input = wait_driver.until(EC.element_to_be_clickable((By.XPATH, title_xpath)))
+#                     print(f"✅ 제목 요소 발견 (XPath): {title_xpath}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 제목 요소를 찾지 못했습니다 (XPath): {title_xpath}")
+#
+#         if title_input:
+#             ActionChains(driver).move_to_element(title_input).click().send_keys(test_title).perform()
+#             print("✅ 제목 입력 완료")
+#         else:
+#             print("❌ 제목 입력 요소를 최종적으로 찾지 못했습니다.")
+#
+#         # 본문 입력
+#         content_input = None
+#         content_css = blog_elements.get("content_css")
+#         if content_css:
+#             try:
+#                 content_input = wait_driver.until(EC.element_to_be_clickable((By.CSS_SELECTOR, content_css)))
+#                 print(f"✅ 본문 요소 발견 (CSS): {content_css}")
+#             except TimeoutException:
+#                 print(f"⚠️ 본문 요소를 찾지 못했습니다 (CSS): {content_css}")
+#
+#         if not content_input:
+#             content_xpath = blog_elements.get("content_xpath")
+#             if content_xpath:
+#                 try:
+#                     content_input = wait_driver.until(EC.element_to_be_clickable((By.XPATH, content_xpath)))
+#                     print(f"✅ 본문 요소 발견 (XPath): {content_xpath}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 본문 요소를 찾지 못했습니다 (XPath): {content_xpath}")
+#
+#         if content_input:
+#             ActionChains(driver).move_to_element(content_input).click().send_keys(test_content).perform()
+#             print("✅ 본문 입력 완료")
+#         else:
+#             print("❌ 본문 입력 요소를 최종적으로 찾지 못했습니다.")
+#
+#         # 7. 발행 버튼 클릭 (LLM이 찾은 선택자 사용)
+#         first_publish_button = None
+#         first_publish_css = blog_elements.get("first_publish_css")
+#         if first_publish_css:
+#             try:
+#                 first_publish_button = wait_driver.until(
+#                     EC.element_to_be_clickable((By.CSS_SELECTOR, first_publish_css)))
+#                 print(f"✅ 첫 번째 발행 버튼 발견 (CSS): {first_publish_css}")
+#             except TimeoutException:
+#                 print(f"⚠️ 첫 번째 발행 버튼을 찾지 못했습니다 (CSS): {first_publish_css}")
+#
+#         if not first_publish_button:
+#             first_publish_xpath = blog_elements.get("first_publish_xpath")
+#             if first_publish_xpath:
+#                 try:
+#                     first_publish_button = wait_driver.until(
+#                         EC.element_to_be_clickable((By.XPATH, first_publish_xpath)))
+#                     print(f"✅ 첫 번째 발행 버튼 발견 (XPath): {first_publish_xpath}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 첫 번째 발행 버튼을 찾지 못했습니다 (XPath): {first_publish_xpath}")
+#
+#         if first_publish_button:
+#             try:
+#                 # 일반 클릭 시도
+#                 first_publish_button.click()
+#                 print("✅ 첫 번째 발행 버튼 클릭 완료. 팝업창을 기다립니다...")
+#             except Exception as click_error:
+#                 print(f"⚠️ 일반 클릭 실패, JavaScript 클릭 시도: {str(click_error)}")
+#                 driver.execute_script("arguments[0].click();", first_publish_button)
+#                 print("✅ 첫 번째 발행 버튼 JavaScript 클릭 완료. 팝업창을 기다립니다...")
+#
+#             time.sleep(3)
+#         else:
+#             print("❌ 첫 번째 발행 버튼을 최종적으로 찾지 못했습니다. 하드코딩 선택자를 시도합니다.")
+#             # 폴백: 하드코딩 선택자 사용
+#             try:
+#                 publish_button = wait_driver.until(
+#                     EC.element_to_be_clickable((By.XPATH, "//button[.//span[normalize-space()='발행']]")))
+#
+#                 try:
+#                     publish_button.click()
+#                     print("✅ 발행 버튼 하드코딩 클릭 완료. 팝업창을 기다립니다...")
+#                 except Exception as click_error:
+#                     driver.execute_script("arguments[0].click();", publish_button)
+#                     print("✅ 발행 버튼 하드코딩 JavaScript 클릭 완료. 팝업창을 기다립니다...")
+#
+#                 time.sleep(3)
+#             except TimeoutException:
+#                 print("❌ 하드코딩 발행 버튼도 찾지 못했습니다.")
+#
+#         # 8. 태그 입력 및 최종 발행 (LLM이 찾은 선택자 사용)
+#         try:
+#             # 태그 입력 필드 찾기
+#             tag_input = None
+#             tag_input_css = blog_elements.get("tag_input_css")
+#             if tag_input_css:
+#                 try:
+#                     tag_input = wait_driver.until(EC.element_to_be_clickable((By.CSS_SELECTOR, tag_input_css)))
+#                     print(f"✅ 태그 입력 필드 발견 (CSS): {tag_input_css}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 태그 입력 필드를 찾지 못했습니다 (CSS): {tag_input_css}")
+#
+#             if not tag_input:
+#                 tag_input_xpath = blog_elements.get("tag_input_xpath")
+#                 if tag_input_xpath:
+#                     try:
+#                         tag_input = wait_driver.until(EC.element_to_be_clickable((By.XPATH, tag_input_xpath)))
+#                         print(f"✅ 태그 입력 필드 발견 (XPath): {tag_input_xpath}")
+#                     except TimeoutException:
+#                         print(f"⚠️ 태그 입력 필드를 찾지 못했습니다 (XPath): {tag_input_xpath}")
+#
+#             if not tag_input:
+#                 # 폴백: 하드코딩 선택자 사용
+#                 tag_input = wait_driver.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[placeholder*='태그']")))
+#                 print("✅ 태그 입력 필드 하드코딩 선택자로 발견")
+#
+#             # 태그 입력
+#             for tag in test_tags:
+#                 tag_input.send_keys(tag)
+#                 tag_input.send_keys(Keys.ENTER)
+#                 time.sleep(0.5)
+#             print("✅ 태그 입력 완료")
+#
+#             # 최종 발행 버튼 찾기
+#             final_publish_button = None
+#             final_publish_css = blog_elements.get("final_publish_css")
+#             if final_publish_css:
+#                 try:
+#                     final_publish_button = wait_driver.until(
+#                         EC.element_to_be_clickable((By.CSS_SELECTOR, final_publish_css)))
+#                     print(f"✅ 최종 발행 버튼 발견 (CSS): {final_publish_css}")
+#                 except TimeoutException:
+#                     print(f"⚠️ 최종 발행 버튼을 찾지 못했습니다 (CSS): {final_publish_css}")
+#
+#             if not final_publish_button:
+#                 final_publish_xpath = blog_elements.get("final_publish_xpath")
+#                 if final_publish_xpath:
+#                     try:
+#                         final_publish_button = wait_driver.until(
+#                             EC.element_to_be_clickable((By.XPATH, final_publish_xpath)))
+#                         print(f"✅ 최종 발행 버튼 발견 (XPath): {final_publish_xpath}")
+#                     except TimeoutException:
+#                         print(f"⚠️ 최종 발행 버튼을 찾지 못했습니다 (XPath): {final_publish_xpath}")
+#
+#             if not final_publish_button:
+#                 # 폴백: 하드코딩 선택자 사용
+#                 final_publish_button = wait_driver.until(EC.element_to_be_clickable(
+#                     (By.XPATH, "//div[contains(@class,'popup')]//button[.//span[normalize-space()='발행']]")))
+#                 print("✅ 최종 발행 버튼 하드코딩 선택자로 발견")
+#
+#             # 최종 발행 버튼 클릭
+#             final_publish_button.click()
+#             print("✅ 최종 발행 버튼 클릭 완료!")
+#
+#             wait_driver.until(EC.url_contains("PostView.naver"))
+#             print("\n🎉 블로그 포스팅 발행 최종 완료! 🎉")
+#         except TimeoutException:
+#             print("❌ 발행 팝업 처리 중 오류가 발생했습니다.")
+#             raise
+#
+#     except Exception as e:
+#         print(f"블로그 포스팅 중 오류 발생: {str(e)}")
+#
+#     # ... (이후 전체 소요 시간 측정 및 드라이버 종료 코드) ...
+#
+#     end_time = time.time()
+#     print(f"전체 소요 시간: {end_time - start_time} seconds")
+#
+#     # 대기 후 드라이버 종료
+#     time.sleep(5)
+#     driver.quit()
diff --git a/apps/pre-processing-service/app/utils/llm_extractor.py b/apps/pre-processing-service/app/utils/llm_extractor.py
new file mode 100644
index 00000000..4263a270
--- /dev/null
+++ b/apps/pre-processing-service/app/utils/llm_extractor.py
@@ -0,0 +1,252 @@
+import os
+from openai import OpenAI
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class LLMExtractor:
+    """Build LLM prompts that request CSS selectors and XPaths for page elements."""
+
+    def __init__(self, model="gpt-4o"):
+        """
+        Create the OpenAI client (API key read from OPENAI_API_KEY) and store the model name.
+        :param model: name of the LLM model to use
+        """
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.model = model
+
+    def login_extraction_prompt(self, target_description: str, html: str):
+        """
+        Build the shared login-element extraction prompt (Naver and Tistory).
+        :param target_description: description of the elements to extract
+        :param html: HTML to analyze
+        :return: prompt string
+        """
+
+        return f"""
+        # 지시 (Instructions):
+            1. 당신은 HTML에서 웹 자동화에 필요한 정확한 요소를 찾는 전문가입니다.
+            2. 당신의 임무는 사용자의 목표와 가장 일치하는 요소에 대한 CSS Selector와 XPath를 정확하게 찾아내어 지정된 JSON 형식으로 반환하는 것입니다.
+        
+        # 규칙 (Rules):
+            1. 만약 요청한 요소가 HTML 문서에 존재하지 않는다면, 반드시 {{"found": false}} 만 반환해야 합니다. 
+            2. 억지로 추측하거나 존재하지 않는 요소에 대한 정보를 생성하지 마세요. 
+            3. name에는 요소의 이름을 나타내도록 지정하세요. 예: id, password, login_button, title, body 등
+            4. 반한되는 형식 :
+             {{
+                "found": true/false,
+                "elements": [
+                    {{
+                        "name_css_selector": "CSS 선택자 문자열",
+                        "name_xpath": "XPath 문자열"
+                    }},
+                ]
+            }}
+    
+        # 수행 (Execution):    
+            사용자의 요구 사항 : {target_description}
+            HTML 문서 : {html}
+
+        """
+
+    def naver_post_extraction_prompt(self, html: str):
+        """
+        Build the Naver blog post-editor element extraction prompt.
+        :param html: HTML to analyze
+        :return: prompt string; each *_css_selector / *_xpath key pair shares one prefix
+        """
+
+        return f"""
+            # 지시 (Instructions):
+                1. 당신은 HTML에서 웹 자동화에 필요한 정확한 요소를 찾는 전문가입니다.
+                2. 당신의 임무는 목표(Goal)와 가장 일치하는 요소에 대한 CSS Selector와 XPath를 정확하게 찾아내어 지정된 JSON 형식으로 반환하는 것입니다.
+            
+            #  규칙 (Rules):
+                1. 만약 요청한 요소가 HTML 문서에 존재하지 않는다면, 반드시 {{"found": false}} 만 반환해야 합니다. 
+                2. 억지로 추측하거나 존재하지 않는 요소에 대한 정보를 생성하지 마세요. 
+                
+            # 목표 (Goal):
+                
+                ## 제목 입력 영역 찾기:
+                "제목"이 포함된 요소 찾기
+                   - HTML에서 "제목"이라는 한글 텍스트를 포함한 모든 요소 검색
+                   - 이 요소와 같은 부모나 형제 관계에 있는 요소 찾기
+                
+                ## 본문 입력 영역 찾기:
+                "본문"이 포함된 요소 찾기:
+                    - HTML에서 "본문"이라는 한글 텍스트를 포함한 모든 요소 검색
+                    - 이 요소와 같은 부모나 형제 관계에 있는 요소 찾기
+                    
+                # 도움말 닫기 버튼 찾기:
+                "도움말"이 포함된 요소 찾기:
+                    - "도움말"이라는 한글 텍스트를 포함한 모든 요소 검색
+                    - 이 요소와 같은 부모나 형제 관계에 있는 "닫기" 버튼 찾기
+                    
+                # 첫 번째 발행 버튼(팝업 열기용) 찾기:
+                "발행"이 포함된 버튼 요소 찾기:
+                    - HTML에서 "발행"이라는 한글 텍스트를 포함한 모든 버튼
+                    - 이 버튼이 팝업을 여는 역할을 하는지 확인
+                
+                # 태그 입력 필드 찾기:
+                "tag"가 포함된 요소 찾기:
+                    - HTML에서 "tag"라는 단어가 포함된 모든 요소 검색
+                    - id나 placeholder에 "tag" or "태그" 관련 내용이 있는 것
+                    
+                # 최종 발행 버튼 찾기:
+                popup 내부의 발행 버튼 찾기:
+                    - popup div 내부에 있는 "발행" 버튼 
+                    - confirm_btn 클래스가 포함된 버튼
+        
+            # 반환 형식:
+            {{
+                "found": true/false,
+                "elements": [
+                    {{
+                        "title_css_selector": "제목 입력을 위한 요소의 CSS 선택자",
+                        "title_xpath": "제목 입력을 위한 요소의 XPath"
+                    }},
+                    {{
+                        "content_css_selector": "본문 입력을 위한 요소의 CSS 선택자",
+                        "content_xpath": "본문 입력을 위한 요소의 XPath"
+                    }},
+                    {{
+                        "help_close_css_selector": "도움말 닫기 버튼의 CSS 선택자",
+                        "help_close_xpath": "도움말 닫기 버튼의 XPath"
+                    }},
+                    {{
+                        "first_publish_css_selector": "첫 번째 발행 버튼(팝업 열기용)의 CSS 선택자",
+                        "first_publish_xpath": "첫 번째 발행 버튼(팝업 열기용)의 XPath"
+                    }},
+                    {{
+                        "tag_input_css_selector": "태그 입력 필드의 CSS 선택자",
+                        "tag_input_xpath": "태그 입력 필드의 XPath"
+                    }},
+                    {{
+                        "final_publish_css_selector": "팝업 내의 발행 버튼의 CSS 선택자",
+                        "final_publish_xpath": "팝업 내의 발행 버튼의 XPath"
+                    }}
+                ]
+            }}
+        
+            # 분석할 HTML:
+            {html}
+            """
+
+    def tistory_post_extraction_prompt(self, html: str):
+        """
+        Build the Tistory basic-input prompt (title, content, tags, complete button).
+        :param html: HTML to analyze
+        :return: prompt string
+        """
+        return f"""
+            # 지시 (Instructions):
+                1. 당신은 HTML에서 웹 자동화에 필요한 정확한 요소를 찾는 전문가입니다.
+                2. 당신의 임무는 목표(Goal)와 가장 일치하는 요소에 대한 CSS Selector와 XPath를 정확하게 찾아내어 지정된 JSON 형식으로 반환하는 것입니다.
+
+            #  규칙 (Rules):
+                1. 만약 요청한 요소가 HTML 문서에 존재하지 않는다면, 반드시 {{"found": false}} 만 반환해야 합니다. 
+                2. 억지로 추측하거나 존재하지 않는 요소에 대한 정보를 생성하지 마세요. 
+
+            # 목표 (Goal):
+
+            ## 제목 입력 영역 찾기:
+            "제목"이 포함된 요소 찾기
+               - HTML에서 "제목"이라는 한글 텍스트를 포함한 모든 요소 검색
+               - 이 요소와 같은 부모나 형제 관계에 있는 요소 찾기
+
+            ## 글 내용 입력 영역 찾기:
+            "글 내용 입력"이 포함된 요소 찾기:
+                - iframe 내부의 요소 우선 검색
+                - "글 내용 입력"이라는 한글 텍스트를 포함한 요소 검색
+                - contenteditable="true" 속성을 가진 요소 우선 검색
+
+            # "tag" or "태그" 입력 필드 찾기:
+            "tag" or "태그"가 포함된 요소 찾기:
+                - HTML에서 "tag" or "태그"라는 텍스트를 포함한 모든 요소 검색
+                - id나 placeholder에 "tag" or "태그" 관련 내용이 있는 것
+
+            # 완료 버튼 찾기:
+            "완료"가 포함된 버튼 요소 찾기:
+                - HTML에서 정확히 "완료"라는 한글 텍스트를 포함한 모든 버튼
+                - 이 버튼이 글 작성을 완료하는 역할을 하는지 확인
+
+            # 반환 형식:
+                    {{
+                        "found": true/false,
+                        "elements": [
+                            {{
+                                "title_css_selector": "제목 입력을 위한 요소의 CSS 선택자 또는 null",
+                                "title_xpath": "제목 입력을 위한 요소의 XPath 또는 null"
+                            }},
+                            {{
+                                "content_css_selector": "글 내용 입력을 위한 요소의 CSS 선택자 또는 null",
+                                "content_xpath": "글 내용 입력을 위한 요소의 XPath 또는 null"
+                            }},
+                            {{
+                                "tag_input_css_selector": "태그 입력 필드의 CSS 선택자 또는 null",
+                                "tag_input_xpath": "태그 입력 필드의 XPath 또는 null"
+                            }},
+                            {{
+                                "complete_css_selector": "완료 버튼의 CSS 선택자 또는 null",
+                                "complete_xpath": "완료 버튼의 XPath 또는 null"
+                            }}
+                        ]
+                    }}
+
+            # 분석할 HTML:
+            {html}
+            """
+
+    def tistory_publish_extraction_prompt(self, html: str):
+        """
+        Build the Tistory publish-step prompt (public radio button, publish button).
+        Intended for elements that are created dynamically after the complete button is clicked.
+        :param html: HTML to analyze (the HTML as updated after clicking the complete button)
+        :return: prompt string
+        """
+        return f"""
+            # 지시 (Instructions):
+                1. 당신은 HTML에서 웹 자동화에 필요한 정확한 요소를 찾는 전문가입니다.
+                2. 당신의 임무는 목표(Goal)와 가장 일치하는 요소에 대한 CSS Selector와 XPath를 정확하게 찾아내어 지정된 JSON 형식으로 반환하는 것입니다.
+
+            #  규칙 (Rules):
+                1. 만약 요청한 요소가 HTML 문서에 존재하지 않는다면, 반드시 {{"found": false}} 만 반환해야 합니다. 
+                2. 억지로 추측하거나 존재하지 않는 요소에 대한 정보를 생성하지 마세요. 
+                3. CSS 선택자에서 Selenium이 지원하지 않는 문법을 사용하지 마세요:
+                   - :contains() 선택자 금지 (jQuery 전용)
+                   - :visible, :hidden 같은 jQuery 전용 선택자 금지
+                   - 표준 CSS 선택자만 사용 (id, class, attribute, tag 등)
+                
+            # 목표 (Goal):
+
+            # 공개 radio 버튼 찾기:
+            "공개"가 포함된 radio 요소 찾기:
+                - input type="radio" 요소 우선 검색
+                - HTML에서 "공개"라는 한글 텍스트를 포함한 모든 radio 버튼
+                - 글의 공개/비공개 설정을 위한 라디오 버튼
+
+            # 발행 버튼 찾기:
+            "발행"이 포함된 버튼 요소 찾기:
+                - HTML에서 "발행"이라는 한글 텍스트를 포함한 모든 버튼
+                - "게시", "Publish" 등의 유사한 텍스트도 포함
+                - publish-btn, btn-publish 등의 id나 class를 가진 버튼 우선 검색
+                - 이 버튼이 최종적으로 글을 발행하는 역할을 하는지 확인
+
+            # 반환 형식:
+                    {{
+                        "found": true/false,
+                        "elements": [
+                            {{
+                                "public_radio_css_selector": "공개 radio의 CSS 선택자 또는 null",
+                                "public_radio_xpath": "공개 radio의 XPath 또는 null"
+                            }},
+                            {{
+                                "publish_css_selector": "발행 버튼의 CSS 선택자 또는 null",
+                                "publish_xpath": "발행 버튼의 XPath 또는 null"
+                            }}
+                        ]
+                    }}
+
+            # 분석할 HTML:
+            {html}
+            """
diff --git a/apps/pre-processing-service/app/utils/preprocess_html.py b/apps/pre-processing-service/app/utils/preprocess_html.py
new file mode 100644
index 00000000..6edfb9d6
--- /dev/null
+++ b/apps/pre-processing-service/app/utils/preprocess_html.py
@@ -0,0 +1,210 @@
+from bs4 import BeautifulSoup, Comment
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import re
+
+
+def preprocess_html(html_content):
+    """
+    Reduce an HTML document to the markup needed to locate elements, then chunk it.
+
+    Processing order: drop non-content tags, comments and elements hidden with an
+    inline ``display:none``; strip every attribute that is not allow-listed; prune
+    empty non-interactive tags; collapse whitespace runs in text; finally split
+    the serialized document with ``_chunking_html``.
+
+    :param html_content: raw HTML string
+    :return: list of preprocessed HTML strings (chunks)
+    """
+    # Imported locally so the module-level import block stays untouched.
+    from bs4.element import PreformattedString
+
+    soup = BeautifulSoup(html_content, "html.parser")
+
+    # Tags that never hold locatable page content.
+    unnecessary_tags = [
+        "script",  # JavaScript code
+        "style",  # CSS rules
+        "noscript",  # fallback content for disabled JavaScript
+        "meta",  # metadata
+        "link",  # external resource links
+        "head",  # the whole head
+        "title",  # page title
+        "base",  # base URL
+    ]
+
+    # Every <link> is removed, exactly as before: the former
+    # ``rel in ["stylesheet", "icon"]`` exemption could never match because
+    # BeautifulSoup parses ``rel`` into a list of tokens, not a string.
+    for tag_name in unnecessary_tags:
+        for tag in soup.find_all(tag_name):
+            tag.decompose()
+
+    # Remove HTML comments.
+    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
+        comment.extract()
+
+    # Only inline display:none counts as hidden; other hidden elements are kept.
+    hidden_elements = soup.find_all(
+        attrs={"style": re.compile(r"display\s*:\s*none", re.I)}
+    )
+    for element in hidden_elements:
+        element.decompose()
+
+    # Attributes kept by exact name. data-*, aria-* and on* attributes are kept
+    # by prefix further down, so they need no entry here.
+    important_attributes = {
+        "id",
+        "class",
+        "name",
+        "type",
+        "value",
+        "href",
+        "src",
+        "alt",
+        "title",
+        "placeholder",
+        "role",
+        "disabled",
+        "readonly",
+        "required",
+        "checked",
+        "selected",
+        "hidden",
+        "tabindex",
+        "contenteditable",
+        "spellcheck",
+        "autocomplete",
+        "maxlength",
+        "minlength",
+        "for",
+        "form",
+        "method",
+        "action",
+        "target",
+    }
+    preserved_prefixes = ("data-", "aria-", "on")
+
+    for tag in soup.find_all(True):
+        attrs_to_remove = [
+            attr_name
+            for attr_name in tag.attrs
+            if attr_name not in important_attributes
+            and not attr_name.startswith(preserved_prefixes)
+        ]
+        for attr_name in attrs_to_remove:
+            del tag.attrs[attr_name]
+
+    # Tags that are kept even when empty.
+    # NOTE(review): other naturally empty tags (e.g. <iframe>) are still pruned,
+    # even when they carry an id - confirm that this is intended.
+    interactive_tags = {
+        "input",
+        "button",
+        "select",
+        "textarea",
+        "a",
+        "img",
+        "br",
+        "hr",
+        "div",
+        "span",
+    }
+
+    # Prune empty tags in at most three passes, so a parent emptied by an
+    # earlier pass can be removed by a later one.
+    for _ in range(3):
+        removed_any = False
+        for tag in soup.find_all():
+            # Keep interactive tags.
+            if tag.name in interactive_tags:
+                continue
+
+            # Keep editable regions. A bare ``contenteditable`` attribute parses
+            # to "", so test for presence rather than truthiness.
+            if tag.has_attr("contenteditable"):
+                continue
+
+            # Keep anything carrying data-* attributes.
+            if any(attr_name.startswith("data-") for attr_name in tag.attrs):
+                continue
+
+            # No text and no child elements: remove.
+            if not tag.get_text(strip=True) and tag.find(True) is None:
+                tag.decompose()
+                removed_any = True
+        if not removed_any:
+            break
+
+    # Collapse whitespace runs in ordinary text only. Doctype/CDATA/declaration
+    # nodes are skipped: replace_with() swaps in a plain string, which would
+    # drop their delimiters and leak e.g. a multi-line doctype into page text.
+    whitespace_run = re.compile(r"\s+")
+    for text_node in soup.find_all(string=True):
+        if isinstance(text_node, PreformattedString):
+            continue
+        original_text = str(text_node)
+        cleaned_text = whitespace_run.sub(" ", original_text)
+        if cleaned_text != original_text:
+            text_node.replace_with(cleaned_text)
+
+    html_list = _chunking_html(str(soup))
+    return html_list
+
+
+def _chunking_html(html_content, chunk_size=50000):
+    """
+    Split an HTML string into consecutive slices of at most chunk_size characters.
+    :param html_content: HTML string to split (cut by length only, not tag-aware)
+    :param chunk_size: maximum chunk length in characters; ValueError if not positive
+    :return: chunks in original order (empty list for an empty string)
+    """
+    if chunk_size <= 0:  # a negative step would silently yield no chunks at all
+        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
+    offsets = range(0, len(html_content), chunk_size)
+    return [html_content[start : start + chunk_size] for start in offsets]
+
+
+def wait_for_tistory_editor_complete(driver, timeout=30):
+    """
+    Block until the Tistory TinyMCE editor is fully loaded.
+
+    Five readiness checks run in sequence, each waited on with the same
+    ``timeout``: page load, TinyMCE library, editor instance initialisation,
+    editor iframe presence, and the iframe's inner document.
+
+    :param driver: Selenium WebDriver showing the editor page
+    :param timeout: timeout in seconds handed to WebDriverWait
+    :return: True once every check has passed
+    """
+    from selenium.webdriver.support.ui import WebDriverWait
+
+    editor_initialized_js = """
+        return tinymce.get('editor-tistory') && 
+               tinymce.get('editor-tistory').initialized
+    """
+    iframe_document_ready_js = """
+        try {
+            var editor = tinymce.get('editor-tistory');
+            var doc = editor.getDoc();
+            return doc && doc.readyState === 'complete';
+        } catch (e) {
+            return false;
+        }
+    """
+
+    def script_result(script):
+        # Condition whose outcome is whatever value the script returns.
+        return lambda d: d.execute_script(script)
+
+    readiness_checks = [
+        lambda d: d.execute_script("return document.readyState") == "complete",
+        script_result("return typeof tinymce !== 'undefined'"),
+        script_result(editor_initialized_js),
+        EC.presence_of_element_located((By.ID, "editor-tistory_ifr")),
+        script_result(iframe_document_ready_js),
+    ]
+    waiter = WebDriverWait(driver, timeout)
+    for check in readiness_checks:
+        waiter.until(check)
+    return True