Skip to content

Commit 5a0d3bd

Browse files
committed
Feat: cron 테스트 파일 등록
1 parent a8cd41c commit 5a0d3bd

19 files changed

+379616
-0
lines changed

.github/workflows/cron_cralwer.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Runs the crawler (main.py) once a day on a schedule, or on manual dispatch.
name: Scheduled Crawling

on:
  schedule:
    # GitHub Actions evaluates cron in UTC: 21:00 UTC is 06:00 KST (every day).
    - cron: "0 21 * * *"
  workflow_dispatch:

jobs:
  crawl:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Generate .env file from GitHub Secrets
        # Secrets are handed to the shell as environment variables rather than
        # being expanded into the script text by `${{ }}`. A value containing
        # a double quote, `$` or a backtick therefore cannot break or alter
        # the commands below; printf writes each value verbatim.
        env:
          MYSQL_HOST: ${{ secrets.MYSQL_HOST }}
          MYSQL_USER: ${{ secrets.MYSQL_USER }}
          MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
          MYSQL_DATABASE: ${{ secrets.MYSQL_DATABASE }}
          DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }}
          OPEN_API_KEY: ${{ secrets.OPEN_API_KEY }}
          BIZ_INFO_API_KEY: ${{ secrets.BIZ_INFO_API_KEY }}
        run: |
          {
            printf 'MYSQL_HOST=%s\n' "$MYSQL_HOST"
            printf 'MYSQL_USER=%s\n' "$MYSQL_USER"
            printf 'MYSQL_PASSWORD=%s\n' "$MYSQL_PASSWORD"
            printf 'MYSQL_DATABASE=%s\n' "$MYSQL_DATABASE"
            printf 'DISCORD_WEBHOOK_URL=%s\n' "$DISCORD_WEBHOOK_URL"
            printf 'OPEN_API_KEY=%s\n' "$OPEN_API_KEY"
            printf 'BIZ_INFO_API_KEY=%s\n' "$BIZ_INFO_API_KEY"
          } >> .env

      - name: Run Crawling Script
        run: |
          python main.py
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Test copy of the scheduled crawl: same steps, fired at 09:35 KST.
name: Test Run at 9:35 KST

on:
  schedule:
    # GitHub Actions evaluates cron in UTC: 00:35 UTC is 09:35 KST.
    - cron: "35 0 * * *"
  workflow_dispatch:

jobs:
  crawl:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Generate .env file from GitHub Secrets
        # Secrets are handed to the shell as environment variables rather than
        # being expanded into the script text by `${{ }}`. A value containing
        # a double quote, `$` or a backtick therefore cannot break or alter
        # the commands below; printf writes each value verbatim.
        env:
          MYSQL_HOST: ${{ secrets.MYSQL_HOST }}
          MYSQL_USER: ${{ secrets.MYSQL_USER }}
          MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
          MYSQL_DATABASE: ${{ secrets.MYSQL_DATABASE }}
          DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }}
          OPEN_API_KEY: ${{ secrets.OPEN_API_KEY }}
          BIZ_INFO_API_KEY: ${{ secrets.BIZ_INFO_API_KEY }}
        run: |
          {
            printf 'MYSQL_HOST=%s\n' "$MYSQL_HOST"
            printf 'MYSQL_USER=%s\n' "$MYSQL_USER"
            printf 'MYSQL_PASSWORD=%s\n' "$MYSQL_PASSWORD"
            printf 'MYSQL_DATABASE=%s\n' "$MYSQL_DATABASE"
            printf 'DISCORD_WEBHOOK_URL=%s\n' "$DISCORD_WEBHOOK_URL"
            printf 'OPEN_API_KEY=%s\n' "$OPEN_API_KEY"
            printf 'BIZ_INFO_API_KEY=%s\n' "$BIZ_INFO_API_KEY"
          } >> .env

      - name: Run Crawling Script
        run: |
          python main.py

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ venv/
22
__pycache__/
33
*.pyc
44
.env
5+
.venv

api/server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# 🦦 api 응답용 서버 코드

crawlers/crawler_financial.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from selenium import webdriver
2+
from selenium.webdriver.common.by import By
3+
from selenium.webdriver.common.keys import Keys
4+
from selenium.webdriver.support.ui import WebDriverWait
5+
from selenium.webdriver.support import expected_conditions as EC
6+
import time
7+
8+
def _read_card_detail(driver, wait, idx):
    """Open the detail popup of the card at position ``idx`` and read its fields.

    The popup is left open; the caller is responsible for closing it.

    Args:
        driver: The Selenium WebDriver showing the search results.
        wait: A ``WebDriverWait`` bound to ``driver``.
        idx: Zero-based position of the card's "learn more" link.

    Returns:
        A dict with the keys "제목" (title), "분류" (category) and
        "지원대상" (support target).

    Raises:
        Exception: Any Selenium error or ``IndexError`` raised while locating
            or reading the popup elements is propagated to the caller.
    """
    # Re-query the links on every call: references obtained before a popup was
    # opened and closed may have gone stale.
    detail_buttons = driver.find_elements(By.CSS_SELECTOR, "a.learnMorePopup")
    wait.until(EC.element_to_be_clickable(detail_buttons[idx]))
    # Click through JavaScript instead of a native click.
    driver.execute_script("arguments[0].click();", detail_buttons[idx])
    print(f"✅ [{idx+1}] 카드 클릭 완료")

    # Wait for the popup title element to exist ...
    title_elem = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-detail-left p.tit-01"))
    )
    # ... and then for its text to be filled in.
    wait.until(lambda d: title_elem.text.strip() != "")

    title = title_elem.text.strip()
    info_items = driver.find_elements(By.CSS_SELECTOR, "div.big-number ul li")
    # In each list item the value is read from the second <span>.
    category = info_items[0].find_elements(By.TAG_NAME, "span")[1].text.strip()
    target = info_items[1].find_elements(By.TAG_NAME, "span")[1].text.strip()

    return {
        "제목": title,
        "분류": category,
        "지원대상": target,
    }


def _close_popup_if_open(driver, idx):
    """Best-effort: close the detail popup if its close button is present."""
    try:
        close_buttons = driver.find_elements(By.CSS_SELECTOR, "div.product-detail button[title='닫기']")
        if close_buttons:
            driver.execute_script("arguments[0].click();", close_buttons[0])
            time.sleep(1)
    except Exception as close_err:
        # Deliberately non-fatal, but no longer a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        print(f"⚠️ [{idx+1}] 팝업 닫기 실패: {close_err}")


def _scrape_products(driver, wait, results):
    """Filter, search, load the cards and append one dict per card to ``results``.

    ``results`` is filled in place so that entries collected before a failure
    remain available to the caller.
    """
    # Tick the "사회적기업" (social enterprise) filter.
    social_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[text()='사회적기업']")))
    social_button.click()
    print("✅ 사회적기업 체크 완료")
    time.sleep(1)

    # Click the product search button.
    search_button = wait.until(EC.element_to_be_clickable((By.ID, "loanProductSearch")))
    search_button.click()
    print("✅ 상품 검색 클릭 완료")
    time.sleep(2)

    # Scroll to the bottom (at most 5 times) to load more cards; stop as soon
    # as a scroll does not change the number of cards.
    prev_count = 0
    for _ in range(5):
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
        time.sleep(2)
        cards = driver.find_elements(By.CSS_SELECTOR, "div.card-main")
        if len(cards) == prev_count:
            break
        prev_count = len(cards)
    print(f"✅ 총 {len(cards)}개 카드 발견")

    for idx in range(len(cards)):
        try:
            item = _read_card_detail(driver, wait, idx)
            results.append(item)
            print(f"📄 [{idx+1}] {item['제목']} 저장 완료")

            # Close the popup before moving on to the next card.
            close_button = driver.find_element(By.CSS_SELECTOR, "div.product-detail button[title='닫기']")
            driver.execute_script("arguments[0].click();", close_button)
            time.sleep(1)
        except Exception as e:
            # One bad card must not abort the run: report it, make sure no
            # popup is left open, and continue with the next card.
            print(f"⚠️ [{idx+1}] 에러 발생: {e}")
            _close_popup_if_open(driver, idx)


def crawl_kinfa_social_finance():
    """Crawl social-enterprise finance products from the KINFA "at a glance" page.

    Starts Chrome, opens the page, applies the "사회적기업" filter, runs the
    search, scrolls to load the cards, and reads title, category and support
    target from each card's detail popup. Progress and the final result are
    printed to stdout.

    Returns:
        A list of dicts with the keys "제목", "분류" and "지원대상". The list
        is empty (or partial) when scraping fails; such failures are printed,
        not raised.

    Raises:
        Exception: Errors from starting Chrome or from the initial page
            navigation propagate to the caller.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 10)

    url = "https://www.kinfa.or.kr/financialProduct/socialFinanceGlance.do"

    # Bound before any scraping starts so the summary and the return below
    # work even when an early step (filter click, search click) fails.
    results = []

    try:
        # Inside the try/finally so the browser is closed even when navigation
        # fails; the navigation error itself still propagates.
        driver.get(url)
        try:
            _scrape_products(driver, wait, results)
        except Exception as e:
            print(f"❌ 전체 프로세스 실패: {e}")
    finally:
        driver.quit()

    print("\n✅ 최종 결과:")
    for item in results:
        print(item)

    return results
100+
101+
# 실행
102+
if __name__ == "__main__":
103+
crawl_kinfa_social_finance()

0 commit comments

Comments
 (0)