-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
36 lines (25 loc) · 1007 Bytes
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import datetime
from pathlib import Path
from typing import Optional

from bs4 import BeautifulSoup

import database
from job_model import Job
def parse_html(date: str) -> list[Job]:
    """Build ``Job`` records from the saved listing page for *date*.

    Reads ``jobs/jobs-{date}.html`` (a path relative to the current working
    directory) as UTF-8, parses it with BeautifulSoup's ``lxml`` backend and
    creates one ``Job`` per ``<a class="JobThumb">`` element, in document
    order.

    Args:
        date: Value substituted into the file name ``jobs-{date}.html``.

    Returns:
        One ``Job`` per matching anchor; an empty list when none match.

    Raises:
        OSError: If the HTML file cannot be read (e.g. it does not exist).
        KeyError: If a matched anchor has no ``href`` attribute.
        AttributeError: If a card lacks one of the expected child elements
            (``find`` yields ``None`` in that case, so ``.text`` fails).
    """
    markup = Path(f"jobs/jobs-{date}.html").read_text(encoding="utf-8")
    cards = BeautifulSoup(markup, "lxml").find_all("a", attrs={"class": "JobThumb"})

    def child_text(card, tag: str, css_class: str) -> str:
        # Deliberately unguarded: a missing child element propagates as an
        # error instead of being silently replaced with a placeholder.
        return card.find(tag, attrs={"class": css_class}).text

    return [
        Job(
            href=card.attrs["href"],
            title=child_text(card, "h3", "JobThumb__title"),
            category=child_text(card, "p", "JobThumb__category"),
            description=child_text(card, "p", "JobThumb__description"),
        )
        for card in cards
    ]
def main(date: Optional[str] = None) -> None:
    """Parse the saved job listing for *date* and store the result.

    Calls ``database.prepare()``, parses ``jobs/jobs-{date}.html`` via
    ``parse_html``, passes the resulting jobs to ``database.save_db`` and
    prints a confirmation message.

    Args:
        date: Date string used to locate the HTML file. When omitted (or
            ``None``), today's date in ISO ``YYYY-MM-DD`` form is used.

    Raises:
        Whatever ``parse_html`` or the ``database`` helpers raise; nothing
        is caught here.
    """
    # Resolve the default at call time. A default written as
    # ``str(datetime.date.today())`` in the signature is evaluated only once,
    # when the module is imported, so a long-running process would keep
    # using the import day's date on every later call.
    if date is None:
        date = str(datetime.date.today())

    database.prepare()
    jobs = parse_html(date)
    database.save_db(jobs)
    # User-facing message (Russian): "Vacancies for {date} parsed successfully".
    print(f"Вакансии за {date} успешно спарсены")