-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathshige_csv.py
30 lines (27 loc) · 1.15 KB
/
shige_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import csv
import os

from shige import fetch_html, extract_poem_urls, fetch_poem_details
file_path = "poems.csv"
# Column order of the CSV file; each name is also the key looked up in the
# mapping returned for a poem.
CSV_COLUMNS = (
    "name",
    "author",
    "dynasty",
    "content",
    "trans",
    "annotation",
    "appreciation",
    "background",
)


def write_poem_row(writer, details):
    """Write one poem as a single, correctly quoted CSV record.

    Args:
        writer: A ``csv.writer``-style object exposing ``writerow``.
        details: Mapping that holds a string for every name in
            ``CSV_COLUMNS``. It is not modified.

    Raises:
        KeyError: If ``details`` lacks one of the columns.
    """
    # Non-breaking spaces are removed from every field. Quoting is left to the
    # csv writer so commas, double quotes and line breaks inside a field no
    # longer shift columns or split the record.
    writer.writerow(
        [details[column].replace("\xa0", "") for column in CSV_COLUMNS]
    )


def main():
    """Prompt for a listing URL and append every poem found there to the CSV.

    The output path is the module-level ``file_path``. A header row is written
    first when the file is missing or empty; poems are then appended one row
    at a time.
    """
    # newline="" is what the csv module requires of the underlying file;
    # lineterminator="\n" keeps the line ending the header was always written
    # with.
    needs_header = (
        not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    )
    if needs_header:
        with open(file_path, "w", encoding="utf-8", newline="") as f:
            csv.writer(f, lineterminator="\n").writerow(CSV_COLUMNS)

    listing_url = input(
        "Please enter the URL(example:https://so.gushiwen.cn/gushi/tangshi.aspx): "
    )

    poem_urls = []
    html_content = fetch_html(listing_url)
    if html_content:
        poem_urls.extend(extract_poem_urls(html_content))
    else:
        print("Failed to fetch or parse HTML content.")

    with open(file_path, "a", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        for poem_url in poem_urls:
            details = fetch_poem_details(poem_url)
            # NOTE(review): assumes fetch_poem_details returns a falsy value
            # when a page cannot be fetched or parsed, as fetch_html appears
            # to do - confirm against the shige module.
            if not details:
                print(f"Failed to fetch poem details: {poem_url}")
                continue
            print(f"Writing details for poem: {details['name']}")
            write_poem_row(writer, details)
            # Persist each poem as soon as it is written so an interrupted
            # run keeps everything collected so far.
            f.flush()


if __name__ == "__main__":
    main()