-
Notifications
You must be signed in to change notification settings - Fork 0
/
change_wiz.py
83 lines (64 loc) · 2.62 KB
/
change_wiz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#coding=gbk
import os
import time
import hashlib
import random
import codecs
from bs4 import BeautifulSoup
def process_html_file(file_path):
# 获取文件名(不含扩展名)
file_name = os.path.splitext(os.path.basename(file_path))[0]
# 获取当前时间
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# 计算文件内容的哈希值
content_hash = str(int(hashlib.sha256(str(int(time.time() * 1000)).encode('utf-8')).hexdigest()[:8], 16))
# 构建Markdown头部
md_header = f"""---
title: {file_name}
date: {current_time}
categories:
abbrlink: {content_hash}
---\n"""
# 读取文件内容
with open(file_path, 'r', encoding='utf-8-sig') as file:
html_content = file.read()
# 使用 BeautifulSoup 解析 HTML
soup = BeautifulSoup(html_content, 'html.parser')
# 找到所有的标题标签(h1到h6)
heading_tags = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
# 遍历每个标题标签
for heading_tag in heading_tags:
# 如果标签没有 id 属性,添加 id 属性
if not heading_tag.get('id'):
# 生成一个唯一的标识符(结合当前时间戳和随机数)
unique_id = int(time.time() * 1000) + random.randint(10000, 99999)
heading_tag['id'] = f'wiz-toc-{unique_id}' # 替换 random_number 为实际的随机数值
# 找到所有的 img 标签
img_tags = soup.find_all('img')
# 遍历每个 img 标签
for img_tag in img_tags:
src = img_tag['src'] # 获取 img 标签的 src 属性
title = img_tag.get('title', '') # 获取 img 标签的 title 属性,如果没有则为空字符串
# 去除 src 属性中的 '/' 前缀
src = src.split('/')[-1]
# 获取Markdown格式的字符串
markdown_img = f"\n\n![{title if title else src}]({src})\n"
# 用 Markdown 格式替代原始的 img 标签
new_tag = soup.new_tag('p')
new_tag.string = markdown_img
img_tag.replace_with(new_tag)
# 获取替换后的 Markdown 内容
new_md_content = str(soup)
# 将 Markdown 头部和内容写入文件
output_path = os.path.join(os.path.dirname(file_path), f"{file_name}.md")
with codecs.open(output_path, 'w', encoding='utf-8-sig') as file:
file.write(md_header + new_md_content)
# 获取对应的 _files 文件夹路径
files_folder_path = os.path.join(os.path.dirname(file_path), f"{file_name}_files")
# 如果存在 _files 文件夹,将其重命名为文件名
if os.path.exists(files_folder_path):
renamed_files_folder_path = os.path.join(os.path.dirname(file_path), file_name)
os.rename(files_folder_path, renamed_files_folder_path)
# 示例使用
file_path = '/mnt/d/longz/Desktop/004 为知笔记转hexo.htm' # 请替换为实际的文件路径
process_html_file(file_path)