-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstart.py
153 lines (127 loc) · 5.54 KB
/
start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import argparse # Import argparse module
import glob
import json
import logging
import os
import re
import shutil # Import shutil for directory operations
from datetime import datetime, timezone
from os.path import join
from rich.traceback import install
# Activate the traceback handler imported above from rich.traceback.
install()
# Absolute path of the directory that contains this script.
abs_dir = os.path.dirname(os.path.abspath(__file__))
# Constants
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
# NOTE(review): MARKDOWN_TEMPLATE and ISO_DATE_FORMAT are not referenced in the
# visible part of this file - confirm whether they are still needed.
MARKDOWN_TEMPLATE = "# {title}\n\n{content}"
ISO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"
# Output directory for the generated Markdown files, located next to this
# script. It is created at import time if it does not exist yet.
markdown_dir_path = join(abs_dir, "data/markdown")
os.makedirs(markdown_dir_path, exist_ok=True)
# Configure root logging once for the whole script (INFO level, LOG_FORMAT).
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# File extensions: the JSON one is used to glob the input files in main(),
# the Markdown one is appended to every output file name.
JSON_FILE_EXTENSION = ".json"
MARKDOWN_FILE_EXTENSION = ".md"
def clear_directory(directory):
    """Remove every entry inside ``directory`` while keeping the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. Deletion is best-effort: a failure on one entry is logged and
    the remaining entries are still processed.

    Args:
        directory: Path of the directory to empty. It must already exist;
            ``os.listdir`` raises ``OSError`` otherwise.
    """
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            # Links are tested before directories so that a symlink pointing at
            # a directory is unlinked instead of being passed to rmtree.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # unlink/rmtree report failures as OSError; anything else is a
            # programming error and should not be swallowed here.
            logging.error("Failed to delete %s. Reason: %s", file_path, e)
def main():
    """Convert a directory of Google Keep JSON exports into Markdown notes.

    Every ``*.json`` file found directly inside ``--input_dir`` is turned into
    one Markdown file (YAML front matter, heading, label list, note text)
    inside ``markdown_dir_path``. That output directory is emptied first.

    Files that are not valid JSON, or whose labels/timestamps cannot be read,
    are logged and skipped so that one bad export does not abort the run.

    Raises:
        SystemExit: Raised by argparse when ``--input_dir`` is missing or does
            not name an existing directory (checked before anything is
            deleted).
        Exception: Any error raised while writing a Markdown file is logged
            and then re-raised.
    """
    parser = argparse.ArgumentParser(
        description="Convert Google Keep JSON files to Markdown."
    )
    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Directory containing JSON files",
    )
    args = parser.parse_args()
    json_dir = args.input_dir

    # Validate the input before touching the output directory: a mistyped
    # path must not wipe previously generated notes.
    if not os.path.isdir(json_dir):
        parser.error(f"--input_dir is not a directory: {json_dir}")

    # Clear the markdown directory before starting
    clear_directory(markdown_dir_path)
    logging.info("Cleared markdown directory.")

    # Sorted so that indices in the log (and the winner of any file-name
    # collision) are deterministic across runs.
    json_files = sorted(glob.glob(join(json_dir, f"*{JSON_FILE_EXTENSION}")))
    print(f"Found {len(json_files)} JSON files.")

    # Typical per-component file-name limit; adjust for unusual filesystems.
    max_filename_length = 255
    # %d = day of month and %I = 12-hour clock, which is what a trailing
    # AM/PM marker (%p) calls for. (%w would be the weekday number 0-6.)
    display_date_format = "%A, %B %d %Y, %I:%M %p"
    filename_date_format = "%Y-%m-%d-%H-%M"

    for position, file_path in enumerate(json_files, start=1):
        # Load the note; an unreadable file is reported and skipped instead of
        # crashing the whole conversion.
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                data = json.load(file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            logging.error(f"Invalid JSON file [Index {position}: {file_path}]")
            continue

        # Extract the fields we need. The timestamps are mandatory, the other
        # fields fall back to defaults when absent.
        try:
            text_content = data.get("textContent", "")
            title = data.get("title", "NoTitle")
            labels = [label["name"] for label in data.get("labels", [])]
            # Keep stores timestamps in microseconds since the epoch.
            created_seconds = int(data["createdTimestampUsec"]) / 1e6
            edited_seconds = int(data["userEditedTimestampUsec"]) / 1e6
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            logging.error(
                f"Error extracting data from JSON file [Index {position}: {file_path}]: {e}"
            )
            continue  # Skip processing this file

        created_at = datetime.fromtimestamp(created_seconds, timezone.utc)
        edited_at = datetime.fromtimestamp(edited_seconds, timezone.utc)
        formatted_created_date = created_at.strftime(display_date_format)
        formatted_modified_date = edited_at.strftime(display_date_format)
        created_filename_date = created_at.strftime(filename_date_format)

        # Replace every run of non-word characters with '_' so the title is
        # safe to use in a file name.
        title = re.sub(r"\W+", "_", title)

        # Truncate the title so that "<date>-<title><ext>" fits the limit.
        max_title_length = (
            max_filename_length
            - len(created_filename_date)
            - len(MARKDOWN_FILE_EXTENSION)
            - 1  # the hyphen between date and title
        )
        title = title[:max_title_length]

        markdown_file_name = f"{created_filename_date}-{title}{MARKDOWN_FILE_EXTENSION}"

        try:
            with open(
                join(markdown_dir_path, markdown_file_name), "w", encoding="utf-8"
            ) as markdown_file:
                # YAML front matter
                markdown_file.write("---\n")
                markdown_file.write(f"title: {title}\n")
                markdown_file.write(f"date created: {formatted_created_date}\n")
                markdown_file.write(f"date modified: {formatted_modified_date}\n")
                markdown_file.write("---\n\n")
                # Markdown content
                markdown_file.write(f"### {title}\n\n")
                for label in labels:
                    markdown_file.write(f"* {label}\n")
                markdown_file.write(f"\n{text_content}")
            logging.info(
                f"Markdown file '{markdown_file_name}' created successfully. [Index {position}]"
            )
        except Exception as e:
            logging.error(f"Error writing to file '{markdown_file_name}': {e}")
            raise
if __name__ == "__main__":
main()