Skip to content

Commit cf25053

Browse files
committed
Add Markdown parsing
1 parent f376137 commit cf25053

File tree

3 files changed

+483
-2
lines changed

3 files changed

+483
-2
lines changed

catalog/utilities/media_props_gen/generate_media_properties.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
# Constants
1616
DOC_MD_PATH = Path(__file__).parent / "media_properties.md"
17+
SOURCE_MD_PATH = Path(__file__).parent / "media_props.md"
1718
LOCAL_POSTGRES_FOLDER = Path(__file__).parents[3] / "docker" / "upstream_db"
1819

1920
SQL_PATH = {
@@ -110,13 +111,42 @@ def add_column_props(media_props, python_columns):
110111
return media_props
111112

112113

114+
def parse_markdown(path: "Path | None" = None) -> dict[str, dict[str, str]]:
    """
    Parse the markdown source file into per-field description sections.

    Lines starting with ``# `` open a new field and lines starting with
    ``## `` open a new section of the current field. Every other non-blank
    line is appended (with its trailing newline) to the current section.

    :param path: markdown file to parse; when omitted, ``SOURCE_MD_PATH``
        is used.
    :return: a dictionary mapping each field name to a dictionary of
        ``{section name: section text}``. Fields without any section are
        left out.
    """
    source_path = SOURCE_MD_PATH if path is None else path
    with open(source_path, encoding="utf-8") as f:
        # Blank lines carry no information for the parser, drop them early.
        contents = [line for line in f if line.strip()]

    properties: dict[str, dict[str, str]] = {}
    current_field = ""
    sections: dict[str, str] = {}
    current_section = None

    for line in contents:
        if line.startswith("# "):
            # A new field begins: store the one collected so far.
            if current_field and sections:
                properties[current_field] = sections
            # Only strip the leading marker, not every "# " in the line.
            current_field = line.removeprefix("# ").strip()
            sections = {}
            # Sections never carry over from the previous field.
            current_section = None
        elif line.startswith("## "):
            current_section = line.removeprefix("## ").strip()
            sections[current_section] = ""
        elif current_section is not None:
            sections[current_section] += line
        # Text outside of any section has nowhere to go and is skipped
        # (it used to raise a KeyError).

    # The last field has no following heading to trigger the store above.
    if current_field and sections:
        properties[current_field] = sections

    return properties
140+
141+
113142
def generate_media_props() -> dict:
114143
"""
115144
Generate a dictionary with the media properties from the database,
116145
python code and markdown documentation files.
117146
"""
118147
media_props = {}
119148
python_columns = parse_python_columns()
149+
120150
for media_type in ["image", "audio"]:
121151
media_props[media_type] = create_db_props_dict(media_type)
122152
media_props[media_type] = add_column_props(
@@ -148,7 +178,35 @@ def generate_media_props_table(media_properties) -> str:
148178
return table
149179

150180

151-
def generate_markdown_doc(media_properties: dict[str, dict]) -> str:
181+
def generate_media_props_doc(
    markdown_descriptions: dict, media_properties: dict
) -> str:
    """
    Generate the long-form documentation for each media property.

    :param markdown_descriptions: mapping of property name to a mapping of
        ``{section name: section text}``.
    :param media_properties: mapping of media type to a container of the
        property names that media type has.
    :return: the markdown text with one ``###`` heading per property,
        followed by the media types that have the property and one ``####``
        heading per non-empty section. Properties whose sections are all
        empty are left out.
    """
    doc_parts = []
    for prop, description in markdown_descriptions.items():
        sections = [
            f"#### {name}\n\n{text}\n\n"
            for name, text in description.items()
            if text
        ]
        # A property without any documented section gets no heading at all.
        if not sections:
            continue

        media_types = [
            media_type
            for media_type, props in media_properties.items()
            if prop in props
        ]
        doc_parts.append(
            f"### {prop}\n\nMedia Types: {', '.join(media_types)}\n\n"
        )
        doc_parts.extend(sections)

    return "".join(doc_parts)
205+
206+
207+
def generate_markdown_doc(
208+
media_properties: dict[str, dict], markdown_descriptions: dict[str, dict]
209+
) -> str:
152210
"""
153211
Generate the tables with media properties database column and
154212
Python objects characteristics.
@@ -162,13 +220,17 @@ def generate_markdown_doc(media_properties: dict[str, dict]) -> str:
162220
media_props_doc += f"""## Audio Properties\n
163221
{generate_media_props_table(media_properties["audio"])}
164222
"""
223+
media_props_doc += f"""## Media Property Descriptions\n
224+
{generate_media_props_doc(markdown_descriptions, media_properties)}
225+
"""
165226
return media_props_doc
166227

167228

168229
def write_media_props_doc(path: Path = DOC_MD_PATH) -> None:
    """
    Generate the media properties documentation and write it to a file.

    The document is built by ``generate_markdown_doc`` from the output of
    ``generate_media_props`` and ``parse_markdown``.

    :param path: file the generated markdown is written to.
    """
    media_properties = generate_media_props()
    markdown_descriptions = parse_markdown()
    doc_text = generate_markdown_doc(media_properties, markdown_descriptions)
    log.info("Writing media properties doc to %s", path)
    path.write_text(doc_text, encoding="utf-8")
174236

0 commit comments

Comments
 (0)