import glob
import os
from pathlib import Path
from typing import Optional, Tuple

from jinja2 import Environment, PackageLoader, select_autoescape, Template
from jinja2.loaders import FileSystemLoader

import carte_cli.utils.frontmatter as frontmatter
from carte_cli.model.carte_table_model import TableMetadata


def _get_flattened_template(template_path: Optional[str]) -> Template:
    """Load the Jinja template used to render a flattened dataset.

    Args:
        template_path: Path to a user-supplied template file, or ``None`` to
            use the ``dataset_flattened.md`` template bundled in the
            ``carte_cli.utils`` package.

    Returns:
        The loaded Jinja template.

    Raises:
        jinja2.TemplateNotFound: If the requested template cannot be located.
    """
    if template_path is None:
        env = Environment(
            loader=PackageLoader("carte_cli.utils", "templates"),
            autoescape=select_autoescape(["md"]),
        )
        return env.get_template("dataset_flattened.md")

    # Jinja template names must be "/"-separated and may not contain "..",
    # so an absolute or parent-relative filesystem path cannot be used as the
    # template name directly. Search the template's own directory first and
    # look it up by file name; the working directory stays on the search path
    # so includes that were written relative to it still resolve.
    custom_template = Path(template_path)
    env = Environment(
        loader=FileSystemLoader(searchpath=[str(custom_template.parent), "./"])
    )
    return env.get_template(custom_template.name)


def flatten(
    input_dir: str, output_dir: str, template_path: Optional[str] = None
) -> None:
    """Render every dataset markdown file under ``input_dir`` into ``output_dir``.

    Files matching ``<input_dir>/*/*/*.md`` are parsed, rendered through the
    flattening template, and written to the same relative location below
    ``output_dir``. Missing output directories are created.

    Args:
        input_dir: Source metadata directory.
        output_dir: Destination directory for the flattened markdown files.
        template_path: Optional path to a custom template; ``None`` selects
            the bundled default.
    """
    template = _get_flattened_template(template_path)

    # Only files exactly three levels below input_dir are processed.
    pattern = os.path.join(input_dir, "*", "*", "*.md")

    for file_path in glob.glob(pattern):
        # relpath (rather than slicing by len(input_dir)) stays correct when
        # input_dir carries a trailing separator; a bad slice would leave a
        # leading "/" and make os.path.join discard output_dir entirely.
        relative_path = os.path.relpath(file_path, input_dir)
        output_path = os.path.join(output_dir, relative_path)

        metadata, content = frontmatter.parse(file_path)
        dataset = TableMetadata.from_frontmatter(metadata, content)
        flattened_metadata, flattened_content = flatten_dataset(dataset, template)

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        frontmatter.dump(output_path, flattened_metadata, flattened_content)

    print("Done!")


def flatten_dataset(dataset: TableMetadata, template: Template) -> Tuple[dict, str]:
    """Render one dataset through ``template``.

    Args:
        dataset: The dataset to render; it is passed to the template as
            ``dataset``.
        template: The Jinja template to render with.

    Returns:
        A ``(metadata, content)`` tuple where ``metadata`` holds only the
        dataset name under ``"title"`` and ``content`` is the rendered text.
    """
    metadata = {"title": dataset.name}
    content = template.render(dataset=dataset)
    return metadata, content
+{: .attribute-break} + +#### Database: {: .attribute } +`{{ dataset.database }}` + +
+{: .attribute-break} + +#### Location: {: .attribute } +`{{ dataset.location }}` + +
+{: .attribute-break} + +## Description + +{% if dataset.description.strip() != '' %} +{{ dataset.description.strip() }} +{% else %} +No description +{: .no-description } +{% endif %} + +## Columns + +{% for column in dataset.columns %} +##### {{ column.name }} {: .column-name } + +`{{ column.column_type }}`{: .column-type } +{% if column.description != none %} +{{- column.description -}} +{: .column-description } + +{% else %} +No description +{: .column-description .no-description } + +{% endif %} +{% endfor %} diff --git a/poetry.lock b/poetry.lock index 708de74..cf8d11c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -595,7 +595,7 @@ postgres = ["psycopg2", "SQLAlchemy"] [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "be4a58fdf242160f93ef98088f1f83c83599c5ebe585092d869385eea13d0cb0" +content-hash = "71336fc09e83b4b5cc3af42eb55c9e8471e6cbb9a37167f10047493f9e890ade" [metadata.files] amundsen-databuilder = [ diff --git a/pyproject.toml b/pyproject.toml index 85e883a..5231040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "carte-cli" -version = "0.2.5" +version = "0.3.0" description = "A static site generator for data catalogs" authors = ["Balint Haller "] license = "Apache-2.0" @@ -21,6 +21,7 @@ typer = "^0.3.2" click-spinner = "^0.1.10" psycopg2 = { version = "^2.8.6", optional = true } SQLAlchemy = { version = "^1.3.23", optional = true } +Jinja2 = "^2.11.3" [tool.poetry.extras] postgres = ["psycopg2", "SQLAlchemy"]