From d3a6a628b8bfd45ee2da9985e5bc51b111798905 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 09:22:45 +0100 Subject: [PATCH 01/11] flatten-command: add jinja as dependency --- poetry.lock | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 708de74..cf8d11c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -595,7 +595,7 @@ postgres = ["psycopg2", "SQLAlchemy"] [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "be4a58fdf242160f93ef98088f1f83c83599c5ebe585092d869385eea13d0cb0" +content-hash = "71336fc09e83b4b5cc3af42eb55c9e8471e6cbb9a37167f10047493f9e890ade" [metadata.files] amundsen-databuilder = [ diff --git a/pyproject.toml b/pyproject.toml index 85e883a..3dc8e18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ typer = "^0.3.2" click-spinner = "^0.1.10" psycopg2 = { version = "^2.8.6", optional = true } SQLAlchemy = { version = "^1.3.23", optional = true } +Jinja2 = "^2.11.3" [tool.poetry.extras] postgres = ["psycopg2", "SQLAlchemy"] From 5aff8f5d999748ac0f545ac72cf01ef53fefddb0 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 20:48:33 +0100 Subject: [PATCH 02/11] flatten-command: add flattening utility --- carte_cli/utils/flatten.py | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 carte_cli/utils/flatten.py diff --git a/carte_cli/utils/flatten.py b/carte_cli/utils/flatten.py new file mode 100644 index 0000000..0107407 --- /dev/null +++ b/carte_cli/utils/flatten.py @@ -0,0 +1,46 @@ +import os +import glob +from typing import Tuple +from jinja2 import Environment, PackageLoader, select_autoescape, Template +from pathlib import Path + +import carte_cli.utils.frontmatter as frontmatter +from carte_cli.model.carte_table_model import TableMetadata + +def _get_flattened_template(): + env = Environment( + loader=PackageLoader("carte_cli.utils", "templates"), + autoescape=select_autoescape(["md"]), + ) + template = env.get_template("dataset_flattened.md") + return template + + +def flatten(input_dir: str, output_dir: str) -> None: + + template = _get_flattened_template() + + file_paths = glob.glob(input_dir + "/*/*/*.md", recursive=True) + print(file_paths) + output_paths = [ + os.path.join(output_dir, file_path[len(input_dir) :]) + for file_path in file_paths + ] + + for file_path, output_path in zip(file_paths, output_paths): + print(f"Flattening dataset {file_path}") + metadata, content = frontmatter.parse(file_path) + dataset = TableMetadata.from_frontmatter(metadata, content) + flattened_metadata, flattened_content = flatten_dataset(dataset, template) + Path("/".join(output_path.split("/")[:-1])).mkdir( + parents=True, exist_ok=True + ) + frontmatter.dump(output_path, flattened_metadata, flattened_content) + + print("Done!") + + +def flatten_dataset(dataset: TableMetadata, template: Template) -> Tuple[dict, str]: + metadata = {"title": dataset.name} + content = template.render(dataset=dataset) + return metadata, content From 4f7962a3156c66ea4cb356c0749a09508d30475f Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 20:48:42 +0100 Subject: [PATCH 03/11] flatten-command: add flattened template --- .../utils/templates/dataset_flattened.md | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 carte_cli/utils/templates/dataset_flattened.md diff --git a/carte_cli/utils/templates/dataset_flattened.md b/carte_cli/utils/templates/dataset_flattened.md new file mode 100644 index 0000000..bd5896a --- /dev/null +++ b/carte_cli/utils/templates/dataset_flattened.md @@ -0,0 +1,35 @@ +# {{ dataset.name }} +Edit +
+

Type:

{{ dataset.table_type }} +
+
+

Database:

{{ dataset.database }} +
+
+

Location:

{{ dataset.location }} +
+
+ {{ dataset.description.strip() }} +
+ +{% for column in dataset.columns %} +
+
{{ column.name }}
+
{{ column.column_type }}
+ {% if column.description != none %} +
+ {{- column.description -}} +
+ {% else %} +
No description
+ {% endif %} + {% if column.values is not none %} +
+ {% for column_value in column.values %} + {{ column_value }} + {% endfor %} +
+ {% endif %} +
+{% endfor %} From 65f720d1f0282924de8edbdc711641dd85f69330 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 20:48:47 +0100 Subject: [PATCH 04/11] flatten-command: add flatten command --- carte_cli/main.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/carte_cli/main.py b/carte_cli/main.py index db16457..6852a9c 100644 --- a/carte_cli/main.py +++ b/carte_cli/main.py @@ -12,6 +12,7 @@ from carte_cli.loader.carte_loader import CarteLoader from carte_cli.utils.config_parser import parse_config from carte_cli.scaffolding.frontend import create_frontend_dir +from carte_cli.utils.flatten import flatten as execute_flatten app = typer.Typer() @@ -58,5 +59,18 @@ def new_frontend( create_frontend_dir(name, init_admin=(not no_admin), sample_data=(not no_sample)) +@app.command("flatten") +def flatten( + input_dir: str = typer.Argument( + ..., help="The source metadata directory, the same as the extraction output" + ), + output_dir: str = typer.Argument( + ..., help="The destination directory for flattened markdown files" + ), +): + execute_flatten(input_dir, output_dir) + + + if __name__ == "__main__": app() From fbb69489cef6cdcda96bcd396d7651b0fd0a8990 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 20:50:55 +0100 Subject: [PATCH 05/11] flatten-command: bump minor version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3dc8e18..5231040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "carte-cli" -version = "0.2.5" +version = "0.3.0" description = "A static site generator for data catalogs" authors = ["Balint Haller "] license = "Apache-2.0" From f7f1b5dcc6dd6be1684b82edbd691242d089952c Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Wed, 17 Mar 2021 21:02:14 +0100 Subject: [PATCH 06/11] flatten-command: remove print statements --- carte_cli/utils/flatten.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/carte_cli/utils/flatten.py b/carte_cli/utils/flatten.py index 0107407..ad7938c 100644 --- a/carte_cli/utils/flatten.py +++ b/carte_cli/utils/flatten.py @@ -21,14 +21,12 @@ def flatten(input_dir: str, output_dir: str) -> None: template = _get_flattened_template() file_paths = glob.glob(input_dir + "/*/*/*.md", recursive=True) - print(file_paths) output_paths = [ os.path.join(output_dir, file_path[len(input_dir) :]) for file_path in file_paths ] for file_path, output_path in zip(file_paths, output_paths): - print(f"Flattening dataset {file_path}") metadata, content = frontmatter.parse(file_path) dataset = TableMetadata.from_frontmatter(metadata, content) flattened_metadata, flattened_content = flatten_dataset(dataset, template) From 0cc0fc1a7263dc1022ed60ee9d8497fab5b52080 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Thu, 18 Mar 2021 16:00:02 +0100 Subject: [PATCH 07/11] flatten-command: add optional template parameter --- carte_cli/main.py | 3 ++- carte_cli/utils/flatten.py | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/carte_cli/main.py b/carte_cli/main.py index 6852a9c..1965700 100644 --- a/carte_cli/main.py +++ b/carte_cli/main.py @@ -67,8 +67,9 @@ def flatten( output_dir: str = typer.Argument( ..., help="The destination directory for flattened markdown files" ), + template: str = typer.Option(None, "--template", "-t", help="The template to use for flattening datasets") ): - execute_flatten(input_dir, output_dir) + execute_flatten(input_dir, output_dir, template) diff --git a/carte_cli/utils/flatten.py b/carte_cli/utils/flatten.py index ad7938c..0bfe745 100644 --- a/carte_cli/utils/flatten.py +++ b/carte_cli/utils/flatten.py @@ -4,21 +4,30 @@ from jinja2 import Environment, PackageLoader, select_autoescape, Template from pathlib import Path +from jinja2.loaders import FileSystemLoader + import carte_cli.utils.frontmatter as frontmatter from carte_cli.model.carte_table_model import TableMetadata -def _get_flattened_template(): - env = Environment( - loader=PackageLoader("carte_cli.utils", "templates"), - autoescape=select_autoescape(["md"]), - ) - template = env.get_template("dataset_flattened.md") + +def _get_flattened_template(template_path: str): + if template_path is None: + env = Environment( + loader=PackageLoader("carte_cli.utils", "templates"), + autoescape=select_autoescape(["md"]), + ) + template = env.get_template("dataset_flattened.md") + + else: + env = Environment(loader=FileSystemLoader(searchpath="./")) + template = env.get_template(template_path) + return template -def flatten(input_dir: str, output_dir: str) -> None: +def flatten(input_dir: str, output_dir: str, template_path: str) -> None: - template = _get_flattened_template() + template = _get_flattened_template(template_path) file_paths = glob.glob(input_dir + "/*/*/*.md", recursive=True) output_paths = [ @@ -30,9 +39,7 @@ def flatten(input_dir: str, output_dir: str) -> None: metadata, content = frontmatter.parse(file_path) dataset = TableMetadata.from_frontmatter(metadata, content) flattened_metadata, flattened_content = flatten_dataset(dataset, template) - Path("/".join(output_path.split("/")[:-1])).mkdir( - parents=True, exist_ok=True - ) + Path("/".join(output_path.split("/")[:-1])).mkdir(parents=True, exist_ok=True) frontmatter.dump(output_path, flattened_metadata, flattened_content) print("Done!") From b898e0da039a61b8642a02c8c0cdf5db9742f459 Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Thu, 18 Mar 2021 16:00:10 +0100 Subject: [PATCH 08/11] flatten-command: update template for MkDocs --- .../utils/templates/dataset_flattened.md | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/carte_cli/utils/templates/dataset_flattened.md b/carte_cli/utils/templates/dataset_flattened.md index bd5896a..8d6735f 100644 --- a/carte_cli/utils/templates/dataset_flattened.md +++ b/carte_cli/utils/templates/dataset_flattened.md @@ -1,35 +1,42 @@ -# {{ dataset.name }} -Edit -
-

Type:

{{ dataset.table_type }} -
-
-

Database:

{{ dataset.database }} -
-
-

Location:

{{ dataset.location }} -
-
- {{ dataset.description.strip() }} -
+# {{ dataset.name }} {: .page-title } + + + +#### Type: {: .attribute } +`{{ dataset.table_type }}` + +
+{: .attribute-break} + +#### Database: {: .attribute } +`{{ dataset.database }}` + +
+{: .attribute-break} + +#### Location: {: .attribute } +`{{ dataset.location }}` + +
+{: .attribute-break} + +#### Description + +{{ dataset.description.strip() }} + +#### Columns {% for column in dataset.columns %} -
-
{{ column.name }}
-
{{ column.column_type }}
- {% if column.description != none %} -
- {{- column.description -}} -
- {% else %} -
No description
- {% endif %} - {% if column.values is not none %} -
- {% for column_value in column.values %} - {{ column_value }} - {% endfor %} -
- {% endif %} -
+##### {{ column.name }} {: .column-name } + +`{{ column.column_type }}`{: .column-type } +{% if column.description != none %} +{{- column.description -}} +{: .column-description } + +{% else %} +No description +{: .column-description .no-description } + +{% endif %} {% endfor %} From 371a54f40c9b5b6bf517d35ec1c966618a0b1a5a Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Thu, 18 Mar 2021 16:07:46 +0100 Subject: [PATCH 09/11] flatten-command: fix output path handling --- carte_cli/utils/flatten.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/carte_cli/utils/flatten.py b/carte_cli/utils/flatten.py index 0bfe745..7fee5d0 100644 --- a/carte_cli/utils/flatten.py +++ b/carte_cli/utils/flatten.py @@ -31,7 +31,7 @@ def flatten(input_dir: str, output_dir: str, template_path: str) -> None: file_paths = glob.glob(input_dir + "/*/*/*.md", recursive=True) output_paths = [ - os.path.join(output_dir, file_path[len(input_dir) :]) + os.path.join(output_dir, file_path[(len(input_dir)+1) :]) for file_path in file_paths ] @@ -39,8 +39,9 @@ def flatten(input_dir: str, output_dir: str, template_path: str) -> None: metadata, content = frontmatter.parse(file_path) dataset = TableMetadata.from_frontmatter(metadata, content) flattened_metadata, flattened_content = flatten_dataset(dataset, template) - Path("/".join(output_path.split("/")[:-1])).mkdir(parents=True, exist_ok=True) - frontmatter.dump(output_path, flattened_metadata, flattened_content) + output_file = os.path.join(output_dir, output_path) + Path("/".join(output_file.split("/")[:-1])).mkdir(parents=True, exist_ok=True) + frontmatter.dump(output_file, flattened_metadata, flattened_content) print("Done!") From 1308a60c751b34c3b89be420562d41ff7cd7a3cf Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Thu, 18 Mar 2021 16:24:13 +0100 Subject: [PATCH 10/11] flatten-command: fix path calculation --- carte_cli/utils/flatten.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/carte_cli/utils/flatten.py b/carte_cli/utils/flatten.py index 7fee5d0..77b7a4a 100644 --- a/carte_cli/utils/flatten.py +++ b/carte_cli/utils/flatten.py @@ -39,9 +39,8 @@ def flatten(input_dir: str, output_dir: str, template_path: str) -> None: metadata, content = frontmatter.parse(file_path) dataset = TableMetadata.from_frontmatter(metadata, content) flattened_metadata, flattened_content = flatten_dataset(dataset, template) - output_file = os.path.join(output_dir, output_path) - Path("/".join(output_file.split("/")[:-1])).mkdir(parents=True, exist_ok=True) - frontmatter.dump(output_file, flattened_metadata, flattened_content) + Path("/".join(output_path.split("/")[:-1])).mkdir(parents=True, exist_ok=True) + frontmatter.dump(output_path, flattened_metadata, flattened_content) print("Done!") From 3102ac32a0ab1b78401addedd1e09429b621a26c Mon Sep 17 00:00:00 2001 From: Balint Haller Date: Thu, 18 Mar 2021 16:24:27 +0100 Subject: [PATCH 11/11] flatten-command: better page structure --- carte_cli/utils/templates/dataset_flattened.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/carte_cli/utils/templates/dataset_flattened.md b/carte_cli/utils/templates/dataset_flattened.md index 8d6735f..08802dd 100644 --- a/carte_cli/utils/templates/dataset_flattened.md +++ b/carte_cli/utils/templates/dataset_flattened.md @@ -3,7 +3,7 @@ #### Type: {: .attribute } -`{{ dataset.table_type }}` +`{{ dataset.table_type.value }}`
{: .attribute-break} @@ -20,11 +20,16 @@
{: .attribute-break} -#### Description +## Description +{% if dataset.description.strip() != '' %} {{ dataset.description.strip() }} +{% else %} +No description +{: .no-description } +{% endif %} -#### Columns +## Columns {% for column in dataset.columns %} ##### {{ column.name }} {: .column-name }