diff --git a/.github/workflows/export.yml b/.github/workflows/export.yml index 307957e..8555a0f 100644 --- a/.github/workflows/export.yml +++ b/.github/workflows/export.yml @@ -43,7 +43,6 @@ jobs: - name: Commit and push .env file run: | - cp release_notes/${{env.VERSION}}.md README.md git config --local user.email "action@github.com" git config --local user.name "GitHub Action" git add .env README.md release_notes/${{env.VERSION}}.md diff --git a/Makefile b/Makefile index 4f29bfd..f213b27 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -.PHONY: export install compress upload release +.PHONY: export install compress upload release-note # Define the date format for CalVer YEAR := $(shell date +"%Y") @@ -19,11 +19,12 @@ compress: @find ./raws/CA -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/cour_d_appel.jsonl.tar.gz -C . --files-from=- @find ./raws/TJ -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/tribunal_judiciaire.jsonl.tar.gz -C . --files-from=- @find ./raws/CC -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/cour_de_cassation.jsonl.tar.gz -C . --files-from=- + +release-note: + @python jurisprudence.py release-note ./raws ./release_notes --version $(VERSION) + @cp release_notes/$(VERSION).md README.md upload: - @cp ./metadata.yaml > ./compressed/README.md + @cp ./metadata.yaml ./compressed/README.md @cat ./release_notes/$(VERSION).md >> ./compressed/README.md @huggingface-cli upload --repo-type=dataset --commit-message="✨ $(VERSION) 🏛️" --revision=main ajeannot/jurisprudence ./compressed - -release-note: - @python jurisprudence.py release-note ./raws ./release_notes/$(VERSION).md diff --git a/jurisprudence.py b/jurisprudence.py index 21a0663..d4494ee 100644 --- a/jurisprudence.py +++ b/jurisprudence.py @@ -362,7 +362,10 @@ def export( default=".", ) @click.option( - "--version", type=str, default=None, help="Version number for the release" + "--version", + type=str, + default=f"v{datetime.datetime.now().strftime('%Y.%m.%d')}", + help="Version number for the release", ) def release_note(input_path: Path, output_path: Path, version: str): """ @@ -373,10 +376,10 @@ def release_note(input_path: Path, output_path: Path, version: str): output_path: The directory where the release-note will be written. version: The version number for the release. If not provided, uses the current date. """ - if not version: - version = f"v{datetime.datetime.now().strftime('%Y.%m.%d')}" encoding = tiktoken.encoding_for_model("gpt-4") + output_path = output_path / f"{version}.md" release_note = '

\n\n' + release_note += "[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/ajeannot/jurisprudence)\n\n" release_note += f"# ✨ Jurisprudence, release {version} 🏛️\n\n" release_note += "Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. It leverages the Judilibre API provided by the Cour de Cassation to:\n\n" release_note += "- Fetch rulings from major French courts (Cour de Cassation, Cour d'Appel, Tribunal Judiciaire)\n" @@ -458,7 +461,7 @@ def release_note(input_path: Path, output_path: Path, version: str): # Add total row (excluding date range and download link for total) release_note += f"| **Total** | **{_human_readable_size(total_size)}** | **{total_jurisprudences:,}** | - | - | **{total_tokens:,} +** | - |\n\n" release_note += ( - f"Last update date: {version.lstrip("v").replace(".", "-")}\n\n" + f"Latest update date: {version.lstrip("v").replace(".", "-")}\n\n" ) release_note += "# Tokens are computed GPT-4 using tiktoken \n\n" release_note += "\n## 🤗 Hugging Face Dataset\n\n" @@ -479,7 +482,7 @@ def release_note(input_path: Path, output_path: Path, version: str): release_note += "This project relies on the [Judilibre API par la Cour de Cassation](https://www.data.gouv.fr/en/datasets/api-judilibre/), which is made available under the Open License 2.0 (Licence Ouverte 2.0)\n\n" release_note += "It scans the API every 3 days at 2am UTC and exports its data in various formats to Hugging Face, without any fundamental transformation but conversions.\n\n" release_note += '

\n\n' - output_path.write_text(release_note) + assert output_path.write_text(release_note) console.print(f"[green]Release note generated at:[/green] {output_path}") diff --git a/metadata.yml b/metadata.yaml similarity index 100% rename from metadata.yml rename to metadata.yaml