Skip to content

Commit

Permalink
Fix various things
Browse files (browse the repository at this point in the history)
  • Loading branch information
antoinejeannot committed Sep 6, 2024
1 parent b5538c0 commit ddf98f3
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 11 deletions.
1 change: 0 additions & 1 deletion .github/workflows/export.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ jobs:

- name: Commit and push .env file
run: |
cp release_notes/${{env.VERSION}}.md README.md
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add .env README.md release_notes/${{env.VERSION}}.md
Expand Down
11 changes: 6 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

.PHONY: export install compress upload release
.PHONY: export install compress upload release-note

# Define the date format for CalVer
YEAR := $(shell date +"%Y")
Expand All @@ -19,11 +19,12 @@ compress:
@find ./raws/CA -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/cour_d_appel.jsonl.tar.gz -C . --files-from=-
@find ./raws/TJ -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/tribunal_judiciaire.jsonl.tar.gz -C . --files-from=-
@find ./raws/CC -name "*.jsonl" -type f -print0 | xargs -0 tar czvf compressed/cour_de_cassation.jsonl.tar.gz -C . --files-from=-

release-note:
@python jurisprudence.py release-note ./raws ./release_notes --version $(VERSION)
@cp release_notes/$(VERSION).md README.md

upload:
@cp ./metadata.yaml > ./compressed/README.md
@cp ./metadata.yaml ./compressed/README.md
@cat ./release_notes/$(VERSION).md >> ./compressed/README.md
@huggingface-cli upload --repo-type=dataset --commit-message="$(VERSION) 🏛️" --revision=main ajeannot/jurisprudence ./compressed

release-note:
@python jurisprudence.py release-note ./raws ./release_notes/$(VERSION).md
13 changes: 8 additions & 5 deletions jurisprudence.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,10 @@ def export(
default=".",
)
@click.option(
"--version", type=str, default=None, help="Version number for the release"
"--version",
type=str,
default=f"v{datetime.datetime.now().strftime('%Y.%m.%d')}",
help="Version number for the release",
)
def release_note(input_path: Path, output_path: Path, version: str):
"""
Expand All @@ -373,10 +376,10 @@ def release_note(input_path: Path, output_path: Path, version: str):
output_path: The directory where the release-note will be written.
version: The version number for the release. If not provided, uses the current date.
"""
if not version:
version = f"v{datetime.datetime.now().strftime('%Y.%m.%d')}"
encoding = tiktoken.encoding_for_model("gpt-4")
output_path = output_path / f"{version}.md"
release_note = '<p align="center"><img src="https://raw.githubusercontent.com/antoinejeannot/jurisprudence/artefacts/jurisprudence.svg" width=650></p>\n\n'
release_note += "[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/ajeannot/jurisprudence)\n\n"
release_note += f"# ✨ Jurisprudence, release {version} 🏛️\n\n"
release_note += "Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. It leverages the Judilibre API provided by the Cour de Cassation to:\n\n"
release_note += "- Fetch rulings from major French courts (Cour de Cassation, Cour d'Appel, Tribunal Judiciaire)\n"
Expand Down Expand Up @@ -458,7 +461,7 @@ def release_note(input_path: Path, output_path: Path, version: str):
# Add total row (excluding date range and download link for total)
release_note += f"| **Total** | **{_human_readable_size(total_size)}** | **{total_jurisprudences:,}** | - | - | **{total_tokens:,} +** | - |\n\n"
release_note += (
f"<i>Last update date: {version.lstrip("v").replace(".", "-")}</i>\n\n"
f"<i>Latest update date: {version.lstrip("v").replace(".", "-")}</i>\n\n"
)
release_note += "<i># Tokens are computed GPT-4 using tiktoken </i>\n\n"
release_note += "\n## 🤗 Hugging Face Dataset\n\n"
Expand All @@ -479,7 +482,7 @@ def release_note(input_path: Path, output_path: Path, version: str):
release_note += "This project relies on the [Judilibre API par la Cour de Cassation](https://www.data.gouv.fr/en/datasets/api-judilibre/), which is made available under the Open License 2.0 (Licence Ouverte 2.0)\n\n"
release_note += "It scans the API every 3 days at 2am UTC and exports its data in various formats to Hugging Face, without any fundamental transformation but conversions.\n\n"
release_note += '<p align="center"><a href="https://www.etalab.gouv.fr/licence-ouverte-open-licence/" alt="license ouverte / open license"><img src="https://raw.githubusercontent.com/antoinejeannot/jurisprudence/artefacts/license.png" width=50></a></p>\n\n'
output_path.write_text(release_note)
assert output_path.write_text(release_note)
console.print(f"[green]Release note generated at:[/green] {output_path}")


Expand Down
File renamed without changes.

0 comments on commit ddf98f3

Please sign in to comment.