From 25449ef36471a1029cf063e3cc5df7e5edab5b01 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 25 Oct 2024 01:39:46 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Bump=20v2024.10.25?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 ++--- jurisprudence/settings.py | 2 +- release_notes/v2024.10.25.md | 90 ++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 release_notes/v2024.10.25.md diff --git a/README.md b/README.md index 54e5d12..a2e0c66 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/antoinejeannot/jurisprudence) [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/antoinejeannot/jurisprudence) -# ✨ Jurisprudence, release v2024.10.22 🏛️ +# ✨ Jurisprudence, release v2024.10.25 🏛️ Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. 
It leverages the Judilibre API provided by the Cour de Cassation to: @@ -17,12 +17,12 @@ Whether you're conducting legal research, developing AI models, or simply intere | Jurisdiction | Jurisprudences | Oldest | Latest | Tokens | JSONL (gzipped) | Parquet | |--------------|----------------|--------|--------|--------|-----------------|---------| -| Cour d'Appel | 394,430 | 1996-03-25 | 2024-10-17 | 1,973,704,478 | [Download (1.73 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.88 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | -| Tribunal Judiciaire | 79,188 | 2023-12-14 | 2024-10-17 | 282,055,094 | [Download (254.85 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (422.93 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | -| Cour de Cassation | 537,000 | 1860-08-01 | 2024-10-17 | 1,107,418,242 | [Download (931.99 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | -| **Total** | **1,010,618** | **1860-08-01** | **2024-10-17** | **3,363,177,814** | **2.89 GB** | **4.88 GB** | +| Cour d'Appel | 395,224 | 1996-03-25 | 2024-10-18 | 1,977,104,348 | [Download (1.73 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.89 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | +| Tribunal Judiciaire | 79,721 | 2023-12-14 | 2024-10-17 | 283,788,133 | [Download (256.39 
MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (425.64 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | +| Cour de Cassation | 537,065 | 1860-08-01 | 2024-10-24 | 1,107,898,877 | [Download (932.22 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | +| **Total** | **1,012,010** | **1860-08-01** | **2024-10-24** | **3,368,791,358** | **2.89 GB** | **4.88 GB** | -Latest update date: 2024-10-22 +Latest update date: 2024-10-25 # Tokens are computed using GPT-4 tiktoken and the `text` column. diff --git a/jurisprudence/settings.py b/jurisprudence/settings.py index 509fcab..dba8f10 100644 --- a/jurisprudence/settings.py +++ b/jurisprudence/settings.py @@ -1 +1 @@ -JURISPRUDENCE_LAST_EXPORT_DATETIME = "2024-10-22 01:05:15" +JURISPRUDENCE_LAST_EXPORT_DATETIME = "2024-10-25 01:05:25" diff --git a/release_notes/v2024.10.25.md b/release_notes/v2024.10.25.md new file mode 100644 index 0000000..a2e0c66 --- /dev/null +++ b/release_notes/v2024.10.25.md @@ -0,0 +1,90 @@ +

+ +[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/antoinejeannot/jurisprudence) [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/antoinejeannot/jurisprudence) + +# ✨ Jurisprudence, release v2024.10.25 🏛️ + +Jurisprudence is an open-source project that automates the collection and distribution of French legal decisions. It leverages the Judilibre API provided by the Cour de Cassation to: + +- Fetch rulings from major French courts (Cour de Cassation, Cour d'Appel, Tribunal Judiciaire) +- Process and convert the data into easily accessible formats +- Publish & version updated datasets on Hugging Face every few days. + +It aims to democratize access to legal information, enabling researchers, legal professionals and the public to easily access and analyze French court decisions. +Whether you're conducting legal research, developing AI models, or simply interested in French jurisprudence, this project might provide a valuable, open resource for exploring the French legal landscape. 
+ +## 📊 Exported Data + +| Jurisdiction | Jurisprudences | Oldest | Latest | Tokens | JSONL (gzipped) | Parquet | +|--------------|----------------|--------|--------|--------|-----------------|---------| +| Cour d'Appel | 395,224 | 1996-03-25 | 2024-10-18 | 1,977,104,348 | [Download (1.73 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.jsonl.gz?download=true) | [Download (2.89 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_d_appel.parquet?download=true) | +| Tribunal Judiciaire | 79,721 | 2023-12-14 | 2024-10-17 | 283,788,133 | [Download (256.39 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.jsonl.gz?download=true) | [Download (425.64 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/tribunal_judiciaire.parquet?download=true) | +| Cour de Cassation | 537,065 | 1860-08-01 | 2024-10-24 | 1,107,898,877 | [Download (932.22 MB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.jsonl.gz?download=true) | [Download (1.58 GB)](https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet?download=true) | +| **Total** | **1,012,010** | **1860-08-01** | **2024-10-24** | **3,368,791,358** | **2.89 GB** | **4.88 GB** | + +Latest update date: 2024-10-25 + +# Tokens are computed using GPT-4 tiktoken and the `text` column. + +## 🤗 Hugging Face Dataset + +The up-to-date jurisprudences dataset is available at: https://huggingface.co/datasets/antoinejeannot/jurisprudence in JSONL (gzipped) and parquet formats. + +This allows you to easily fetch, query, process and index all jurisprudences in the blink of an eye! 
+ +### Usage Examples +#### HuggingFace Datasets +```python +# pip install datasets +from datasets import load_dataset + +dataset = load_dataset("antoinejeannot/jurisprudence") +dataset.shape +>> {'tribunal_judiciaire': (58986, 33), +'cour_d_appel': (378392, 33), +'cour_de_cassation': (534258, 33)} + +# alternatively, you can load each jurisdiction separately +cour_d_appel = load_dataset("antoinejeannot/jurisprudence", "cour_d_appel") +tribunal_judiciaire = load_dataset("antoinejeannot/jurisprudence", "tribunal_judiciaire") +cour_de_cassation = load_dataset("antoinejeannot/jurisprudence", "cour_de_cassation") +``` + +Leveraging datasets allows you to easily ingest data into [PyTorch](https://huggingface.co/docs/datasets/use_with_pytorch), [TensorFlow](https://huggingface.co/docs/datasets/use_with_tensorflow), [JAX](https://huggingface.co/docs/datasets/use_with_jax) etc. + +#### BYOL: Bring Your Own Lib +For analysis, using polars, pandas or duckdb is quite common and also possible: +```python +url = "https://huggingface.co/datasets/antoinejeannot/jurisprudence/resolve/main/cour_de_cassation.parquet" # or tribunal_judiciaire.parquet, cour_d_appel.parquet + +# pip install polars +import polars as pl +df = pl.scan_parquet(url) + +# pip install pandas +import pandas as pd +df = pd.read_parquet(url) + +# pip install duckdb +import duckdb +table = duckdb.read_parquet(url) +``` + +## 🪪 Citing & Authors + +If you use this code in your research, please use the following BibTeX entry: +```bibtex +@misc{antoinejeannot2024, +author = {Jeannot Antoine and {Cour de Cassation}}, +title = {Jurisprudence}, +year = {2024}, +howpublished = {\url{https://github.com/antoinejeannot/jurisprudence}}, +note = {Data source: API Judilibre, \url{https://www.data.gouv.fr/en/datasets/api-judilibre/}} +} +``` + +This project relies on the [Judilibre API by the Cour de Cassation](https://www.data.gouv.fr/en/datasets/api-judilibre/), which is made available under the Open License 2.0 (Licence Ouverte 2.0). + +It scans 
the API every 3 days at midnight UTC and exports its data in various formats to Hugging Face, without any transformation other than format conversion. + +

license ouverte / open license

\ No newline at end of file