feat: add CLI command to pre-annotate object detection tasks with Yolo-world (#350)

* feat: ➕ Pre-annotation Yolo-world + label studio
* feat: 📈 Annotation & Export
* feat: ⚡ Add code to Crop-Detection
* refactor: ✨ Add __main__.py
* refactor: 🎨 Refactor cli command
* fix: ⚡ Wrong model path fixed
1 parent 00d8dce · commit 6b7abe4

Showing 16 changed files with 582 additions and 10 deletions.
@@ -0,0 +1,145 @@

```python
import os
import uuid
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Sequence

import tqdm
from ultralytics import YOLO
from ultralytics.engine.results import Results

from openfoodfacts.utils import get_logger

logger = get_logger(__name__)

IMAGE_FORMAT = [".jpg", ".jpeg", ".png"]
MODEL_NAME = "yolov8x-worldv2.pt"
LABELS = ["packaging"]


def format_object_detection_sample_from_yolo(
    images_dir: Path,
    model_name: str,
    labels: List[str],
    batch_size: int,
) -> List[Dict]:
    """Build Label Studio tasks for every image in a directory, pre-annotated
    with YOLO-World predictions."""
    logger.info("Loading images from %s", images_dir)
    image_paths = [
        image_path
        for image_path in images_dir.iterdir()
        if image_path.suffix in IMAGE_FORMAT
    ]
    logger.info("Found %d images in %s", len(image_paths), images_dir)
    ls_data = generate_ls_data_from_images(image_paths=image_paths)
    logger.info("Pre-annotating images with YOLO")
    predictions = format_predictions_from_yolo(
        image_paths=image_paths,
        model_name=model_name,
        labels=labels,
        batch_size=batch_size,
    )
    return [
        {
            "data": {
                "image_id": data["image_id"],
                "image_url": data["image_url"],
                "split": "train",
            },
            "predictions": [prediction] if prediction["result"] else [],
        }
        for data, prediction in zip(ls_data, predictions)
    ]


def generate_ls_data_from_images(image_paths: Iterable[Path]) -> Iterator[Dict]:
    for image_path in image_paths:
        yield {
            "image_id": image_path.stem.replace("_", "-"),
            "image_url": transform_id_to_url(image_path.name),
        }


def transform_id_to_url(image_id: str) -> str:
    """Format image_id: 325_938_117_1114_1 =>
    https://images.openfoodfacts.org/images/products/325/938/117/1114/1"""
    off_base_url = "https://images.openfoodfacts.org/images/products/"
    return os.path.join(off_base_url, "/".join(image_id.split("_")))


def format_predictions_from_yolo(
    image_paths: Sequence[Path],
    model_name: str,
    labels: List[str],
    batch_size: int,
) -> Iterator[Dict]:
    """Convert raw YOLO results into Label Studio prediction dicts."""
    results = pre_annotate_with_yolo(
        image_paths=image_paths,
        model_name=model_name,
        labels=labels,
        batch_size=batch_size,
    )
    for batch in results:
        for result in batch:
            annotation_results = []
            orig_height, orig_width = result.orig_shape
            model_version = model_name.split("/")[-1]
            for xyxyn in result.boxes.xyxyn:
                # A box was found: convert normalized corner coordinates into
                # the percentage-based x/y/width/height Label Studio expects.
                if len(xyxyn) > 0:
                    xyxyn = xyxyn.tolist()
                    x1 = xyxyn[0] * 100
                    y1 = xyxyn[1] * 100
                    x2 = xyxyn[2] * 100
                    y2 = xyxyn[3] * 100
                    width = x2 - x1
                    height = y2 - y1
                    annotation_results.append(
                        {
                            "id": str(uuid.uuid4())[:5],
                            "type": "rectanglelabels",
                            "from_name": "label",
                            "to_name": "image",
                            "original_width": orig_width,
                            "original_height": orig_height,
                            "image_rotation": 0,
                            "value": {
                                "rotation": 0,
                                "x": x1,
                                "y": y1,
                                "width": width,
                                "height": height,
                                "rectanglelabels": ["product"],  # Label Studio label
                            },
                        },
                    )
            yield {
                "model_version": model_version,
                "result": annotation_results,
            }


def pre_annotate_with_yolo(
    image_paths: Sequence[Path],
    model_name: str,
    labels: List[str],
    batch_size: int,
    conf: float = 0.1,
    max_det: int = 1,
) -> Iterator[Iterable[Results]]:
    """To speed up annotation, we leverage YOLO-World and its capacity to
    detect objects described in natural language.
    https://docs.ultralytics.com/modes/predict/#working-with-results"""
    model = YOLO(model_name)
    model.set_classes(labels)
    # Split image_paths into batches before prediction.
    batches = _batch(image_paths, batch_size=batch_size)
    for batch in tqdm.tqdm(batches, desc="Yolo-predictions"):
        results = model.predict(
            batch,
            conf=conf,
            max_det=max_det,
        )
        yield results


def _batch(iterable: Sequence, batch_size: int) -> Iterator:
    """Yield successive slices of `batch_size` items from a sequence."""
    total = len(iterable)
    for ndx in range(0, total, batch_size):
        yield iterable[ndx : min(ndx + batch_size, total)]
```
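A hypothetical usage sketch (not part of the commit): run the pipeline over a local image directory and dump the resulting tasks to a JSON file ready for Label Studio import. The directory, output filename and batch size are illustrative assumptions; the actual entry point lives in the repository's CLI.

```python
# Hypothetical driver code: paths and batch size are assumptions,
# not part of the commit.
import json
from pathlib import Path

tasks = format_object_detection_sample_from_yolo(
    images_dir=Path("data/images"),
    model_name=MODEL_NAME,  # "yolov8x-worldv2.pt"
    labels=LABELS,          # ["packaging"]
    batch_size=8,
)

# Each task carries the image URL plus zero or one pre-annotation.
with open("ls_tasks.json", "w") as f:
    json.dump(tasks, f, indent=2)
```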
@@ -0,0 +1,6 @@

```xml
<View>
  <Image name="image" value="$image_url"/>
  <RectangleLabels name="label" toName="image">
    <Label value="product" background="blue"/>
  </RectangleLabels>
</View>
```

This Label Studio labeling config matches the predictions generated above: `name="label"` and `toName="image"` line up with the `from_name`/`to_name` fields, and the `product` label with the hardcoded `rectanglelabels` value.
@@ -0,0 +1,22 @@

```makefile
MODEL_URL = "https://huggingface.co/openfoodfacts/crop-detection/resolve/main/weights/best_saved_model/best_float16.tflite?download=true"
MODEL_PATH = models/yolov8n_float16.tflite

.PHONY: init hello install load-model

init: hello install load-model

hello:
	@echo "🍋Welcome to the Crop Detection project.🍋"

install:
	@echo "Install dependencies."
	pip install -r requirements.txt

load-model:
	@echo "Load model from the HF repository 🤗: https://huggingface.co/openfoodfacts/crop-detection"
	@if [ ! -f "${MODEL_PATH}" ]; then \
		echo "Model not found. Downloading from HF repository 🤗..."; \
		mkdir -p models; \
		wget -O "${MODEL_PATH}" "${MODEL_URL}" ; \
	else \
		echo "Model already exists in models/"; \
	fi
```
@@ -0,0 +1,114 @@

# :lemon: Crop detection :lemon:

When contributors use the mobile app, they are asked to take pictures of the product, then to crop them. But this stage is tedious, especially when contributors need to add several pictures of the same product.

To assist users during the process, we created a crop-detection model designed to detect the product edges. We fine-tuned **Yolov8n** on images extracted from the Open Food Facts database.

<p align="center">
  <img src="assets/product.jpg" alt="Image 1" width="200"/>
  <img src="assets/cropped.jpg" alt="Image 2" width="180"/>
</p>

*Product image before and after automatic cropping.*

## Dev

You should create a new environment before installing the dependencies. Using Conda:

```bash
conda create -n crop-detection python=3.11
```

Then, prepare your local environment with the following command:

```bash
make init
```

If you just want to load the model, use:

```bash
make load-model
```

This command loads the float16 TFLite version of Yolov8n from the [Crop-Detection repository](https://huggingface.co/openfoodfacts/crop-detection) on Hugging Face.

## Run crop-detection

We use TensorFlow Lite to perform crop-detection inference on images. After `make init`, you can use the CLI to run the model on your computer:

```bash
python -m cli --help
```
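To give a sense of what the CLI does under the hood, here is a minimal raw-TFLite inference sketch. It assumes the model downloaded by `make load-model` (at `models/yolov8n_float16.tflite`), a plain resize to the model's input size, and an example image `product.jpg`; the real pre- and post-processing (letterboxing, score filtering, NMS) live in the repository's CLI.

```python
# A minimal sketch, not the CLI's actual implementation: the image path and
# normalization are assumptions for illustration.
import numpy as np
import tensorflow as tf
from PIL import Image

interpreter = tf.lite.Interpreter(model_path="models/yolov8n_float16.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Resize the image to the model's expected input shape (NHWC).
_, height, width, _ = input_details[0]["shape"]
image = Image.open("product.jpg").convert("RGB").resize((width, height))
input_data = np.expand_dims(np.asarray(image, dtype=np.float32) / 255.0, axis=0)

interpreter.set_tensor(input_details[0]["index"], input_data)
interpreter.invoke()

# Raw YOLOv8 output; for a single-class model this is typically
# (1, 5, num_candidates) with rows [cx, cy, w, h, score], still to be
# thresholded and passed through NMS.
output = interpreter.get_tensor(output_details[0]["index"])
print(output.shape)
```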
|
||
## Model training | ||
|
||
### Data pipeline | ||
|
||
To train Yolov8, we extracted product images from the Open Food Facts AWS Bucket. This solution enables us to download a large batch of images without the complexity of using the OFF API, mainly due to the number of requests limit. | ||
|
||
To understand how to reproduce the images extraction, check the Product Opener [documentation](https://openfoodfacts.github.io/openfoodfacts-server/api/aws-images-dataset/), you'll find a code snippet that was actually used to download a batch of images. | ||
|
||
However, all images are not equal for our use case. We're seeking for images of products that needs cropping, whereas most of images in the database are already cropped... | ||
|
||
Therefore, we filtered the images on 2 criteria: | ||
|
||
* The image editor shouldn't be **Yuka** | ||
* We pick images before 2020. | ||
|
||
We used DuckDB coupled with the JSONL dump to filtered codes respecting these 2 criteria. We generate a `.txt` file to store all product barcodes corresponding to our search. | ||
|
||
```sql | ||
CREATE TABLE object_detection AS | ||
SELECT code, last_image_dates_tags, correctors_tags | ||
FROM read_ndjson('openfoodfacts-products.jsonl.gz') | ||
; | ||
|
||
COPY( | ||
SELECT code | ||
FROM object_detection | ||
WHERE (last_image_dates_tags[-1]::integer) < '2020' | ||
AND list_aggregate(correctors_tags, 'string_agg', '|') NOT LIKE '%yuka%' | ||
) TO 'best_image_codes.txt' (DELIMITER ' ', HEADER FALSE) | ||
; | ||
``` | ||
|
||
We then generate the set of images using the command `load_images_from_aws.sh`. | ||
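For illustration, here is a hedged Python sketch of the barcode-to-path mapping involved (the same convention as `transform_id_to_url` in the pre-annotation code above). The image number `1.jpg`, the output directory, and downloading over HTTP rather than from the AWS bucket are all assumptions; the real bulk download goes through `load_images_from_aws.sh`.

```python
# Illustrative only: the real pipeline uses load_images_from_aws.sh against
# the AWS bucket; image number and output directory here are assumptions.
import urllib.request
from pathlib import Path

def barcode_to_folder(code: str) -> str:
    # Barcodes longer than 8 digits are split 3/3/3/rest,
    # e.g. 3259381171114 -> 325/938/117/1114
    if len(code) <= 8:
        return code
    return f"{code[0:3]}/{code[3:6]}/{code[6:9]}/{code[9:]}"

Path("data/images").mkdir(parents=True, exist_ok=True)
with open("best_image_codes.txt") as f:
    for code in (line.strip() for line in f if line.strip()):
        # "1.jpg" is the first raw image of the product (an assumption here).
        url = f"https://images.openfoodfacts.org/images/products/{barcode_to_folder(code)}/1.jpg"
        urllib.request.urlretrieve(url, f"data/images/{code}_1.jpg")
```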
### Annotation on Label-Studio

We used Label-Studio for the annotation. You can find the annotated images at https://annotate.openfoodfacts.org/projects/50/data.

We also pre-annotated the images using [Yolo-World](https://huggingface.co/spaces/stevengrove/YOLO-World), an object detection model that accepts custom labels.

You'll find the code to pre-annotate, upload and download the data in `ml_utils/ml_utils_cli/cli`.

### Training

The model training was done using the Ultralytics library. Learn more by checking the [official documentation](https://docs.ultralytics.com/modes/train/). We used Lightning AI to run the training job on GPUs (L4).

```bash
yolo detect train \
    data=data/data.yaml \
    model=models/yolov8n.pt \
    epochs=200 \
    imgsz=640 \
    batch=64
```

### Export to TFLite

Exporting is as easy as training with Ultralytics:

```bash
yolo export model=weights/best.pt format=tflite
```

## Links

* Demo: https://huggingface.co/spaces/openfoodfacts/crop-detection
* Model repo: https://huggingface.co/openfoodfacts/crop-detection
* Label Studio: https://annotate.openfoodfacts.org/projects/50/data