From 0a0759b56bf9cf3b6b0968dec8a741479ded6d0e Mon Sep 17 00:00:00 2001
From: Mehrdad
Date: Wed, 5 Feb 2025 13:20:21 -0800
Subject: [PATCH] Merge remote-tracking branch 'upstream/main' Staged
---
.dockerignore | 35 +
.github/ISSUE_TEMPLATE/bug-report.yml | 18 +-
.github/ISSUE_TEMPLATE/config.yml | 4 +-
.github/ISSUE_TEMPLATE/feature-request.yml | 16 +-
.github/ISSUE_TEMPLATE/question.yml | 6 +-
.github/dependabot.yml | 3 +-
.github/workflows/{ci.yaml => ci.yml} | 96 +-
.github/workflows/cla.yml | 5 +-
.github/workflows/codeql.yaml | 42 -
.github/workflows/{docker.yaml => docker.yml} | 96 +-
.github/workflows/docs.yml | 40 +-
.github/workflows/format.yml | 26 +-
.github/workflows/links.yml | 33 +-
.github/workflows/merge-main-into-prs.yml | 5 +-
.github/workflows/publish.yml | 190 ++--
.github/workflows/stale.yml | 6 +-
.gitignore | 4 +
CONTRIBUTING.md | 6 +-
README.md | 145 +--
README.zh-CN.md | 141 +--
docker/Dockerfile | 16 +-
docker/Dockerfile-arm64 | 4 +-
docker/Dockerfile-cpu | 16 +-
docker/Dockerfile-jetson-jetpack4 | 7 +-
docker/Dockerfile-jetson-jetpack5 | 29 +-
docker/Dockerfile-jetson-jetpack6 | 25 +-
docker/Dockerfile-jupyter | 33 +
docker/Dockerfile-python | 6 +-
docker/Dockerfile-runner | 7 +-
docs/README.md | 12 +-
docs/build_docs.py | 128 ++-
docs/build_reference.py | 2 +-
docs/en/datasets/classify/caltech101.md | 8 +-
docs/en/datasets/classify/caltech256.md | 12 +-
docs/en/datasets/classify/cifar10.md | 10 +-
docs/en/datasets/classify/cifar100.md | 19 +-
docs/en/datasets/classify/fashion-mnist.md | 14 +-
docs/en/datasets/classify/imagenet.md | 20 +-
docs/en/datasets/classify/imagenet10.md | 8 +-
docs/en/datasets/classify/imagenette.md | 20 +-
docs/en/datasets/classify/imagewoof.md | 12 +-
docs/en/datasets/classify/index.md | 13 +-
docs/en/datasets/classify/mnist.md | 14 +-
docs/en/datasets/detect/african-wildlife.md | 27 +-
docs/en/datasets/detect/argoverse.md | 12 +-
docs/en/datasets/detect/brain-tumor.md | 20 +-
docs/en/datasets/detect/coco.md | 32 +-
docs/en/datasets/detect/coco8.md | 24 +-
docs/en/datasets/detect/globalwheat2020.md | 14 +-
docs/en/datasets/detect/index.md | 19 +-
docs/en/datasets/detect/lvis.md | 16 +-
docs/en/datasets/detect/medical-pills.md | 147 +++
docs/en/datasets/detect/objects365.md | 16 +-
docs/en/datasets/detect/open-images-v7.md | 57 +-
docs/en/datasets/detect/roboflow-100.md | 4 +-
docs/en/datasets/detect/signature.md | 16 +-
docs/en/datasets/detect/sku-110k.md | 18 +-
docs/en/datasets/detect/visdrone.md | 14 +-
docs/en/datasets/detect/voc.md | 14 +-
docs/en/datasets/detect/xview.md | 8 +-
docs/en/datasets/explorer/api.md | 26 +-
docs/en/datasets/explorer/dashboard.md | 4 +
docs/en/datasets/explorer/explorer.ipynb | 604 ----------
docs/en/datasets/explorer/explorer.md | 278 +++++
docs/en/datasets/explorer/index.md | 4 +
docs/en/datasets/index.md | 22 +-
docs/en/datasets/obb/dota-v2.md | 18 +-
docs/en/datasets/obb/dota8.md | 26 +-
docs/en/datasets/obb/index.md | 26 +-
docs/en/datasets/pose/coco.md | 39 +-
docs/en/datasets/pose/coco8-pose.md | 28 +-
docs/en/datasets/pose/dog-pose.md | 141 +++
docs/en/datasets/pose/hand-keypoints.md | 31 +-
docs/en/datasets/pose/index.md | 19 +-
docs/en/datasets/pose/tiger-pose.md | 28 +-
docs/en/datasets/segment/carparts-seg.md | 20 +-
docs/en/datasets/segment/coco.md | 38 +-
docs/en/datasets/segment/coco8-seg.md | 22 +-
docs/en/datasets/segment/crack-seg.md | 14 +-
docs/en/datasets/segment/index.md | 22 +-
docs/en/datasets/segment/package-seg.md | 20 +-
docs/en/datasets/track/index.md | 10 +-
docs/en/guides/analytics.md | 439 +++----
docs/en/guides/azureml-quickstart.md | 81 +-
docs/en/guides/conda-quickstart.md | 6 +-
.../guides/coral-edge-tpu-on-raspberry-pi.md | 111 +-
.../guides/data-collection-and-annotation.md | 6 +-
docs/en/guides/deepstream-nvidia-jetson.md | 169 ++-
docs/en/guides/defining-project-goals.md | 16 +-
docs/en/guides/distance-calculation.md | 70 +-
docs/en/guides/docker-quickstart.md | 6 +-
docs/en/guides/heatmaps.md | 309 +----
docs/en/guides/hyperparameter-tuning.md | 76 +-
docs/en/guides/index.md | 16 +-
.../instance-segmentation-and-tracking.md | 46 +-
.../guides/isolating-segmentation-objects.md | 30 +-
docs/en/guides/kfold-cross-validation.md | 13 +-
docs/en/guides/model-deployment-options.md | 97 +-
docs/en/guides/model-deployment-practices.md | 26 +-
docs/en/guides/model-evaluation-insights.md | 63 +-
.../model-monitoring-and-maintenance.md | 17 +-
docs/en/guides/model-testing.md | 37 +-
docs/en/guides/model-training-tips.md | 46 +-
docs/en/guides/nvidia-jetson.md | 445 +++++---
docs/en/guides/object-blurring.md | 42 +-
docs/en/guides/object-counting.md | 332 ++----
docs/en/guides/object-cropping.md | 40 +-
...ng-openvino-latency-vs-throughput-modes.md | 2 +-
docs/en/guides/parking-management.md | 56 +-
.../en/guides/preprocessing_annotated_data.md | 32 +-
docs/en/guides/queue-management.md | 158 ++-
docs/en/guides/raspberry-pi.md | 206 ++--
docs/en/guides/region-counting.md | 116 +-
docs/en/guides/sahi-tiled-inference.md | 86 +-
docs/en/guides/security-alarm-system.md | 227 ++--
docs/en/guides/speed-estimation.md | 123 +-
docs/en/guides/steps-of-a-cv-project.md | 16 +-
docs/en/guides/streamlit-live-inference.md | 83 +-
docs/en/guides/trackzone.md | 173 +++
docs/en/guides/triton-inference-server.md | 107 +-
docs/en/guides/view-results-in-terminal.md | 6 +-
docs/en/guides/vision-eye.md | 48 +-
docs/en/guides/workouts-monitoring.md | 149 +--
docs/en/guides/yolo-common-issues.md | 62 +-
docs/en/guides/yolo-performance-metrics.md | 48 +-
docs/en/guides/yolo-thread-safe-inference.md | 12 +-
docs/en/help/CI.md | 29 +-
docs/en/help/CLA.md | 120 +-
docs/en/help/FAQ.md | 52 +-
...{code_of_conduct.md => code-of-conduct.md} | 0
docs/en/help/contributing.md | 139 ++-
docs/en/help/index.md | 12 +-
...ple.md => minimum-reproducible-example.md} | 0
docs/en/help/privacy.md | 3 +-
docs/en/help/security.md | 2 +-
docs/en/hub/app/android.md | 4 +-
docs/en/hub/app/index.md | 4 +-
docs/en/hub/app/ios.md | 4 +-
docs/en/hub/cloud-training.md | 2 +-
docs/en/hub/datasets.md | 4 +-
docs/en/hub/index.md | 14 +-
docs/en/hub/inference-api.md | 8 +-
docs/en/hub/models.md | 16 +-
docs/en/hub/quickstart.md | 6 +-
docs/en/index.md | 183 ++-
docs/en/integrations/albumentations.md | 199 ++++
docs/en/integrations/amazon-sagemaker.md | 62 +-
docs/en/integrations/clearml.md | 64 +-
docs/en/integrations/comet.md | 66 +-
docs/en/integrations/coreml.md | 68 +-
docs/en/integrations/dvc.md | 66 +-
docs/en/integrations/edge-tpu.md | 66 +-
docs/en/integrations/google-colab.md | 42 +-
docs/en/integrations/gradio.md | 46 +-
docs/en/integrations/ibm-watsonx.md | 64 +-
docs/en/integrations/index.md | 58 +-
docs/en/integrations/jupyterlab.md | 58 +-
docs/en/integrations/kaggle.md | 57 +-
docs/en/integrations/mnn.md | 344 ++++++
docs/en/integrations/ncnn.md | 64 +-
docs/en/integrations/neural-magic.md | 96 +-
docs/en/integrations/onnx.md | 78 +-
docs/en/integrations/openvino.md | 27 +-
docs/en/integrations/paddlepaddle.md | 78 +-
docs/en/integrations/paperspace.md | 52 +-
docs/en/integrations/ray-tune.md | 64 +-
docs/en/integrations/roboflow.md | 76 +-
docs/en/integrations/rockchip-rknn.md | 206 ++++
docs/en/integrations/seeedstudio-recamera.md | 110 ++
docs/en/integrations/sony-imx500.md | 330 ++++++
docs/en/integrations/tensorboard.md | 78 +-
docs/en/integrations/tensorrt.md | 86 +-
docs/en/integrations/tf-graphdef.md | 90 +-
docs/en/integrations/tf-savedmodel.md | 62 +-
docs/en/integrations/tfjs.md | 72 +-
docs/en/integrations/tflite.md | 76 +-
docs/en/integrations/torchscript.md | 82 +-
docs/en/integrations/vscode.md | 12 +-
docs/en/integrations/weights-biases.md | 208 ++--
docs/en/macros/augmentation-args.md | 2 +-
docs/en/macros/export-args.md | 30 +-
docs/en/macros/export-table.md | 33 +-
docs/en/macros/predict-args.md | 37 +-
docs/en/macros/sam-auto-annotate.md | 12 +
docs/en/macros/solutions-args.md | 12 +
docs/en/macros/train-args.md | 100 +-
docs/en/macros/validation-args.md | 4 +-
docs/en/macros/yolo-cls-perf.md | 7 +
docs/en/macros/yolo-det-perf.md | 7 +
docs/en/macros/yolo-obb-perf.md | 7 +
docs/en/macros/yolo-pose-perf.md | 7 +
docs/en/macros/yolo-seg-perf.md | 7 +
docs/en/models/index.md | 6 +-
docs/en/models/mobile-sam.md | 55 +-
docs/en/models/rtdetr.md | 9 +-
docs/en/models/sam-2.md | 107 +-
docs/en/models/sam.md | 40 +-
docs/en/models/yolo-nas.md | 3 +-
docs/en/models/yolo-world.md | 4 +-
docs/en/models/yolo11.md | 75 +-
docs/en/models/yolov10.md | 5 +
docs/en/models/yolov3.md | 56 +-
docs/en/models/yolov5.md | 11 +-
docs/en/models/yolov6.md | 23 +-
docs/en/models/yolov7.md | 16 +-
docs/en/models/yolov8.md | 11 +-
docs/en/models/yolov9.md | 15 +-
docs/en/modes/benchmark.md | 78 +-
docs/en/modes/export.md | 40 +-
docs/en/modes/index.md | 85 +-
docs/en/modes/predict.md | 169 +--
docs/en/modes/track.md | 64 +-
docs/en/modes/train.md | 78 +-
docs/en/modes/val.md | 50 +-
docs/en/quickstart.md | 2 +-
docs/en/reference/cfg/__init__.md | 6 +-
docs/en/reference/data/converter.md | 4 +
docs/en/reference/data/explorer/explorer.md | 21 -
docs/en/reference/data/explorer/gui/dash.md | 57 -
docs/en/reference/data/explorer/utils.md | 33 -
docs/en/reference/data/utils.md | 4 +
docs/en/reference/engine/exporter.md | 8 +
docs/en/reference/models/sam/predict.md | 4 +
docs/en/reference/nn/modules/block.md | 4 +
docs/en/reference/nn/modules/conv.md | 4 +
docs/en/reference/solutions/region_counter.md | 16 +
docs/en/reference/solutions/security_alarm.md | 16 +
docs/en/reference/solutions/solutions.md | 16 +
.../solutions/streamlit_inference.md | 2 +-
docs/en/reference/solutions/trackzone.md | 16 +
docs/en/reference/utils/__init__.md | 4 +
docs/en/reference/utils/checks.md | 8 +
docs/en/reference/utils/metrics.md | 2 +-
docs/en/reference/utils/ops.md | 4 +
docs/en/reference/utils/torch_utils.md | 12 +
docs/en/solutions/index.md | 86 +-
docs/en/tasks/classify.md | 84 +-
docs/en/tasks/detect.md | 98 +-
docs/en/tasks/index.md | 63 +-
docs/en/tasks/obb.md | 94 +-
docs/en/tasks/pose.md | 119 +-
docs/en/tasks/segment.md | 92 +-
docs/en/usage/callbacks.md | 71 +-
docs/en/usage/cfg.md | 35 +-
docs/en/usage/cli.md | 84 +-
docs/en/usage/engine.md | 30 +-
docs/en/usage/python.md | 160 +--
docs/en/usage/simple-utilities.md | 140 ++-
.../environments/aws_quickstart_tutorial.md | 2 +-
.../docker_image_quickstart_tutorial.md | 4 +-
docs/en/yolov5/index.md | 14 +-
.../tutorials/clearml_logging_integration.md | 4 +-
.../tutorials/comet_logging_integration.md | 2 +-
.../tutorials/hyperparameter_evolution.md | 2 +-
docs/en/yolov5/tutorials/model_ensembling.md | 2 +-
docs/en/yolov5/tutorials/model_export.md | 30 +-
.../tutorials/model_pruning_and_sparsity.md | 2 +-
.../en/yolov5/tutorials/multi_gpu_training.md | 2 +-
.../tutorials/pytorch_hub_model_loading.md | 2 +-
.../roboflow_datasets_integration.md | 6 +-
.../tutorials/test_time_augmentation.md | 2 +-
.../tips_for_best_training_results.md | 2 +-
docs/en/yolov5/tutorials/train_custom_data.md | 8 +-
.../transfer_learning_with_frozen_layers.md | 2 +-
docs/mkdocs_github_authors.yaml | 55 +-
docs/model_data.py | 93 ++
docs/overrides/assets/favicon.ico | Bin 9662 -> 0 bytes
docs/overrides/javascript/benchmark.js | 229 ++++
docs/overrides/javascript/extra.js | 196 +++-
docs/overrides/javascript/giscus.js | 85 ++
docs/overrides/main.html | 2 +-
docs/overrides/partials/comments.html | 48 +-
docs/overrides/partials/source-file.html | 26 -
docs/overrides/stylesheets/style.css | 27 +-
examples/README.md | 11 +-
examples/RTDETR-ONNXRuntime-Python/README.md | 43 +
examples/RTDETR-ONNXRuntime-Python/main.py | 222 ++++
.../YOLO-Series-ONNXRuntime-Rust/Cargo.toml | 14 +
.../YOLO-Series-ONNXRuntime-Rust/README.md | 94 ++
.../YOLO-Series-ONNXRuntime-Rust/src/main.rs | 236 ++++
.../action_recognition.py | 8 +-
examples/YOLOv8-CPP-Inference/README.md | 8 +-
.../YOLOv8-LibTorch-CPP-Inference/README.md | 2 +-
.../YOLOv8-LibTorch-CPP-Inference/main.cc | 1 +
examples/YOLOv8-ONNXRuntime-CPP/inference.cpp | 2 +-
examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml | 13 +-
examples/YOLOv8-ONNXRuntime-Rust/README.md | 33 +-
examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs | 2 +-
examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs | 41 +
examples/YOLOv8-ONNXRuntime-Rust/src/main.rs | 2 +-
examples/YOLOv8-ONNXRuntime-Rust/src/model.rs | 29 +-
.../src/ort_backend.rs | 183 +--
examples/YOLOv8-ONNXRuntime/main.py | 2 +-
examples/YOLOv8-OpenCV-ONNX-Python/main.py | 2 +-
.../README.md | 65 --
.../YOLOv8-OpenCV-int8-tflite-Python/main.py | 298 -----
.../YOLOv8-OpenVINO-CPP-Inference/README.md | 2 +-
examples/YOLOv8-Region-Counter/readme.md | 13 +-
.../yolov8_region_counter.py | 24 +-
.../YOLOv8-SAHI-Inference-Video/readme.md | 20 +-
.../yolov8_sahi.py | 36 +-
.../main.py | 2 +-
examples/YOLOv8-TFLite-Python/README.md | 55 +
examples/YOLOv8-TFLite-Python/main.py | 221 ++++
examples/heatmaps.ipynb | 47 +-
examples/hub.ipynb | 6 +-
examples/object_counting.ipynb | 54 +-
examples/object_tracking.ipynb | 30 +-
examples/tutorial.ipynb | 310 ++---
mkdocs.yml | 111 +-
pyproject.toml | 36 +-
tests/__init__.py | 3 +-
tests/conftest.py | 6 +-
tests/test_cli.py | 10 +-
tests/test_cuda.py | 26 +-
tests/test_engine.py | 2 +-
tests/test_explorer.py | 66 --
tests/test_exports.py | 101 +-
tests/test_integrations.py | 2 +-
tests/test_python.py | 15 +-
tests/test_solutions.py | 74 +-
ultralytics/__init__.py | 11 +-
ultralytics/cfg/__init__.py | 490 +++++---
ultralytics/cfg/datasets/Argoverse.yaml | 3 +-
ultralytics/cfg/datasets/DOTAv1.5.yaml | 3 +-
ultralytics/cfg/datasets/DOTAv1.yaml | 3 +-
ultralytics/cfg/datasets/GlobalWheat2020.yaml | 3 +-
ultralytics/cfg/datasets/ImageNet.yaml | 3 +-
ultralytics/cfg/datasets/Objects365.yaml | 3 +-
ultralytics/cfg/datasets/SKU-110K.yaml | 3 +-
ultralytics/cfg/datasets/VOC.yaml | 3 +-
ultralytics/cfg/datasets/VisDrone.yaml | 3 +-
.../cfg/datasets/african-wildlife.yaml | 3 +-
ultralytics/cfg/datasets/brain-tumor.yaml | 3 +-
ultralytics/cfg/datasets/carparts-seg.yaml | 3 +-
ultralytics/cfg/datasets/coco-pose.yaml | 11 +-
ultralytics/cfg/datasets/coco.yaml | 3 +-
ultralytics/cfg/datasets/coco128-seg.yaml | 5 +-
ultralytics/cfg/datasets/coco128.yaml | 5 +-
ultralytics/cfg/datasets/coco8-pose.yaml | 3 +-
ultralytics/cfg/datasets/coco8-seg.yaml | 3 +-
ultralytics/cfg/datasets/coco8.yaml | 3 +-
ultralytics/cfg/datasets/crack-seg.yaml | 3 +-
ultralytics/cfg/datasets/dog-pose.yaml | 24 +
ultralytics/cfg/datasets/dota8.yaml | 3 +-
ultralytics/cfg/datasets/hand-keypoints.yaml | 3 +-
ultralytics/cfg/datasets/lvis.yaml | 5 +-
ultralytics/cfg/datasets/medical-pills.yaml | 22 +
ultralytics/cfg/datasets/open-images-v7.yaml | 3 +-
ultralytics/cfg/datasets/package-seg.yaml | 7 +-
ultralytics/cfg/datasets/signature.yaml | 3 +-
ultralytics/cfg/datasets/tiger-pose.yaml | 3 +-
ultralytics/cfg/datasets/xView.yaml | 3 +-
ultralytics/cfg/default.yaml | 13 +-
.../cfg/models/11/yolo11-cls-resnet18.yaml | 17 +
ultralytics/cfg/models/11/yolo11-cls.yaml | 7 +-
ultralytics/cfg/models/11/yolo11-obb.yaml | 7 +-
ultralytics/cfg/models/11/yolo11-pose.yaml | 7 +-
ultralytics/cfg/models/11/yolo11-seg.yaml | 7 +-
ultralytics/cfg/models/11/yolo11.yaml | 7 +-
ultralytics/cfg/models/README.md | 6 +-
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml | 7 +-
.../cfg/models/rt-detr/rtdetr-resnet101.yaml | 7 +-
.../cfg/models/rt-detr/rtdetr-resnet50.yaml | 7 +-
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10b.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10l.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10m.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10n.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10s.yaml | 7 +-
ultralytics/cfg/models/v10/yolov10x.yaml | 7 +-
ultralytics/cfg/models/v3/yolov3-spp.yaml | 7 +-
ultralytics/cfg/models/v3/yolov3-tiny.yaml | 7 +-
ultralytics/cfg/models/v3/yolov3.yaml | 7 +-
ultralytics/cfg/models/v5/yolov5-p6.yaml | 7 +-
ultralytics/cfg/models/v5/yolov5-relu6.yaml | 2 +-
ultralytics/cfg/models/v5/yolov5.yaml | 7 +-
ultralytics/cfg/models/v6/yolov6.yaml | 9 +-
.../cfg/models/v8/relu6-yolov8-cls.yaml | 2 +-
.../cfg/models/v8/relu6-yolov8-regress.yaml | 2 +-
.../cfg/models/v8/relu6-yolov8-regress6.yaml | 2 +-
ultralytics/cfg/models/v8/relu6-yolov8.yaml | 2 +-
.../cfg/models/v8/yolov8-cls-resnet101.yaml | 7 +-
.../cfg/models/v8/yolov8-cls-resnet50.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-cls.yaml | 7 +-
.../cfg/models/v8/yolov8-ghost-p2.yaml | 8 +-
.../cfg/models/v8/yolov8-ghost-p6.yaml | 8 +-
ultralytics/cfg/models/v8/yolov8-ghost.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-obb.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-p2.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-p6.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml | 7 +-
.../cfg/models/v8/yolov8-pose-relu6.yaml | 2 +-
ultralytics/cfg/models/v8/yolov8-pose.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-relu6.yaml | 2 +-
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-seg-p6.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-seg.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-world.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8-worldv2.yaml | 7 +-
ultralytics/cfg/models/v8/yolov8.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9c-seg.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9c.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9e-seg.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9e.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9m.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9s.yaml | 7 +-
ultralytics/cfg/models/v9/yolov9t.yaml | 7 +-
ultralytics/cfg/solutions/default.yaml | 24 +
ultralytics/cfg/trackers/botsort.yaml | 11 +-
ultralytics/cfg/trackers/bytetrack.yaml | 11 +-
ultralytics/data/__init__.py | 2 +-
ultralytics/data/annotator.py | 26 +-
ultralytics/data/augment.py | 50 +-
ultralytics/data/base.py | 44 +-
ultralytics/data/build.py | 14 +-
ultralytics/data/converter.py | 99 +-
ultralytics/data/dataset.py | 64 +-
ultralytics/data/explorer/__init__.py | 5 -
ultralytics/data/explorer/explorer.py | 460 --------
ultralytics/data/explorer/gui/__init__.py | 1 -
ultralytics/data/explorer/gui/dash.py | 282 -----
ultralytics/data/explorer/utils.py | 167 ---
ultralytics/data/loaders.py | 247 ++--
ultralytics/data/scripts/download_weights.sh | 4 +-
ultralytics/data/split_dota.py | 16 +-
ultralytics/data/utils.py | 80 +-
ultralytics/engine/__init__.py | 2 +-
ultralytics/engine/exporter.py | 663 +++++++++--
ultralytics/engine/model.py | 198 ++--
ultralytics/engine/predictor.py | 41 +-
ultralytics/engine/results.py | 135 ++-
ultralytics/engine/trainer.py | 63 +-
ultralytics/engine/tuner.py | 31 +-
ultralytics/engine/validator.py | 35 +-
ultralytics/hub/__init__.py | 8 +-
ultralytics/hub/auth.py | 4 +-
ultralytics/hub/google/__init__.py | 2 +-
ultralytics/hub/session.py | 2 +-
ultralytics/hub/utils.py | 4 +-
ultralytics/models/__init__.py | 2 +-
ultralytics/models/fastsam/__init__.py | 2 +-
ultralytics/models/fastsam/model.py | 2 +-
ultralytics/models/fastsam/predict.py | 12 +-
ultralytics/models/fastsam/utils.py | 2 +-
ultralytics/models/fastsam/val.py | 2 +-
ultralytics/models/nas/__init__.py | 2 +-
ultralytics/models/nas/model.py | 2 +-
ultralytics/models/nas/predict.py | 2 +-
ultralytics/models/nas/val.py | 10 +-
ultralytics/models/rtdetr/__init__.py | 2 +-
ultralytics/models/rtdetr/model.py | 2 +-
ultralytics/models/rtdetr/predict.py | 2 +-
ultralytics/models/rtdetr/train.py | 5 +-
ultralytics/models/rtdetr/val.py | 2 +-
ultralytics/models/sam/__init__.py | 6 +-
ultralytics/models/sam/amg.py | 4 +-
ultralytics/models/sam/build.py | 12 +-
ultralytics/models/sam/model.py | 4 +-
ultralytics/models/sam/modules/__init__.py | 2 +-
ultralytics/models/sam/modules/blocks.py | 24 +-
ultralytics/models/sam/modules/decoders.py | 2 +-
ultralytics/models/sam/modules/encoders.py | 8 +-
.../models/sam/modules/memory_attention.py | 2 +-
ultralytics/models/sam/modules/sam.py | 196 ++--
.../models/sam/modules/tiny_encoder.py | 5 +-
ultralytics/models/sam/modules/transformer.py | 2 +-
ultralytics/models/sam/modules/utils.py | 2 +-
ultralytics/models/sam/predict.py | 1004 +++++++++++++++--
ultralytics/models/utils/__init__.py | 2 +-
ultralytics/models/utils/loss.py | 13 +-
ultralytics/models/utils/ops.py | 4 +-
ultralytics/models/yolo/__init__.py | 2 +-
ultralytics/models/yolo/classify/__init__.py | 2 +-
ultralytics/models/yolo/classify/predict.py | 5 +-
ultralytics/models/yolo/classify/train.py | 7 +-
ultralytics/models/yolo/classify/val.py | 8 +-
ultralytics/models/yolo/detect/__init__.py | 2 +-
ultralytics/models/yolo/detect/predict.py | 52 +-
ultralytics/models/yolo/detect/train.py | 11 +-
ultralytics/models/yolo/detect/val.py | 21 +-
ultralytics/models/yolo/model.py | 4 +-
ultralytics/models/yolo/obb/__init__.py | 2 +-
ultralytics/models/yolo/obb/predict.py | 45 +-
ultralytics/models/yolo/obb/train.py | 8 +-
ultralytics/models/yolo/obb/val.py | 26 +-
ultralytics/models/yolo/pose/__init__.py | 2 +-
ultralytics/models/yolo/pose/predict.py | 27 +-
ultralytics/models/yolo/pose/train.py | 4 +-
ultralytics/models/yolo/pose/val.py | 12 +-
ultralytics/models/yolo/segment/__init__.py | 2 +-
ultralytics/models/yolo/segment/predict.py | 48 +-
ultralytics/models/yolo/segment/train.py | 4 +-
ultralytics/models/yolo/segment/val.py | 10 +-
ultralytics/models/yolo/world/__init__.py | 2 +-
ultralytics/models/yolo/world/train.py | 2 +-
ultralytics/models/yolo/world/train_world.py | 2 +-
ultralytics/nn/__init__.py | 2 +-
ultralytics/nn/autobackend.py | 240 +++-
ultralytics/nn/modules/__init__.py | 6 +-
ultralytics/nn/modules/activation.py | 2 +-
ultralytics/nn/modules/block.py | 56 +-
ultralytics/nn/modules/conv.py | 22 +-
ultralytics/nn/modules/head.py | 73 +-
ultralytics/nn/modules/transformer.py | 2 +-
ultralytics/nn/modules/utils.py | 2 +-
ultralytics/nn/tasks.py | 224 ++--
ultralytics/solutions/__init__.py | 12 +-
ultralytics/solutions/ai_gym.py | 210 ++--
ultralytics/solutions/analytics.py | 478 ++++----
ultralytics/solutions/distance_calculation.py | 143 ++-
ultralytics/solutions/heatmap.py | 324 ++----
ultralytics/solutions/object_counter.py | 404 +++----
ultralytics/solutions/parking_management.py | 349 +++---
ultralytics/solutions/queue_management.py | 207 ++--
ultralytics/solutions/region_counter.py | 116 ++
ultralytics/solutions/security_alarm.py | 144 +++
ultralytics/solutions/solutions.py | 178 +++
ultralytics/solutions/speed_estimation.py | 148 ++-
ultralytics/solutions/streamlit_inference.py | 315 +++---
ultralytics/solutions/trackzone.py | 68 ++
ultralytics/trackers/README.md | 52 +-
ultralytics/trackers/__init__.py | 2 +-
ultralytics/trackers/basetrack.py | 4 +-
ultralytics/trackers/bot_sort.py | 2 +-
ultralytics/trackers/byte_tracker.py | 2 +-
ultralytics/trackers/track.py | 5 +-
ultralytics/trackers/utils/__init__.py | 2 +-
ultralytics/trackers/utils/gmc.py | 26 +-
ultralytics/trackers/utils/kalman_filter.py | 2 +-
ultralytics/trackers/utils/matching.py | 15 +-
ultralytics/utils/__init__.py | 237 ++--
ultralytics/utils/autobatch.py | 39 +-
ultralytics/utils/benchmarks.py | 106 +-
ultralytics/utils/callbacks/__init__.py | 2 +-
ultralytics/utils/callbacks/base.py | 2 +-
ultralytics/utils/callbacks/clearml.py | 6 +-
ultralytics/utils/callbacks/comet.py | 53 +-
ultralytics/utils/callbacks/dvc.py | 2 +-
ultralytics/utils/callbacks/hub.py | 16 +-
ultralytics/utils/callbacks/mlflow.py | 6 +-
ultralytics/utils/callbacks/neptune.py | 8 +-
ultralytics/utils/callbacks/raytune.py | 5 +-
ultralytics/utils/callbacks/tensorboard.py | 35 +-
ultralytics/utils/callbacks/wb.py | 33 +-
ultralytics/utils/checks.py | 175 ++-
ultralytics/utils/dist.py | 4 +-
ultralytics/utils/downloads.py | 17 +-
ultralytics/utils/errors.py | 2 +-
ultralytics/utils/files.py | 8 +-
ultralytics/utils/instance.py | 23 +-
ultralytics/utils/loss.py | 18 +-
ultralytics/utils/metrics.py | 51 +-
ultralytics/utils/ops.py | 88 +-
ultralytics/utils/patches.py | 14 +-
ultralytics/utils/plotting.py | 280 +++--
ultralytics/utils/tal.py | 46 +-
ultralytics/utils/torch_utils.py | 164 ++-
ultralytics/utils/triton.py | 3 +-
ultralytics/utils/tuner.py | 25 +-
560 files changed, 16714 insertions(+), 11459 deletions(-)
create mode 100644 .dockerignore
rename .github/workflows/{ci.yaml => ci.yml} (82%)
delete mode 100644 .github/workflows/codeql.yaml
rename .github/workflows/{docker.yaml => docker.yml} (66%)
create mode 100644 docker/Dockerfile-jupyter
create mode 100644 docs/en/datasets/detect/medical-pills.md
delete mode 100644 docs/en/datasets/explorer/explorer.ipynb
create mode 100644 docs/en/datasets/explorer/explorer.md
create mode 100644 docs/en/datasets/pose/dog-pose.md
create mode 100644 docs/en/guides/trackzone.md
rename docs/en/help/{code_of_conduct.md => code-of-conduct.md} (100%)
rename docs/en/help/{minimum_reproducible_example.md => minimum-reproducible-example.md} (100%)
create mode 100644 docs/en/integrations/albumentations.md
create mode 100644 docs/en/integrations/mnn.md
create mode 100644 docs/en/integrations/rockchip-rknn.md
create mode 100644 docs/en/integrations/seeedstudio-recamera.md
create mode 100644 docs/en/integrations/sony-imx500.md
create mode 100644 docs/en/macros/sam-auto-annotate.md
create mode 100644 docs/en/macros/solutions-args.md
create mode 100644 docs/en/macros/yolo-cls-perf.md
create mode 100644 docs/en/macros/yolo-det-perf.md
create mode 100644 docs/en/macros/yolo-obb-perf.md
create mode 100644 docs/en/macros/yolo-pose-perf.md
create mode 100644 docs/en/macros/yolo-seg-perf.md
delete mode 100644 docs/en/reference/data/explorer/explorer.md
delete mode 100644 docs/en/reference/data/explorer/gui/dash.md
delete mode 100644 docs/en/reference/data/explorer/utils.md
create mode 100644 docs/en/reference/solutions/region_counter.md
create mode 100644 docs/en/reference/solutions/security_alarm.md
create mode 100644 docs/en/reference/solutions/solutions.md
create mode 100644 docs/en/reference/solutions/trackzone.md
create mode 100644 docs/model_data.py
delete mode 100644 docs/overrides/assets/favicon.ico
create mode 100644 docs/overrides/javascript/benchmark.js
create mode 100644 docs/overrides/javascript/giscus.js
delete mode 100644 docs/overrides/partials/source-file.html
create mode 100644 examples/RTDETR-ONNXRuntime-Python/README.md
create mode 100644 examples/RTDETR-ONNXRuntime-Python/main.py
create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml
create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/README.md
create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs
delete mode 100644 examples/YOLOv8-OpenCV-int8-tflite-Python/README.md
delete mode 100644 examples/YOLOv8-OpenCV-int8-tflite-Python/main.py
create mode 100644 examples/YOLOv8-TFLite-Python/README.md
create mode 100644 examples/YOLOv8-TFLite-Python/main.py
delete mode 100644 tests/test_explorer.py
create mode 100644 ultralytics/cfg/datasets/dog-pose.yaml
create mode 100644 ultralytics/cfg/datasets/medical-pills.yaml
create mode 100644 ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
create mode 100644 ultralytics/cfg/solutions/default.yaml
delete mode 100644 ultralytics/data/explorer/__init__.py
delete mode 100644 ultralytics/data/explorer/explorer.py
delete mode 100644 ultralytics/data/explorer/gui/__init__.py
delete mode 100644 ultralytics/data/explorer/gui/dash.py
delete mode 100644 ultralytics/data/explorer/utils.py
create mode 100644 ultralytics/solutions/region_counter.py
create mode 100644 ultralytics/solutions/security_alarm.py
create mode 100644 ultralytics/solutions/solutions.py
create mode 100644 ultralytics/solutions/trackzone.py
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000000..4903d51fa80
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,35 @@
+# Python
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.py[cod]
+*$py.class
+.pytest_cache
+.coverage
+coverage.xml
+.ruff_cache
+*.egg-info
+dist
+build
+
+# Development
+.env
+.venv
+env/
+venv/
+ENV/
+.idea
+.vscode
+*.swp
+*.swo
+.DS_Store
+
+# Project specific
+*.log
+benchmarks.log
+runs/
+
+# Dependencies
+node_modules/
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index 430b05957ab..f5f9022ddeb 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
name: 🐛 Bug Report
# title: " "
@@ -14,7 +14,7 @@ body:
attributes:
label: Search before asking
description: >
- Please search the Ultralytics [Docs](https://docs.ultralytics.com) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar bug report already exists.
+ Please search the Ultralytics [Docs](https://docs.ultralytics.com/) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar bug report already exists.
options:
- label: >
I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar bug report.
@@ -43,7 +43,7 @@ body:
- type: textarea
attributes:
label: Bug
- description: Please provide as much information as possible. Copy and paste console output and error messages. Use [Markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to format text, code and logs. If necessary, include screenshots for visual elements only. Providing detailed information will help us resolve the issue more efficiently.
+ description: Please provide as much information as possible. Copy and paste console output and error messages including the _full_ traceback. Use [Markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to format text, code and logs. If necessary, include screenshots for visual elements only. Providing detailed information will help us resolve the issue more efficiently.
placeholder: |
💡 ProTip! Include as much information as possible (logs, tracebacks, screenshots, etc.) to receive the most helpful response.
validations:
@@ -52,11 +52,11 @@ body:
- type: textarea
attributes:
label: Environment
- description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` or `ultralytics.checks()` command to help us diagnose the problem.
+ description: Try the latest version (`pip install -U ultralytics`) before reporting a bug. If it's still present, please provide the output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command to help us diagnose the problem.
placeholder: |
- Paste output of `yolo checks` or `ultralytics.checks()` command, i.e.:
+ Paste output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command, i.e.:
```
- Ultralytics YOLOv8.0.181 🚀 Python-3.11.2 torch-2.0.1 CPU (Apple M2)
+ Ultralytics 8.3.2 🚀 Python-3.11.2 torch-2.4.1 CPU (Apple M3)
Setup complete ✅ (8 CPUs, 16.0 GB RAM, 266.5/460.4 GB disk)
OS macOS-13.5.2
@@ -64,7 +64,7 @@ body:
Python 3.11.2
Install git
RAM 16.00 GB
- CPU Apple M2
+ CPU Apple M3
CUDA None
```
validations:
@@ -74,7 +74,7 @@ body:
attributes:
label: Minimal Reproducible Example
description: >
- When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem. This is referred to by community members as creating a [minimal reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/).
+ When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem. This is referred to by community members as creating a [minimal reproducible example](https://docs.ultralytics.com/help/minimum-reproducible-example/).
placeholder: |
```
# Code to reproduce your issue here
@@ -92,6 +92,6 @@ body:
label: Are you willing to submit a PR?
description: >
(Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve Ultralytics YOLO for everyone, especially if you have a good understanding of how to implement a fix or feature.
- See the Ultralytics YOLO [Contributing Guide](https://docs.ultralytics.com/help/contributing) to get started.
+ See the Ultralytics YOLO [Contributing Guide](https://docs.ultralytics.com/help/contributing/) to get started.
options:
- label: Yes I'd like to help by submitting a PR!
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 73745a3a562..0da481e9fb9 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,10 +1,10 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
blank_issues_enabled: true
contact_links:
- name: 📄 Docs
url: https://docs.ultralytics.com/
- about: Full Ultralytics YOLOv8 Documentation
+ about: Full Ultralytics YOLO Documentation
- name: 💬 Forum
url: https://community.ultralytics.com/
about: Ask on Ultralytics Community Forum
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
index c065446c1f4..6b72a38433c 100644
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -1,23 +1,23 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
name: 🚀 Feature Request
-description: Suggest a YOLOv8 idea
+description: Suggest an Ultralytics YOLO idea
# title: " "
labels: [enhancement]
body:
- type: markdown
attributes:
value: |
- Thank you for submitting a YOLOv8 🚀 Feature Request!
+ Thank you for submitting an Ultralytics 🚀 Feature Request!
- type: checkboxes
attributes:
label: Search before asking
description: >
- Please search the Ultralytics [Docs](https://docs.ultralytics.com) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar feature request already exists.
+ Please search the Ultralytics [Docs](https://docs.ultralytics.com/) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar feature request already exists.
options:
- label: >
- I have searched the YOLOv8 [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar feature requests.
+ I have searched the Ultralytics [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar feature requests.
required: true
- type: textarea
@@ -25,7 +25,7 @@ body:
label: Description
description: A short description of your feature.
placeholder: |
- What new feature would you like to see in YOLOv8?
+ What new feature would you like to see in YOLO?
validations:
required: true
@@ -46,7 +46,7 @@ body:
attributes:
label: Are you willing to submit a PR?
description: >
- (Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve YOLOv8 for everyone, especially if you have a good understanding of how to implement a fix or feature.
- See the YOLOv8 [Contributing Guide](https://docs.ultralytics.com/help/contributing) to get started.
+ (Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve YOLO for everyone, especially if you have a good understanding of how to implement a fix or feature.
+ See the Ultralytics [Contributing Guide](https://docs.ultralytics.com/help/contributing/) to get started.
options:
- label: Yes I'd like to help by submitting a PR!
diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml
index f957b43d6d0..5c6c2f39a21 100644
--- a/.github/ISSUE_TEMPLATE/question.yml
+++ b/.github/ISSUE_TEMPLATE/question.yml
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
name: ❓ Question
description: Ask an Ultralytics YOLO question
@@ -14,10 +14,10 @@ body:
attributes:
label: Search before asking
description: >
- Please search the Ultralytics [Docs](https://docs.ultralytics.com), [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/ultralytics/ultralytics/discussions) to see if a similar question already exists.
+ Please search the Ultralytics [Docs](https://docs.ultralytics.com/), [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/orgs/ultralytics/discussions) to see if a similar question already exists.
options:
- label: >
- I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/ultralytics/ultralytics/discussions) and found no similar questions.
+ I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/orgs/ultralytics/discussions) and found no similar questions.
required: true
- type: textarea
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 2d4ae31873b..233db72b026 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Dependabot for package version updates
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yml
similarity index 82%
rename from .github/workflows/ci.yaml
rename to .github/workflows/ci.yml
index ef1fa85161b..7bf01805628 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# YOLO Continuous Integration (CI) GitHub Actions tests
name: Ultralytics CI
@@ -9,7 +10,7 @@ on:
pull_request:
branches: [main]
schedule:
- - cron: "0 0 * * *" # runs at 00:00 UTC every day
+ - cron: "0 8 * * *" # runs at 08:00 UTC every day
workflow_dispatch:
inputs:
hub:
@@ -51,16 +52,15 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- cache: "pip" # caching pip dependencies
+ - uses: astral-sh/setup-uv@v5
- name: Install requirements
shell: bash # for Windows compatibility
run: |
- python -m pip install --upgrade pip wheel
- pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu
+ uv pip install --system . --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check environment
run: |
yolo checks
- pip list
+ uv pip list
- name: Test HUB training
shell: python
env:
@@ -98,7 +98,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest, windows-latest, macos-14]
+ # Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921
+ os: [ubuntu-latest, macos-15, ubuntu-24.04-arm]
python-version: ["3.11"]
model: [yolo11n]
steps:
@@ -106,12 +107,11 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- cache: "pip" # caching pip dependencies
+ - uses: astral-sh/setup-uv@v5
- name: Install requirements
shell: bash # for Windows compatibility
run: |
- python -m pip install --upgrade pip wheel
- pip install -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu
+ uv pip install --system -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match
- name: Check environment
run: |
yolo checks
@@ -130,6 +130,7 @@ jobs:
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 export_hw_optimized=True verbose=0.185
# Benchmarks for default configuration
+ uv pip list
- name: Benchmark DetectionModel
shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.318
@@ -171,15 +172,19 @@ jobs:
coverage xml -o coverage-benchmarks.xml
- name: Upload Coverage Reports to CodeCov
if: github.repository == 'ultralytics/ultralytics'
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
with:
flags: Benchmarks
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+ - name: Prune uv Cache
+ run: uv cache prune --ci
- name: Benchmark Summary
run: |
cat benchmarks.log
- echo "$(cat benchmarks.log)" >> $GITHUB_STEP_SUMMARY
+ echo '```' >> $GITHUB_STEP_SUMMARY
+ cat benchmarks.log >> $GITHUB_STEP_SUMMARY
+ echo '```' >> $GITHUB_STEP_SUMMARY
Tests:
if: github.event_name != 'workflow_dispatch' || github.event.inputs.tests == 'true'
@@ -188,37 +193,36 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest, macos-14, windows-latest]
+ os: [ubuntu-latest, macos-15, windows-latest, ubuntu-24.04-arm]
python-version: ["3.11"]
torch: [latest]
include:
- os: ubuntu-latest
- python-version: "3.8" # torch 1.8.0 requires python >=3.6, <=3.8
+ python-version: "3.8" # torch 1.8.0 requires python >=3.6, <=3.9
torch: "1.8.0" # min torch version CI https://pypi.org/project/torchvision/
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- cache: "pip" # caching pip dependencies
+ - uses: astral-sh/setup-uv@v5
- name: Install requirements
shell: bash # for Windows compatibility
run: |
# CoreML must be installed before export due to protobuf error from AutoInstall
- python -m pip install --upgrade pip wheel
slow=""
torch=""
if [ "${{ matrix.torch }}" == "1.8.0" ]; then
torch="torch==1.8.0 torchvision==0.9.0"
fi
if [[ "${{ github.event_name }}" =~ ^(schedule|workflow_dispatch)$ ]]; then
- slow="pycocotools mlflow ray[tune]"
+ slow="pycocotools mlflow"
fi
- pip install -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu
+ uv pip install --system -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check environment
run: |
yolo checks
- pip list
+ uv pip list
- name: Pytest tests
shell: bash # for Windows compatibility
run: |
@@ -229,11 +233,13 @@ jobs:
pytest $slow --cov=ultralytics/ --cov-report xml tests/
- name: Upload Coverage Reports to CodeCov
if: github.repository == 'ultralytics/ultralytics' # && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
with:
flags: Tests
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+ - name: Prune uv Cache
+ run: uv cache prune --ci
GPU:
if: github.repository == 'ultralytics/ultralytics' && (github.event_name != 'workflow_dispatch' || github.event.inputs.gpu == 'true')
@@ -241,12 +247,14 @@ jobs:
runs-on: gpu-latest
steps:
- uses: actions/checkout@v4
+ - uses: astral-sh/setup-uv@v5
- name: Install requirements
- run: pip install -e . pytest-cov
+ shell: bash # for Windows compatibility
+ run: uv pip install --system -e . pytest-cov
- name: Check environment
run: |
yolo checks
- pip list
+ uv pip list
- name: Pytest tests
run: |
slow=""
@@ -255,14 +263,15 @@ jobs:
fi
pytest $slow --cov=ultralytics/ --cov-report xml tests/test_cuda.py
- name: Upload Coverage Reports to CodeCov
- uses: codecov/codecov-action@v4
+ uses: codecov/codecov-action@v5
with:
flags: GPU
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
RaspberryPi:
- if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
+ # if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true')
+ if: false # temporarily disable RPi CI for maintenance
timeout-minutes: 120
runs-on: raspberry-pi
steps:
@@ -275,7 +284,7 @@ jobs:
- name: Install requirements
run: |
python -m pip install --upgrade pip wheel
- pip install -e ".[export]" pytest mlflow pycocotools "ray[tune]"
+ pip install -e ".[export]" pytest mlflow pycocotools
- name: Check environment
run: |
yolo checks
@@ -285,7 +294,7 @@ jobs:
- name: Benchmark ClassificationModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-cls.pt' imgsz=160 verbose=0.249
- name: Benchmark YOLOWorld DetectionModel
- run: python -m ultralytics.cfg.__init__ benchmark model='yolo11s-worldv2.pt' imgsz=160 verbose=0.337
+ run: python -m ultralytics.cfg.__init__ benchmark model='yolov8s-worldv2.pt' imgsz=160 verbose=0.337
- name: Benchmark SegmentationModel
run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-seg.pt' imgsz=160 verbose=0.195
- name: Benchmark PoseModel
@@ -322,13 +331,8 @@ jobs:
channels: conda-forge,defaults
channel-priority: true
activate-environment: anaconda-client-env
- - name: Cleanup toolcache
- run: |
- echo "Free space before deletion:"
- df -h /
- rm -rf /opt/hostedtoolcache
- echo "Free space after deletion:"
- df -h /
+ - name: Cleanup disk space
+ uses: ultralytics/actions/cleanup-disk@main
- name: Install Linux packages
run: |
# Fix cv2 ImportError: 'libEGL.so.1: cannot open shared object file: No such file or directory'
@@ -349,16 +353,18 @@ jobs:
conda list
- name: Test CLI
run: |
- yolo predict model=yolov8n.pt imgsz=320
- yolo train model=yolov8n.pt data=coco8.yaml epochs=1 imgsz=32
- yolo val model=yolov8n.pt data=coco8.yaml imgsz=32
- yolo export model=yolov8n.pt format=torchscript imgsz=160
+ yolo predict model=yolo11n.pt imgsz=320
+ yolo train model=yolo11n.pt data=coco8.yaml epochs=1 imgsz=32
+ yolo val model=yolo11n.pt data=coco8.yaml imgsz=32
+ yolo export model=yolo11n.pt format=torchscript imgsz=160
+ yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 format=onnx
+ yolo solutions
- name: Test Python
# Note this step must use the updated default bash environment, not a python environment
run: |
python -c "
from ultralytics import YOLO
- model = YOLO('yolov8n.pt')
+ model = YOLO('yolo11n.pt')
results = model.train(data='coco8.yaml', epochs=3, imgsz=160)
results = model.val(imgsz=160)
results = model.predict(imgsz=160)
@@ -375,14 +381,14 @@ jobs:
Summary:
runs-on: ubuntu-latest
- needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda] # Add job names that you want to check for failure
- if: always() # This ensures the job runs even if previous jobs fail
+ needs: [HUB, Benchmarks, Tests, GPU, Conda]
+ if: always()
steps:
- name: Check for failure and notify
- if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.RaspberryPi.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push')
- uses: slackapi/slack-github-action@v1.27.0
+ if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') && github.run_attempt == '1'
+ uses: slackapi/slack-github-action@v2.0.0
with:
+ webhook-type: incoming-webhook
+ webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
- {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"}
- env:
- SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
+ text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"
diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml
index 5ca3abefba3..f3a6c5a350b 100644
--- a/.github/workflows/cla.yml
+++ b/.github/workflows/cla.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Ultralytics Contributor License Agreement (CLA) action https://docs.ultralytics.com/help/CLA
# This workflow automatically requests Pull Requests (PR) authors to sign the Ultralytics CLA before PRs can be merged
@@ -30,7 +31,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Must be repository secret PAT
- PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
+ PERSONAL_ACCESS_TOKEN: ${{ secrets._GITHUB_TOKEN }}
with:
path-to-signatures: "signatures/version1/cla.json"
path-to-document: "https://docs.ultralytics.com/help/CLA" # CLA document
diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml
deleted file mode 100644
index e6e3e85d3ce..00000000000
--- a/.github/workflows/codeql.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-name: "CodeQL"
-
-on:
- schedule:
- - cron: "0 0 1 * *"
- workflow_dispatch:
-
-jobs:
- analyze:
- name: Analyze
- runs-on: ${{ 'ubuntu-latest' }}
- permissions:
- actions: read
- contents: read
- security-events: write
-
- strategy:
- fail-fast: false
- matrix:
- language: ["python"]
- # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
-
- # Initializes the CodeQL tools for scanning.
- - name: Initialize CodeQL
- uses: github/codeql-action/init@v3
- with:
- languages: ${{ matrix.language }}
- # If you wish to specify custom queries, you can do so here or in a config file.
- # By default, queries listed here will override any specified in a config file.
- # Prefix the list here with "+" to use these queries and those in the config file.
- # queries: security-extended,security-and-quality
-
- - name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v3
- with:
- category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yml
similarity index 66%
rename from .github/workflows/docker.yaml
rename to .github/workflows/docker.yml
index 8a3d41a91a9..b5bdabb22a7 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Builds ultralytics/ultralytics:latest images on DockerHub https://hub.docker.com/r/ultralytics
name: Publish Docker Images
@@ -81,12 +82,11 @@ jobs:
# - dockerfile: "Dockerfile-conda"
# tags: "latest-conda"
# platforms: "linux/amd64"
+ outputs:
+ new_release: ${{ steps.check_tag.outputs.new_release }}
steps:
- - name: Cleanup disk
- # Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848
- uses: jlumbroso/free-disk-space@v1.3.1
- with:
- tool-cache: true
+ - name: Cleanup disk space
+ uses: ultralytics/actions/cleanup-disk@main
- name: Checkout repo
uses: actions/checkout@v4
@@ -111,7 +111,6 @@ jobs:
VERSION=$(grep "^__version__ =" ultralytics/__init__.py | awk -F'"' '{print $2}')
echo "Retrieved Ultralytics version: $VERSION"
echo "version=$VERSION" >> $GITHUB_OUTPUT
-
VERSION_TAG=$(echo "${{ matrix.tags }}" | sed "s/latest/${VERSION}/")
echo "Intended version tag: $VERSION_TAG"
echo "version_tag=$VERSION_TAG" >> $GITHUB_OUTPUT
@@ -123,25 +122,25 @@ jobs:
MESSAGE=$(echo $RESPONSE | jq -r '.message')
if [[ "$MESSAGE" == "null" ]]; then
echo "Tag $VERSION_TAG already exists on DockerHub."
- echo "exists=true" >> $GITHUB_OUTPUT
+ echo "new_release=false" >> $GITHUB_OUTPUT
elif [[ "$MESSAGE" == *"404"* ]]; then
echo "Tag $VERSION_TAG does not exist on DockerHub."
- echo "exists=false" >> $GITHUB_OUTPUT
+ echo "new_release=true" >> $GITHUB_OUTPUT
else
echo "Unexpected response from DockerHub. Please check manually."
- echo "exists=false" >> $GITHUB_OUTPUT
+ echo "new_release=false" >> $GITHUB_OUTPUT
fi
env:
VERSION_TAG: ${{ steps.get_version.outputs.version_tag }}
- name: Build Image
if: github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true'
- uses: nick-invision/retry@v3
+ uses: ultralytics/actions/retry@main
with:
timeout_minutes: 120
- retry_wait_seconds: 60
- max_attempts: 3 # retry twice
- command: |
+ retry_delay_seconds: 60
+ retries: 2
+ run: |
docker build \
--platform ${{ matrix.platforms }} \
-f docker/${{ matrix.dockerfile }} \
@@ -159,25 +158,64 @@ jobs:
run: docker run ultralytics/ultralytics:${{ matrix.tags }} yolo benchmark model=yolo11n.pt imgsz=160 verbose=0.309
- name: Push Docker Image with Ultralytics version tag
- if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.exists == 'false' && matrix.dockerfile != 'Dockerfile-conda'
- run: |
- docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}
+ if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.new_release == 'true' && matrix.dockerfile != 'Dockerfile-conda'
+ uses: ultralytics/actions/retry@main
+ with:
+ timeout_minutes: 15
+ retry_delay_seconds: 300
+ retries: 2
+ run: |
+ docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}
- name: Push Docker Image with latest tag
if: github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')
+ uses: ultralytics/actions/retry@main
+ with:
+ timeout_minutes: 15
+ retry_delay_seconds: 300
+ retries: 2
+ run: |
+ docker push ultralytics/ultralytics:${{ matrix.tags }}
+ if [[ "${{ matrix.tags }}" == "latest" ]]; then
+ t=ultralytics/ultralytics:latest-runner
+ docker build -f docker/Dockerfile-runner -t $t .
+ docker push $t
+ fi
+ if [[ "${{ matrix.tags }}" == "latest-python" ]]; then
+ t=ultralytics/ultralytics:latest-jupyter
+ v=ultralytics/ultralytics:${{ steps.get_version.outputs.version }}-jupyter
+ docker build -f docker/Dockerfile-jupyter -t $t -t $v .
+ docker push $t
+ if [[ "${{ steps.check_tag.outputs.new_release }}" == "true" ]]; then
+ docker push $v
+ fi
+ fi
+
+ trigger-actions:
+ runs-on: ubuntu-latest
+ needs: docker
+ # Only trigger actions on new Ultralytics releases
+ if: success() && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && needs.docker.outputs.new_release == 'true'
+ steps:
+ - name: Trigger Additional GitHub Actions
+ env:
+ GH_TOKEN: ${{ secrets._GITHUB_TOKEN }}
run: |
- docker push ultralytics/ultralytics:${{ matrix.tags }}
- if [[ "${{ matrix.tags }}" == "latest" ]]; then
- t=ultralytics/ultralytics:latest-runner
- docker build -f docker/Dockerfile-runner -t $t .
- docker push $t
- fi
+ sleep 60
+ gh workflow run deploy_cloud_run.yml \
+ --repo ultralytics/assistant \
+ --ref main
- - name: Notify on failure
- if: github.event_name == 'push' && failure() # do not notify on cancelled() as cancelling is performed by hand
- uses: slackapi/slack-github-action@v1.27.0
+ notify:
+ runs-on: ubuntu-latest
+ needs: [docker, trigger-actions]
+ if: always()
+ steps:
+ - name: Check for failure and notify
+ if: needs.docker.result == 'failure' && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && github.run_attempt == '1'
+ uses: slackapi/slack-github-action@v2.0.0
with:
+ webhook-type: incoming-webhook
+ webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
- {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"}
- env:
- SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
+ text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 02bc506a144..5d34e3f88d1 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Test and publish docs to https://docs.ultralytics.com
# Ignores the following Docs rules to match Google-style docstrings:
# D100: Missing docstring in public module
@@ -20,38 +21,45 @@ on:
pull_request:
branches: [main]
workflow_dispatch:
+ inputs:
+ publish_docs:
+ description: "Publish live to https://docs.ultralytics.com"
+ default: true
+ type: boolean
jobs:
Docs:
if: github.repository == 'ultralytics/ultralytics'
- runs-on: macos-14
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_REF: ${{ github.head_ref || github.ref }}
steps:
- - name: Git config
- run: |
- git config --global user.name "UltralyticsAssistant"
- git config --global user.email "web@ultralytics.com"
- name: Checkout Repository
uses: actions/checkout@v4
with:
+ # Fetch depth 0 required to capture full docs author history
repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
- token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
- ref: ${{ github.head_ref || github.ref }}
+ token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ ref: ${{ env.GITHUB_REF }}
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
- cache: "pip" # caching pip dependencies
+ - uses: astral-sh/setup-uv@v5
- name: Install Dependencies
- run: pip install ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
+ # Note "beautifulsoup4<=4.12.3" required due to errors with >=4.13 in https://github.com/ultralytics/ultralytics/pull/19067
+ run: uv pip install --system "beautifulsoup4<=4.12.3" ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
- name: Ruff fixes
continue-on-error: true
run: ruff check --fix --unsafe-fixes --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 .
- name: Update Docs Reference Section and Push Changes
continue-on-error: true
run: |
+ git config --global user.name "UltralyticsAssistant"
+ git config --global user.email "web@ultralytics.com"
python docs/build_reference.py
- git pull origin ${{ github.head_ref || github.ref }}
+ git pull origin "$GITHUB_REF"
git add .
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
if ! git diff --staged --quiet; then
@@ -70,7 +78,7 @@ jobs:
continue-on-error: true
if: always()
run: |
- git pull origin ${{ github.head_ref || github.ref }}
+ git pull origin "$GITHUB_REF"
git add --update # only add updated files
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
if ! git diff --staged --quiet; then
@@ -80,11 +88,10 @@ jobs:
echo "No changes to commit"
fi
- name: Publish Docs to https://docs.ultralytics.com
- if: github.event_name == 'push'
+ if: github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_docs == 'true')
run: |
- git clone https://github.com/ultralytics/docs.git docs-repo
+ git clone --depth 1 --branch gh-pages https://github.com/ultralytics/docs.git docs-repo
cd docs-repo
- git checkout gh-pages || git checkout -b gh-pages
rm -rf *
cp -R ../site/* .
echo "${{ secrets.INDEXNOW_KEY_DOCS }}" > "${{ secrets.INDEXNOW_KEY_DOCS }}.txt"
@@ -92,7 +99,8 @@ jobs:
if git diff --staged --quiet; then
echo "No changes to commit"
else
+ git pull origin gh-pages
LATEST_HASH=$(git rev-parse --short=7 HEAD)
git commit -m "Update Docs for 'ultralytics ${{ steps.check_pypi.outputs.version }} - $LATEST_HASH'"
- git push https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/ultralytics/docs.git gh-pages
+ git push https://${{ secrets._GITHUB_TOKEN }}@github.com/ultralytics/docs.git gh-pages
fi
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index 9befe5c9689..212faae2217 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -1,4 +1,5 @@
-# Ultralytics 🚀 - AGPL-3.0 License https://ultralytics.com/license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Ultralytics Actions https://github.com/ultralytics/actions
# This workflow automatically formats code and documentation in PRs to official Ultralytics standards
@@ -9,34 +10,33 @@ on:
types: [opened, edited]
discussion:
types: [created]
- pull_request_target:
+ pull_request:
branches: [main]
types: [opened, closed, synchronize, review_requested]
jobs:
format:
- runs-on: macos-14
+ runs-on: ubuntu-latest
steps:
- name: Run Ultralytics Formatting
uses: ultralytics/actions@main
with:
- token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # note GITHUB_TOKEN automatically generated
+ token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
labels: true # autolabel issues and PRs
python: true # format Python code and docstrings
prettier: true # format YAML, JSON, Markdown and CSS
spelling: true # check spelling
links: false # check broken links
summary: true # print PR summary with GPT4o (requires 'openai_api_key')
- openai_azure_api_key: ${{ secrets.OPENAI_AZURE_API_KEY }}
- openai_azure_endpoint: ${{ secrets.OPENAI_AZURE_ENDPOINT }}
+ openai_api_key: ${{ secrets.OPENAI_API_KEY }}
first_issue_response: |
- 👋 Hello @${{ github.actor }}, thank you for your interest in Ultralytics 🚀! We recommend a visit to the [Docs](https://docs.ultralytics.com) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered.
+ 👋 Hello @${{ github.actor }}, thank you for your interest in Ultralytics 🚀! We recommend a visit to the [Docs](https://docs.ultralytics.com/) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered.
- If this is a 🐛 Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) to help us debug it.
+ If this is a 🐛 Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum-reproducible-example/) to help us debug it.
If this is a custom training ❓ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/).
- Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://ultralytics.com/discord) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/ultralytics) to share knowledge with the community.
+ Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://discord.com/invite/ultralytics) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com/). Or dive into threads on our [Subreddit](https://reddit.com/r/Ultralytics) to share knowledge with the community.
## Upgrade
@@ -48,15 +48,15 @@ jobs:
## Environments
- YOLOv8 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
+ YOLO may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda-zone)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
- - **Notebooks** with free GPU:
+ - **Notebooks** with free GPU:
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/)
- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/)
- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/)
## Status
-
+
- If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml?query=event%3Aschedule) tests are currently passing. CI tests verify correct operation of all YOLOv8 [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit.
+ If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml?query=event%3Aschedule) tests are currently passing. CI tests verify correct operation of all YOLO [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit.
diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml
index 4dd8aa38b05..932c6a9807a 100644
--- a/.github/workflows/links.yml
+++ b/.github/workflows/links.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Continuous Integration (CI) GitHub Actions tests broken link checker using https://github.com/lycheeverse/lychee
# Ignores the following status codes to reduce false positives:
# - 401(Vimeo, 'unauthorized')
@@ -24,17 +25,15 @@ jobs:
- name: Download and install lychee
run: |
LYCHEE_URL=$(curl -s https://api.github.com/repos/lycheeverse/lychee/releases/latest | grep "browser_download_url" | grep "x86_64-unknown-linux-gnu.tar.gz" | cut -d '"' -f 4)
- curl -L $LYCHEE_URL -o lychee.tar.gz
- tar xzf lychee.tar.gz
- sudo mv lychee /usr/local/bin
+ curl -L $LYCHEE_URL | tar xz -C /usr/local/bin
- name: Test Markdown and HTML links with retry
- uses: nick-invision/retry@v3
+ uses: ultralytics/actions/retry@main
with:
- timeout_minutes: 5
- retry_wait_seconds: 60
- max_attempts: 3
- command: |
+ timeout_minutes: 60
+ retry_delay_seconds: 900
+ retries: 2
+ run: |
lychee \
--scheme https \
--timeout 60 \
@@ -55,16 +54,16 @@ jobs:
--github-token ${{ secrets.GITHUB_TOKEN }} \
--header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \
'./**/*.md' \
- './**/*.html'
+ './**/*.html' | tee -a $GITHUB_STEP_SUMMARY
- name: Test Markdown, HTML, YAML, Python and Notebook links with retry
if: github.event_name == 'workflow_dispatch'
- uses: nick-invision/retry@v3
+ uses: ultralytics/actions/retry@main
with:
- timeout_minutes: 5
- retry_wait_seconds: 60
- max_attempts: 3
- command: |
+ timeout_minutes: 60
+ retry_delay_seconds: 900
+ retries: 2
+ run: |
lychee \
--scheme https \
--timeout 60 \
@@ -72,7 +71,7 @@ jobs:
--accept 401,403,429,500,502,999 \
--exclude-all-private \
--exclude 'https?://(www\.)?(linkedin\.com|twitter\.com|instagram\.com|kaggle\.com|fonts\.gstatic\.com|url\.com)' \
- --exclude-path '**/ci.yaml' \
+ --exclude-path '**/ci.yml' \
--exclude-path docs/zh \
--exclude-path docs/es \
--exclude-path docs/ru \
@@ -90,4 +89,4 @@ jobs:
'./**/*.yml' \
'./**/*.yaml' \
'./**/*.py' \
- './**/*.ipynb'
+ './**/*.ipynb' | tee -a $GITHUB_STEP_SUMMARY
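The links workflow above replaces nick-invision/retry (`retry_wait_seconds`, `max_attempts`, `command`) with ultralytics/actions/retry (`retry_delay_seconds`, `retries`, `run`). As a conceptual sketch only, not the action's implementation, the new inputs behave roughly like the following retry loop; the lychee command in the usage comment is just one example.

```python
# Conceptual sketch of retries / retry_delay_seconds / timeout_minutes semantics.
import subprocess
import time


def run_with_retry(cmd, retries=2, retry_delay_seconds=900, timeout_minutes=60):
    """Run cmd, retrying on failure or timeout with a fixed delay between attempts."""
    for attempt in range(retries + 1):  # first attempt plus `retries` retries
        try:
            subprocess.run(cmd, check=True, timeout=timeout_minutes * 60)
            return
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            if attempt == retries:
                raise
            print(f"Attempt {attempt + 1} failed ({e}); retrying in {retry_delay_seconds}s")
            time.sleep(retry_delay_seconds)


# Example: run_with_retry(["lychee", "--scheme", "https", "./README.md"])
```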
diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml
index 347ec1b99c8..fb36aa77314 100644
--- a/.github/workflows/merge-main-into-prs.yml
+++ b/.github/workflows/merge-main-into-prs.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Automatically merges repository 'main' branch into all open PRs to keep them up-to-date
# Action runs on updates to main branch so when one PR merges to main all others update
@@ -33,7 +34,7 @@ jobs:
import os
import time
- g = Github("${{ secrets.PERSONAL_ACCESS_TOKEN }}")
+ g = Github("${{ secrets._GITHUB_TOKEN }}")
repo = g.get_repo("${{ github.repository }}")
# Fetch the default branch name
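The hunk above only shows the PyGithub setup of the merge script (authenticate with the repo token, fetch the repository and its default branch). A hedged sketch of the overall flow the workflow comment describes, merging `main` into each open PR, might look like the code below; it is an illustration rather than the upstream script, covers only same-repository branches, and omits the handling for fork PRs and protected branches.

```python
# Hedged sketch of "merge main into all open PRs" with PyGithub; not the exact upstream script.
import os

from github import Github, GithubException

g = Github(os.environ["GITHUB_TOKEN"])  # in the workflow this is secrets._GITHUB_TOKEN
repo = g.get_repo("ultralytics/ultralytics")
default_branch = repo.default_branch  # fetch the default branch name, e.g. "main"

for pr in repo.get_pulls(state="open", base=default_branch):
    try:
        # Merge the default branch into the PR head branch (same-repo branches only)
        repo.merge(base=pr.head.ref, head=default_branch, commit_message=f"Merge {default_branch} into PR #{pr.number}")
        print(f"Updated PR #{pr.number}")
    except GithubException as e:
        print(f"Could not update PR #{pr.number}: {e}")
```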
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 8276a7696a4..1a83a1bfe42 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Publish pip package to PyPI https://pypi.org/project/ultralytics/
name: Publish to PyPI
@@ -13,129 +14,112 @@ on:
description: Publish to PyPI
jobs:
- publish:
+ check:
if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher'
- name: Publish
runs-on: ubuntu-latest
+ permissions:
+ contents: write
+ outputs:
+ increment: ${{ steps.check_pypi.outputs.increment }}
+ current_tag: ${{ steps.check_pypi.outputs.current_tag }}
+ previous_tag: ${{ steps.check_pypi.outputs.previous_tag }}
steps:
- - name: Checkout code
- uses: actions/checkout@v4
- with:
- token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # use your PAT here
- - name: Git config
- run: |
- git config --global user.name "UltralyticsAssistant"
- git config --global user.email "web@ultralytics.com"
- - name: Set up Python environment
- uses: actions/setup-python@v5
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
with:
python-version: "3.x"
- cache: "pip" # caching pip dependencies
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip wheel
- pip install requests build twine toml
- - name: Check PyPI version
+ - uses: astral-sh/setup-uv@v5
+ - run: uv pip install --system --no-cache ultralytics-actions
+ - id: check_pypi
shell: python
run: |
import os
- import requests
- import toml
-
- # Load version and package name from pyproject.toml
- pyproject = toml.load('pyproject.toml')
- package_name = pyproject['project']['name']
- local_version = pyproject['project'].get('version', 'dynamic')
-
- # If version is dynamic, extract it from the specified file
- if local_version == 'dynamic':
- version_attr = pyproject['tool']['setuptools']['dynamic']['version']['attr']
- module_path, attr_name = version_attr.rsplit('.', 1)
- with open(f"{module_path.replace('.', '/')}/__init__.py") as f:
- local_version = next(line.split('=')[1].strip().strip("'\"") for line in f if line.startswith(attr_name))
-
- print(f"Local Version: {local_version}")
-
- # Get online version from PyPI
- response = requests.get(f"https://pypi.org/pypi/{package_name}/json")
- online_version = response.json()['info']['version'] if response.status_code == 200 else None
- print(f"Online Version: {online_version or 'Not Found'}")
-
- # Determine if a new version should be published
- publish = False
- if online_version:
- local_ver = tuple(map(int, local_version.split('.')))
- online_ver = tuple(map(int, online_version.split('.')))
- major_diff = local_ver[0] - online_ver[0]
- minor_diff = local_ver[1] - online_ver[1]
- patch_diff = local_ver[2] - online_ver[2]
-
- publish = (
- (major_diff == 0 and minor_diff == 0 and 0 < patch_diff <= 2) or
- (major_diff == 0 and minor_diff == 1 and local_ver[2] == 0) or
- (major_diff == 1 and local_ver[1] == 0 and local_ver[2] == 0)
- )
- else:
- publish = True # First release
-
+ from actions.utils import check_pypi_version
+ local_version, online_version, publish = check_pypi_version()
os.system(f'echo "increment={publish}" >> $GITHUB_OUTPUT')
os.system(f'echo "current_tag=v{local_version}" >> $GITHUB_OUTPUT')
os.system(f'echo "previous_tag=v{online_version}" >> $GITHUB_OUTPUT')
-
if publish:
print('Ready to publish new version to PyPI ✅.')
- id: check_pypi
- - name: Publish to PyPI
- continue-on-error: true
- if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True'
- run: |
- python -m build
- python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
- - name: Publish new tag
- if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True'
- run: |
- git tag -a "${{ steps.check_pypi.outputs.current_tag }}" -m "$(git log -1 --pretty=%B)" # i.e. "v0.1.2 commit message"
- git push origin "${{ steps.check_pypi.outputs.current_tag }}"
- - name: Publish new release
- if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True'
+ - name: Tag and Release
+ if: steps.check_pypi.outputs.increment == 'True'
env:
- OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CURRENT_TAG: ${{ steps.check_pypi.outputs.current_tag }}
PREVIOUS_TAG: ${{ steps.check_pypi.outputs.previous_tag }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
- curl -s "https://raw.githubusercontent.com/ultralytics/actions/main/utils/summarize_release.py" | python -
- shell: bash
+ git config --global user.name "UltralyticsAssistant"
+ git config --global user.email "web@ultralytics.com"
+ git tag -a "$CURRENT_TAG" -m "$(git log -1 --pretty=%B)"
+ git push origin "$CURRENT_TAG"
+ ultralytics-actions-summarize-release
+ uv cache prune --ci
+
+ build:
+ needs: check
+ if: needs.check.outputs.increment == 'True'
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.x"
+ - uses: astral-sh/setup-uv@v5
+ - run: uv pip install --system --no-cache build
+ - run: python -m build
+ - uses: actions/upload-artifact@v4
+ with:
+ name: dist
+ path: dist/
+ - run: uv cache prune --ci
+
+ publish:
+ needs: [check, build]
+ if: needs.check.outputs.increment == 'True'
+ runs-on: ubuntu-latest
+ environment: # for GitHub Deployments tab
+ name: Release - PyPI
+ url: https://pypi.org/p/ultralytics
+ permissions:
+ id-token: write # for PyPI trusted publishing
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ name: dist
+ path: dist/
+ - uses: pypa/gh-action-pypi-publish@release/v1
+
+ notify:
+ needs: [check, publish]
+ if: always() && needs.check.outputs.increment == 'True'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
- name: Extract PR Details
env:
- GH_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
- # Check if the event is a pull request or pull_request_target
- if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then
- PR_NUMBER=${{ github.event.pull_request.number }}
- PR_TITLE=$(gh pr view $PR_NUMBER --json title --jq '.title')
- else
- # Use gh to find the PR associated with the commit
- COMMIT_SHA=${{ github.event.after }}
- PR_JSON=$(gh pr list --search "${COMMIT_SHA}" --state merged --json number,title --jq '.[0]')
- PR_NUMBER=$(echo $PR_JSON | jq -r '.number')
- PR_TITLE=$(echo $PR_JSON | jq -r '.title')
- fi
- echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
- echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV
- - name: Notify on Slack (Success)
- if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True'
- uses: slackapi/slack-github-action@v1.27.0
+ PR_JSON=$(gh pr list --search "${GITHUB_SHA}" --state merged --json number,title --jq '.[0]')
+ PR_NUMBER=$(echo "${PR_JSON}" | jq -r '.number')
+ PR_TITLE=$(echo "${PR_JSON}" | jq -r '.title')
+ echo "PR_NUMBER=${PR_NUMBER}" >> "${GITHUB_ENV}"
+ echo "PR_TITLE=${PR_TITLE}" >> "${GITHUB_ENV}"
+ - name: Notify Success
+ if: needs.publish.result == 'success' && github.event_name == 'push'
+ uses: slackapi/slack-github-action@v2.0.0
with:
+ webhook-type: incoming-webhook
+ webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
- {"text": " GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW '${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}' pip package published 🎉\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"}
- env:
- SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
- - name: Notify on Slack (Failure)
- if: failure()
- uses: slackapi/slack-github-action@v1.27.0
+ text: " GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ needs.check.outputs.current_tag }}` pip package published 🎉\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"
+ - name: Notify Failure
+ if: needs.publish.result != 'success'
+ uses: slackapi/slack-github-action@v2.0.0
with:
+ webhook-type: incoming-webhook
+ webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
- {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"}
- env:
- SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
+ text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"
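The check job above drops the inline PyPI version comparison in favor of `check_pypi_version()` from the ultralytics-actions package. The increment rule is still visible in the removed lines; a minimal sketch of that rule, assuming the new helper behaves the same way, is:

```python
# Minimal sketch of the version-increment rule from the removed inline code:
# publish only for patch bumps of 1-2, an x.Y.0 minor bump, or an X.0.0 major bump.
def should_publish(local_version, online_version):
    """Return True if local_version is an acceptable increment over the PyPI version."""
    if not online_version:
        return True  # first release
    local = tuple(map(int, local_version.split(".")))
    online = tuple(map(int, online_version.split(".")))
    major_diff, minor_diff, patch_diff = (l - o for l, o in zip(local, online))
    return (
        (major_diff == 0 and minor_diff == 0 and 0 < patch_diff <= 2)  # small patch bump
        or (major_diff == 0 and minor_diff == 1 and local[2] == 0)  # new minor, patch reset to 0
        or (major_diff == 1 and local[1] == 0 and local[2] == 0)  # new major, minor/patch reset to 0
    )


assert should_publish("8.3.2", "8.3.0") is True
assert should_publish("8.3.5", "8.3.0") is False  # a jump of 5 patch versions is rejected
```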
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index dd8503541eb..1ec031f6f7b 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -1,10 +1,14 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
name: Close stale issues
on:
schedule:
- cron: "0 0 * * *" # Runs at 00:00 UTC every day
+permissions:
+ pull-requests: write
+ issues: write
+
jobs:
stale:
runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
index 589906e0ba8..ceb43cc00c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,6 +124,7 @@ venv.bak/
# VSCode project settings
.vscode/
+.devcontainer/
# Rope project settings
.ropeproject
@@ -157,12 +158,15 @@ weights/
*.torchscript
*.tflite
*.h5
+*.mnn
*_saved_model/
*_web_model/
*_openvino_model/
*_paddle_model/
*_ncnn_model/
+*_imx_model/
pnnx*
+*.rknn
# Autogenerated files for tests
/ultralytics/assets/
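The new `.gitignore` patterns (`*.mnn`, `*.rknn`, `*_imx_model/`) match additional model-export artifacts. As a hedged illustration only (the format strings "mnn", "rknn" and "imx" below are assumptions; consult the export docs for the exact names), such artifacts would typically be produced next to the weights via the Python export API and are now ignored:

```python
# Hedged illustration of where the newly ignored export artifacts come from,
# e.g. yolo11n.mnn, yolo11n.rknn or a yolo11n_imx_model/ directory.
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
for fmt in ("mnn", "rknn", "imx"):  # assumed format strings; verify against the export docs
    try:
        model.export(format=fmt, imgsz=320)  # artifacts land beside the weights and are gitignored
    except Exception as e:
        print(f"Export to {fmt} not available in this environment: {e}")
```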
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b3dbfe16952..15447bce784 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -25,7 +25,7 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics
## Code of Conduct
-To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community.
+To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code-of-conduct/). Respect, kindness, and professionalism are at the heart of our community.
## Contributing via Pull Requests
@@ -121,7 +121,7 @@ All pull requests must pass the GitHub Actions [Continuous Integration](https://
## Reporting Bugs
-We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/), a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
+We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/), a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
## License
@@ -163,4 +163,4 @@ the project's quality standards. Review the CI output and fix any issues. For de
### How do I report a bug in Ultralytics YOLO repositories?
-To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section.
+To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section.
diff --git a/README.md b/README.md
index 8fec98deaed..0be72727a86 100644
--- a/README.md
+++ b/README.md
@@ -72,26 +72,29 @@ metrics = model.val(path) # evaluate model performance on exported model
[中文](https://docs.ultralytics.com/zh) | [한국어](https://docs.ultralytics.com/ko) | [日本語](https://docs.ultralytics.com/ja) | [Русский](https://docs.ultralytics.com/ru) | [Deutsch](https://docs.ultralytics.com/de) | [Français](https://docs.ultralytics.com/fr) | [Español](https://docs.ultralytics.com/es) | [Português](https://docs.ultralytics.com/pt) | [Türkçe](https://docs.ultralytics.com/tr) | [Tiếng Việt](https://docs.ultralytics.com/vi) | [العربية](https://docs.ultralytics.com/ar)
-
+
+
-
-
-
+
+
-
-
-
+
+
+
+
[Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics) is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLO11 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection and tracking, instance segmentation, image classification and pose estimation tasks.
-We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums!
+We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums!
To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license).
-
+
+
+
@@ -106,7 +109,7 @@ To request an Enterprise License please complete the form at [Ultralytics Licens
-
+
@@ -117,9 +120,9 @@ See below for a quickstart install and usage examples, and see our [Docs](https:
Install
-Pip install the ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/).
+Pip install the Ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/).
-[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)
+[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)
```bash
pip install ultralytics
@@ -127,7 +130,7 @@ pip install ultralytics
For alternative installation methods including [Conda](https://anaconda.org/conda-forge/ultralytics), [Docker](https://hub.docker.com/r/ultralytics/ultralytics), and Git, please refer to the [Quickstart Guide](https://docs.ultralytics.com/quickstart/).
-[](https://anaconda.org/conda-forge/ultralytics) [](https://hub.docker.com/r/ultralytics/ultralytics)
+[](https://anaconda.org/conda-forge/ultralytics) [](https://hub.docker.com/r/ultralytics/ultralytics) [](https://hub.docker.com/r/ultralytics/ultralytics)
@@ -142,7 +145,7 @@ YOLO may be used directly in the Command Line Interface (CLI) with a `yolo` comm
yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg'
```
-`yolo` can be used for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples.
+`yolo` can be used for a variety of tasks and modes and accepts additional arguments, e.g. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples.
### Python
@@ -179,11 +182,13 @@ See YOLO [Python Docs](https://docs.ultralytics.com/usage/python/) for more exam
##
Models
-YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models.
+YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models. All [Models](https://docs.ultralytics.com/models/) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
-
-
-All [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
+
+
+
+
+ Detection (COCO)
@@ -191,11 +196,11 @@ See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examp
| Model | size (pixels) | mAPval 50-95 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) |
| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.12 ± 0.82 ms | 1.55 ± 0.01 ms | 2.6 | 6.5 |
-| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.01 ± 1.17 ms | 2.46 ± 0.00 ms | 9.4 | 21.5 |
-| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.20 ± 2.04 ms | 4.70 ± 0.06 ms | 20.1 | 68.0 |
-| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.64 ± 1.39 ms | 6.16 ± 0.08 ms | 25.3 | 86.9 |
-| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.78 ± 6.66 ms | 11.31 ± 0.24 ms | 56.9 | 194.9 |
+| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.1 ± 0.8 | 1.5 ± 0.0 | 2.6 | 6.5 |
+| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.0 ± 1.2 | 2.5 ± 0.0 | 9.4 | 21.5 |
+| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.2 ± 2.0 | 4.7 ± 0.1 | 20.1 | 68.0 |
+| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.6 ± 1.4 | 6.2 ± 0.1 | 25.3 | 86.9 |
+| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.8 ± 6.7 | 11.3 ± 0.2 | 56.9 | 194.9 |
- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset. Reproduce by `yolo val detect data=coco.yaml device=0`
- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val detect data=coco.yaml batch=1 device=0|cpu`
@@ -208,14 +213,31 @@ See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for usage e
| Model | size (pixels) | mAPbox 50-95 | mAPmask 50-95 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) |
| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.90 ± 1.14 ms | 1.84 ± 0.00 ms | 2.9 | 10.4 |
-| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.56 ± 4.89 ms | 2.94 ± 0.01 ms | 10.1 | 35.5 |
-| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.63 ± 1.16 ms | 6.31 ± 0.09 ms | 22.4 | 123.3 |
-| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.16 ± 3.17 ms | 7.78 ± 0.16 ms | 27.6 | 142.2 |
-| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.50 ± 3.24 ms | 15.75 ± 0.67 ms | 62.1 | 319.0 |
+| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.9 ± 1.1 | 1.8 ± 0.0 | 2.9 | 10.4 |
+| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.6 ± 4.9 | 2.9 ± 0.0 | 10.1 | 35.5 |
+| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.6 ± 1.2 | 6.3 ± 0.1 | 22.4 | 123.3 |
+| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ± 3.2 | 7.8 ± 0.2 | 27.6 | 142.2 |
+| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ± 3.2 | 15.8 ± 0.7 | 62.1 | 319.0 |
+
+- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset. Reproduce by `yolo val segment data=coco.yaml device=0`
+- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val segment data=coco.yaml batch=1 device=0|cpu`
+
+
+
+Classification (ImageNet)
+
+See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pretrained classes.
-- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset. Reproduce by `yolo val segment data=coco-seg.yaml device=0`
-- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu`
+| Model | size (pixels) | acc top1 | acc top5 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) at 640 |
+| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ |
+| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.0 ± 0.3 | 1.1 ± 0.0 | 1.6 | 3.3 |
+| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.9 ± 0.2 | 1.3 ± 0.0 | 5.5 | 12.1 |
+| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.2 ± 0.4 | 2.0 ± 0.0 | 10.4 | 39.3 |
+| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.2 ± 0.3 | 2.8 ± 0.0 | 12.9 | 49.4 |
+| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.4 ± 0.9 | 3.8 ± 0.0 | 28.4 | 110.4 |
+
+- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set. Reproduce by `yolo val classify data=path/to/ImageNet device=0`
+- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`
@@ -225,11 +247,11 @@ See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples wit
| Model | size (pixels) | mAPpose 50-95 | mAPpose 50 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) |
| ---------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.40 ± 0.51 ms | 1.72 ± 0.01 ms | 2.9 | 7.6 |
-| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.54 ± 0.59 ms | 2.57 ± 0.00 ms | 9.9 | 23.2 |
-| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.28 ± 0.77 ms | 4.94 ± 0.05 ms | 20.9 | 71.7 |
-| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.69 ± 1.10 ms | 6.42 ± 0.13 ms | 26.2 | 90.7 |
-| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 487.97 ± 13.91 ms | 12.06 ± 0.20 ms | 58.8 | 203.3 |
+| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.4 ± 0.5 | 1.7 ± 0.0 | 2.9 | 7.6 |
+| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.5 ± 0.6 | 2.6 ± 0.0 | 9.9 | 23.2 |
+| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.3 ± 0.8 | 4.9 ± 0.1 | 20.9 | 71.7 |
+| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.7 ± 1.1 | 6.4 ± 0.1 | 26.2 | 90.7 |
+| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 488.0 ± 13.9 | 12.1 ± 0.2 | 58.8 | 203.3 |
- **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](https://cocodataset.org/) dataset. Reproduce by `yolo val pose data=coco-pose.yaml device=0`
- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu`
@@ -242,50 +264,33 @@ See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for usage examples with
| Model | size (pixels) | mAPtest 50 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) |
| -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ± 0.80 ms | 4.43 ± 0.01 ms | 2.7 | 17.2 |
-| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ± 4.00 ms | 5.13 ± 0.02 ms | 9.7 | 57.5 |
-| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ± 2.87 ms | 10.07 ± 0.38 ms | 20.9 | 183.5 |
-| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ± 4.98 ms | 13.46 ± 0.55 ms | 26.2 | 232.0 |
-| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ± 7.67 ms | 28.59 ± 0.96 ms | 58.8 | 520.2 |
+| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.6 ± 0.8 | 4.4 ± 0.0 | 2.7 | 17.2 |
+| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.4 ± 4.0 | 5.1 ± 0.0 | 9.7 | 57.5 |
+| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.8 ± 2.9 | 10.1 ± 0.4 | 20.9 | 183.5 |
+| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.5 ± 5.0 | 13.5 ± 0.6 | 26.2 | 232.0 |
+| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.6 ± 7.7 | 28.6 ± 1.0 | 58.8 | 520.2 |
- **mAPtest** values are for single-model multiscale on [DOTAv1](https://captain-whu.github.io/DOTA/index.html) dataset. Reproduce by `yolo val obb data=DOTAv1.yaml device=0 split=test` and submit merged results to [DOTA evaluation](https://captain-whu.github.io/DOTA/evaluation.html).
- **Speed** averaged over DOTAv1 val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu`
-Classification (ImageNet)
-
-See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pretrained classes.
-
-| Model | size (pixels) | acc top1 | acc top5 | Speed CPU ONNX (ms) | Speed T4 TensorRT10 (ms) | params (M) | FLOPs (B) at 640 |
-| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ |
-| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.03 ± 0.32 ms | 1.10 ± 0.01 ms | 1.6 | 3.3 |
-| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.89 ± 0.18 ms | 1.34 ± 0.01 ms | 5.5 | 12.1 |
-| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.17 ± 0.40 ms | 1.95 ± 0.00 ms | 10.4 | 39.3 |
-| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.17 ± 0.29 ms | 2.76 ± 0.00 ms | 12.9 | 49.4 |
-| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.41 ± 0.94 ms | 3.82 ± 0.00 ms | 28.4 | 110.4 |
-
-- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set. Reproduce by `yolo val classify data=path/to/ImageNet device=0`
-- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance. Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`
-
-
-
##
Integrations
-Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [Roboflow](https://roboflow.com/?ref=ultralytics), ClearML, [Comet](https://bit.ly/yolov8-readme-comet), Neural Magic and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
+Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/), [Comet](https://bit.ly/yolov8-readme-comet), [Roboflow](https://roboflow.com/?ref=ultralytics) and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
-
-
-
+
+
+
-
-
+
+
-
-
+
+
@@ -294,15 +299,15 @@ Our key integrations with leading AI platforms extend the functionality of Ultra
-| Roboflow | ClearML ⭐ NEW | Comet ⭐ NEW | Neural Magic ⭐ NEW |
-| :--------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
-| Label and export your custom datasets directly to YOLO11 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | Automatically track, visualize and even remotely train YOLO11 using [ClearML](https://clear.ml/) (open-source!) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) |
+| Ultralytics HUB 🚀 | W&B | Comet ⭐ NEW | Neural Magic |
+| :--------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
+| Streamline YOLO workflows: Label, train, and deploy effortlessly with [Ultralytics HUB](https://www.ultralytics.com/hub). Try now! | Track experiments, hyperparameters, and results with [Weights & Biases](https://docs.wandb.ai/guides/integrations/ultralytics/) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) |
##
Ultralytics HUB
Experience seamless AI with [Ultralytics HUB](https://www.ultralytics.com/hub) ⭐, the all-in-one solution for data visualization, YOLO11 🚀 model training and deployment, without any coding. Transform images into actionable insights and bring your AI visions to life with ease using our cutting-edge platform and user-friendly [Ultralytics App](https://www.ultralytics.com/app-install). Start your journey for **Free** now!
-
+
##
@@ -274,5 +279,5 @@ Ultralytics 提供两种许可选项以适应各种用例：
-
+
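The README hunks above cover the CLI quickstart (`yolo predict ...`) and point to the Python usage docs. As a short hedged counterpart based on the documented ultralytics Python API, the same quickstart in Python might look like this; the demo image URL and the `coco8.yaml` sample dataset follow the public docs, and attribute names should be verified there.

```python
# Python counterpart to the CLI quickstart: predict on the demo bus image, then a quick validation.
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # pretrained COCO detection model, auto-downloaded on first use
results = model.predict("https://ultralytics.com/images/bus.jpg", imgsz=640)  # run inference
results[0].show()  # visualize detections
metrics = model.val(data="coco8.yaml")  # quick validation on the coco8 sample dataset
print(metrics.box.map)  # mAP50-95
```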
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3283c65076f..2cfbfd352e9 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -3,7 +3,7 @@
# Image is CUDA-optimized for YOLO11 single/multi-GPU training and inference
# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3
-FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime
+FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
# Set environment variables
# Avoid DDP error "MKL_THREADING_LAYER=INTEL is incompatible with libgomp.so.1 library" https://github.com/pytorch/pytorch/issues/37377
@@ -11,7 +11,8 @@ ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_BREAK_SYSTEM_PACKAGES=1 \
- MKL_THREADING_LAYER=GNU
+ MKL_THREADING_LAYER=GNU \
+ OMP_NUM_THREADS=1
# Downloads to user config dir
ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \
@@ -39,23 +40,22 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
# Install pip packages
-RUN python3 -m pip install --upgrade pip wheel
-# Pin TensorRT-cu12==10.1.0 to avoid 10.2.0 bug https://github.com/ultralytics/ultralytics/pull/14239 (note -cu12 must be used)
-RUN pip install -e ".[export]" "tensorrt-cu12==10.1.0" "albumentations>=1.4.6" comet pycocotools
+RUN pip install uv
+# Note -cu12 must be used with tensorrt
+RUN uv pip install --system -e ".[export]" tensorrt-cu12 "albumentations>=1.4.6" comet pycocotools
# Run exports to AutoInstall packages
# Edge TPU export fails the first time so is run twice here
RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 || yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32
RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32
# Requires <= Python 3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991
-RUN pip install "paddlepaddle>=2.6.0" x2paddle
+RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle
# Fix error: `np.bool` was a deprecated alias for the builtin `bool` segmentation error in Tests
-RUN pip install numpy==1.23.5
+RUN uv pip install --system numpy==1.23.5
# Remove extra build files
RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json
-
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
diff --git a/docker/Dockerfile-arm64 b/docker/Dockerfile-arm64
index b5bdbb0fb5c..dce27320381 100644
--- a/docker/Dockerfile-arm64
+++ b/docker/Dockerfile-arm64
@@ -34,8 +34,8 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
# Install pip packages
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install -e ".[export]"
+RUN pip install uv
+RUN uv pip install --system -e ".[export]" --break-system-packages
# Creates a symbolic link to make 'python' point to 'python3'
RUN ln -sf /usr/bin/python3 /usr/bin/python
diff --git a/docker/Dockerfile-cpu b/docker/Dockerfile-cpu
index fe8d88521f9..79d5d50b707 100644
--- a/docker/Dockerfile-cpu
+++ b/docker/Dockerfile-cpu
@@ -2,8 +2,8 @@
# Builds ultralytics/ultralytics:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLO11 deployments
-# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
-FROM ubuntu:23.10
+# Use official Python base image for reproducibility (3.11.10 for export and 3.12.6 for inference)
+FROM python:3.11.10-slim-bookworm
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
@@ -32,21 +32,21 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
# Install pip packages
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu
+RUN pip install uv
+RUN uv pip install --system -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match
# Run exports to AutoInstall packages
RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32
RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32
# Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991
-# RUN pip install "paddlepaddle>=2.6.0" x2paddle
-
-# Creates a symbolic link to make 'python' point to 'python3'
-RUN ln -sf /usr/bin/python3 /usr/bin/python
+RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle
# Remove extra build files
RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json
+# Set default command to bash
+CMD ["/bin/bash"]
+
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
diff --git a/docker/Dockerfile-jetson-jetpack4 b/docker/Dockerfile-jetson-jetpack4
index c140974807f..e11279dad90 100644
--- a/docker/Dockerfile-jetson-jetpack4
+++ b/docker/Dockerfile-jetson-jetpack4
@@ -43,13 +43,14 @@ ADD https://nvidia.box.com/shared/static/gjqofg7rkg97z3gc8jeyup6t8n9j8xjw.whl on
ADD https://forums.developer.nvidia.com/uploads/short-url/hASzFOm9YsJx6VVFrDW1g44CMmv.whl tensorrt-8.2.0.6-cp38-none-linux_aarch64.whl
# Install pip packages
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install \
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install uv
+RUN uv pip install --system \
onnxruntime_gpu-1.8.0-cp38-cp38-linux_aarch64.whl \
tensorrt-8.2.0.6-cp38-none-linux_aarch64.whl \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-1.11.0a0+gitbc2c6ed-cp38-cp38-linux_aarch64.whl \
https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.12.0a0+9b5a3fe-cp38-cp38-linux_aarch64.whl
-RUN pip install -e ".[export]"
+RUN uv pip install --system -e ".[export]"
# Remove extra build files
RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json
diff --git a/docker/Dockerfile-jetson-jetpack5 b/docker/Dockerfile-jetson-jetpack5
index 9949d26b71e..bfedb6e0cf2 100644
--- a/docker/Dockerfile-jetson-jetpack5
+++ b/docker/Dockerfile-jetson-jetpack5
@@ -1,9 +1,9 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Builds ultralytics/ultralytics:jetson-jetson-jetpack5 image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
-# Supports JetPack5.x for YOLO11 on Jetson Xavier NX, AGX Xavier, AGX Orin, Orin Nano and Orin NX
+# Supports JetPack5.1.2 for YOLO11 on Jetson Xavier NX, AGX Xavier, AGX Orin, Orin Nano and Orin NX
-# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-pytorch
-FROM nvcr.io/nvidia/l4t-pytorch:r35.2.1-pth2.0-py3
+# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-jetpack
+FROM nvcr.io/nvidia/l4t-jetpack:r35.4.1
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
@@ -16,13 +16,10 @@ ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \
https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.Unicode.ttf \
/root/.config/Ultralytics/
-# Install linux packages
-# g++ required to build 'tflite_support' and 'lap' packages
-# libusb-1.0-0 required for 'tflite_support' package when exporting to TFLite
-# pkg-config and libhdf5-dev (not included) are needed to build 'h5py==3.11.0' aarch64 wheel required by 'tensorflow'
+# Install dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
- gcc git zip unzip wget curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 \
+ git python3-pip libopenmpi-dev libopenblas-base libomp-dev \
&& rm -rf /var/lib/apt/lists/*
# Create working directory
@@ -33,16 +30,14 @@ COPY . .
RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
-# Remove opencv-python from Ultralytics dependencies as it conflicts with opencv-python installed in base image
-RUN sed -i '/opencv-python/d' pyproject.toml
+# Pip install onnxruntime-gpu, torch, torchvision and ultralytics
+RUN python3 -m pip install --upgrade pip uv
+RUN uv pip install --system \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.18.0-cp38-cp38-linux_aarch64.whl \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl
-# Download onnxruntime-gpu 1.15.1 for Jetson Linux 35.2.1 (JetPack 5.1). Other versions can be seen in https://elinux.org/Jetson_Zoo#ONNX_Runtime
-ADD https://nvidia.box.com/shared/static/mvdcltm9ewdy2d5nurkiqorofz1s53ww.whl onnxruntime_gpu-1.15.1-cp38-cp38-linux_aarch64.whl
-
-# Install pip packages manually for TensorRT compatibility https://github.com/NVIDIA/TensorRT/issues/2567
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install onnxruntime_gpu-1.15.1-cp38-cp38-linux_aarch64.whl
-RUN pip install -e ".[export]"
+RUN uv pip install --system -e ".[export]"
# Remove extra build files
RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json
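Since the JetPack 5 image now starts from the plain l4t-jetpack base and installs prebuilt torch/torchvision/onnxruntime-gpu wheels plus `.[export]`, a common follow-up inside the container is a TensorRT export. A rough sketch; the model name and `half=True` are illustrative choices, not anything the Dockerfile dictates:

```python
# TensorRT export inside the JetPack 5 container; AutoInstall fetches any missing
# exporter dependencies on first use. FP16 (half=True) is an optional choice that
# generally suits Jetson-class GPUs.
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="engine", half=True)  # writes yolo11n.engine next to the .pt file
```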
diff --git a/docker/Dockerfile-jetson-jetpack6 b/docker/Dockerfile-jetson-jetpack6
index e4da5461db8..fa6ec651b0a 100644
--- a/docker/Dockerfile-jetson-jetpack6
+++ b/docker/Dockerfile-jetson-jetpack6
@@ -1,9 +1,9 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Builds ultralytics/ultralytics:jetson-jetpack6 image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
-# Supports JetPack6.x for YOLO11 on Jetson AGX Orin, Orin NX and Orin Nano Series
+# Supports JetPack6.1 for YOLO11 on Jetson AGX Orin, Orin NX and Orin Nano Series
# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-jetpack
-FROM nvcr.io/nvidia/l4t-jetpack:r36.3.0
+FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
@@ -17,9 +17,11 @@ ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \
/root/.config/Ultralytics/
# Install dependencies
-RUN apt-get update && \
+ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb .
+RUN dpkg -i cuda-keyring_1.1-1_all.deb && \
+ apt-get update && \
apt-get install -y --no-install-recommends \
- git python3-pip libopenmpi-dev libopenblas-base libomp-dev \
+ git python3-pip libopenmpi-dev libopenblas-base libomp-dev libcusparselt0 libcusparselt-dev \
&& rm -rf /var/lib/apt/lists/*
# Create working directory
@@ -30,16 +32,13 @@ COPY . .
RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
-# Download onnxruntime-gpu 1.18.0 from https://elinux.org/Jetson_Zoo and https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048
-ADD https://nvidia.box.com/shared/static/48dtuob7meiw6ebgfsfqakc9vse62sg4.whl onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl
-
# Pip install onnxruntime-gpu, torch, torchvision and ultralytics
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install \
- onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl \
- https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.3.0-cp310-cp310-linux_aarch64.whl \
- https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl
-RUN pip install -e ".[export]"
+RUN python3 -m pip install --upgrade pip uv
+RUN uv pip install --system \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.20.0-cp310-cp310-linux_aarch64.whl \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.5.0a0+872d972e41.nv24.08-cp310-cp310-linux_aarch64.whl \
+ https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl
+RUN uv pip install --system -e ".[export]"
# Remove extra build files
RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json
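JetPack 6 swaps in the cuSPARSELt packages and the newer onnxruntime-gpu 1.20 / torch 2.5 wheels. One quick way to verify the GPU execution providers actually registered is to query ONNX Runtime inside the container; the provider list in the comment below is an expectation for this image, not something the Dockerfile guarantees:

```python
# Check which execution providers the bundled onnxruntime-gpu wheel exposes.
import onnxruntime as ort

print(ort.__version__)
print(ort.get_available_providers())
# Expected on JetPack 6 (assumption): ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
```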
diff --git a/docker/Dockerfile-jupyter b/docker/Dockerfile-jupyter
new file mode 100644
index 00000000000..c458ff88480
--- /dev/null
+++ b/docker/Dockerfile-jupyter
@@ -0,0 +1,33 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Builds ultralytics/ultralytics:latest-jupyter image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
+# Image provides JupyterLab interface for interactive YOLO development and includes tutorial notebooks
+
+# Start from Python-based Ultralytics image for full Python environment
+FROM ultralytics/ultralytics:latest-python
+
+# Install JupyterLab for interactive development
+RUN uv pip install --system jupyterlab
+
+# Create persistent data directory structure
+RUN mkdir /data
+
+# Configure YOLO directories
+RUN mkdir /data/{datasets,weights,runs} && \
+ yolo settings datasets_dir="/data/datasets" weights_dir="/data/weights" runs_dir="/data/runs"
+
+# Start JupyterLab with tutorial notebook
+ENTRYPOINT ["/usr/local/bin/jupyter", "lab", "--allow-root", "--ip=*", "/ultralytics/examples/tutorial.ipynb"]
+
+# Usage Examples -------------------------------------------------------------------------------------------------------
+
+# Build and Push
+# t=ultralytics/ultralytics:latest-jupyter && sudo docker build -f docker/Dockerfile-jupyter -t $t . && sudo docker push $t
+
+# Run
+# t=ultralytics/ultralytics:latest-jupyter && sudo docker run -it --ipc=host -p 8888:8888 $t
+
+# Pull and Run
+# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 $t
+
+# Pull and Run with local volume mounted
+# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 -v "$(pwd)"/datasets:/data/datasets $t
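The `yolo settings` layer above redirects the datasets, weights, and runs directories to the mounted `/data` volume. The same redirection can be done from a notebook cell with the `settings` object; a small sketch assuming the stock Ultralytics settings API:

```python
# Point Ultralytics at the /data volume from Python, mirroring the Dockerfile's
# `yolo settings` layer. Paths match the directories created in the image.
from ultralytics import settings

settings.update({"datasets_dir": "/data/datasets", "weights_dir": "/data/weights", "runs_dir": "/data/runs"})
print(settings["datasets_dir"])  # /data/datasets
```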
diff --git a/docker/Dockerfile-python b/docker/Dockerfile-python
index c275dcd9d12..796d1887924 100644
--- a/docker/Dockerfile-python
+++ b/docker/Dockerfile-python
@@ -32,14 +32,14 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config
ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
# Install pip packages
-RUN python3 -m pip install --upgrade pip wheel
-RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu
+RUN pip install uv
+RUN uv pip install --system -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match
# Run exports to AutoInstall packages
RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32
RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32
# Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991
-RUN pip install "paddlepaddle>=2.6.0" x2paddle
+RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle
# Remove extra build files
RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json
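The two `yolo export` layers exist only to trigger AutoInstall of the exporter dependencies at image build time. A rough Python equivalent, using the same tiny `imgsz=32` trick so the exports stay fast, would be:

```python
# Reproduce the build-time AutoInstall exports interactively. yolo11n.pt is
# downloaded automatically if missing; imgsz=32 keeps both exports lightweight.
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="edgetpu", imgsz=32)
model.export(format="ncnn", imgsz=32)
```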
diff --git a/docker/Dockerfile-runner b/docker/Dockerfile-runner
index 642f1a1bae6..5de5ee06507 100644
--- a/docker/Dockerfile-runner
+++ b/docker/Dockerfile-runner
@@ -17,13 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
WORKDIR /actions-runner
# Download and unpack the latest runner from https://github.com/actions/runner
-RUN FILENAME=actions-runner-linux-x64-2.317.0.tar.gz && \
- curl -o $FILENAME -L https://github.com/actions/runner/releases/download/v2.317.0/$FILENAME && \
+RUN FILENAME=actions-runner-linux-x64-2.320.0.tar.gz && \
+ curl -o $FILENAME -L https://github.com/actions/runner/releases/download/v2.320.0/$FILENAME && \
tar xzf $FILENAME && \
rm $FILENAME
# Install runner dependencies
-RUN pip install pytest-cov
+RUN uv pip install --system pytest-cov
RUN ./bin/installdependencies.sh && \
apt-get -y install libicu-dev
@@ -35,7 +35,6 @@ ENTRYPOINT sh -c './config.sh --url https://github.com/ultralytics/ultralytics \
--replace && \
./run.sh'
-
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
diff --git a/docs/README.md b/docs/README.md
index 03285c41b4f..b4eaffcc70c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,5 +1,5 @@
-
+
# 📚 Ultralytics Docs
@@ -10,15 +10,15 @@
[](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml)
[](https://github.com/ultralytics/docs/actions/workflows/format.yml)
-
+
## 🛠️ Installation
[](https://pypi.org/project/ultralytics/)
-[](https://pepy.tech/project/ultralytics)
+[](https://www.pepy.tech/projects/ultralytics)
[](https://pypi.org/project/ultralytics/)
-To install the ultralytics package in developer mode, ensure you have Git and Python 3 installed on your system. Then, follow these steps:
+To install the Ultralytics package in developer mode, ensure you have Git and Python 3 installed on your system. Then, follow these steps:
1. Clone the ultralytics repository to your local machine using Git:
@@ -38,7 +38,7 @@ To install the ultralytics package in developer mode, ensure you have Git and Py
pip install -e '.[dev]'
```
-- This command installs the ultralytics package along with all development dependencies, allowing you to modify the package code and have the changes immediately reflected in your Python environment.
+- This command installs the Ultralytics package along with all development dependencies, allowing you to modify the package code and have the changes immediately reflected in your Python environment.
## ๐ Building and Serving Locally
@@ -142,5 +142,5 @@ For Ultralytics bug reports and feature requests please visit [GitHub Issues](ht
-
+
diff --git a/docs/build_docs.py b/docs/build_docs.py
index e342312bd6a..68a333b84fe 100644
--- a/docs/build_docs.py
+++ b/docs/build_docs.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Automates the building and post-processing of MkDocs documentation, particularly for projects with multilingual content.
It streamlines the workflow for generating localized versions of the documentation and updating HTML links to ensure
@@ -24,6 +24,7 @@
- This script is built to be run in an environment where Python and MkDocs are installed and properly configured.
"""
+import json
import os
import re
import shutil
@@ -36,27 +37,41 @@
os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs
DOCS = Path(__file__).parent.resolve()
SITE = DOCS.parent / "site"
+LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])")
+
+
+def create_vercel_config():
+ """Create vercel.json in the site directory with customized configuration settings."""
+ config = {"trailingSlash": True}
+ with open(SITE / "vercel.json", "w") as f:
+ json.dump(config, f, indent=2)
def prepare_docs_markdown(clone_repos=True):
"""Build docs using mkdocs."""
- if SITE.exists():
- print(f"Removing existing {SITE}")
- shutil.rmtree(SITE)
+ print("Removing existing build artifacts")
+ shutil.rmtree(SITE, ignore_errors=True)
+ shutil.rmtree(DOCS / "repos", ignore_errors=True)
- # Get hub-sdk repo
if clone_repos:
+ # Get hub-sdk repo
repo = "https://github.com/ultralytics/hub-sdk"
- local_dir = DOCS.parent / Path(repo).name
- if not local_dir.exists():
- os.system(f"git clone {repo} {local_dir}")
- os.system(f"git -C {local_dir} pull") # update repo
+ local_dir = DOCS / "repos" / Path(repo).name
+ os.system(f"git clone {repo} {local_dir} --depth 1 --single-branch --branch main")
shutil.rmtree(DOCS / "en/hub/sdk", ignore_errors=True) # delete if exists
shutil.copytree(local_dir / "docs", DOCS / "en/hub/sdk") # for docs
shutil.rmtree(DOCS.parent / "hub_sdk", ignore_errors=True) # delete if exists
shutil.copytree(local_dir / "hub_sdk", DOCS.parent / "hub_sdk") # for mkdocstrings
print(f"Cloned/Updated {repo} in {local_dir}")
+ # Get docs repo
+ repo = "https://github.com/ultralytics/docs"
+ local_dir = DOCS / "repos" / Path(repo).name
+ os.system(f"git clone {repo} {local_dir} --depth 1 --single-branch --branch main")
+ shutil.rmtree(DOCS / "en/compare", ignore_errors=True) # delete if exists
+ shutil.copytree(local_dir / "docs/en/compare", DOCS / "en/compare") # for docs
+ print(f"Cloned/Updated {repo} in {local_dir}")
+
# Add frontmatter
for file in tqdm((DOCS / "en").rglob("*.md"), desc="Adding frontmatter"):
update_markdown_files(file)
@@ -64,7 +79,6 @@ def prepare_docs_markdown(clone_repos=True):
def update_page_title(file_path: Path, new_title: str):
"""Update the title of an HTML file."""
- # Read the content of the file
with open(file_path, encoding="utf-8") as file:
content = file.read()
@@ -99,7 +113,7 @@ def update_subdir_edit_links(subdir="", docs_url=""):
if str(subdir[0]) == "/":
subdir = str(subdir[0])[1:]
html_files = (SITE / subdir).rglob("*.html")
- for html_file in tqdm(html_files, desc="Processing subdir files"):
+ for html_file in tqdm(html_files, desc="Processing subdir files", mininterval=1.0):
with html_file.open("r", encoding="utf-8") as file:
soup = BeautifulSoup(file, "html.parser")
@@ -155,15 +169,16 @@ def update_docs_html():
# Update 404 titles
update_page_title(SITE / "404.html", new_title="Ultralytics Docs - Not Found")
- # Update edit links
- update_subdir_edit_links(
- subdir="hub/sdk/", # do not use leading slash
- docs_url="https://github.com/ultralytics/hub-sdk/tree/main/docs/",
- )
+ # Update edit button links
+ for subdir, docs_url in (
+ ("hub/sdk/", "https://github.com/ultralytics/hub-sdk/tree/main/docs/"), # do not use leading slash
+ ("compare/", "https://github.com/ultralytics/docs/tree/main/docs/en/compare/"),
+ ):
+ update_subdir_edit_links(subdir=subdir, docs_url=docs_url)
# Convert plaintext links to HTML hyperlinks
files_modified = 0
- for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links"):
+ for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links", mininterval=1.0):
with open(html_file, encoding="utf-8") as file:
content = file.read()
updated_content = convert_plaintext_links_to_html(content)
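The module-level `LINK_PATTERN` added near the top of `build_docs.py` replaces the inline regex in `convert_plaintext_links_to_html`, as shown in the hunk below. A minimal sketch of what the pattern does, assuming it is used verbatim as compiled above:

```python
import re

# Same regex as the LINK_PATTERN constant added to build_docs.py.
LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])")

text = "Docs live at https://docs.ultralytics.com, see them for details."
print(LINK_PATTERN.sub(r'<a href="\1">\1</a>', text))
# Docs live at <a href="https://docs.ultralytics.com">https://docs.ultralytics.com</a>, see them for details.
```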
@@ -198,12 +213,9 @@ def convert_plaintext_links_to_html(content):
for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items
for text_node in paragraph.find_all(string=True, recursive=False):
if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks
-                new_text = re.sub(
-                    r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?<![.,:;!?\'"])',
-                    r'<a href="\1">\1</a>',
-                    str(text_node),
-                )
-                if "<a href" in new_text:
+                new_text = LINK_PATTERN.sub(r'<a href="\1">\1</a>', str(text_node))
+                if "<a href" in new_text:
+    if file_type == "html":
+        # Remove HTML comments, preserving newline after comment
+        # content = re.sub(r"<!--.*?-->\n?", r"\n", content, flags=re.DOTALL)
+        pass
+    elif file_type == "css":
+        # Remove CSS comments, preserving newline after comment
+        # content = re.sub(r"/\*.*?\*/\n?", r"\n", content, flags=re.DOTALL)
+        pass
+    elif file_type == "js":
+        # Remove JS single-line comments, preserving newline and URLs
+        # content = re.sub(r"(?<!:)//.*", "", content)
+        pass
diff --git a/docs/en/datasets/classify/caltech256.md b/docs/en/datasets/classify/caltech256.md
-        Watch: How to Train [Image Classification](https://www.ultralytics.com/glossary/image-classification) Model using Caltech-256 Dataset with Ultralytics HUB
+        Watch: How to Train Image Classification Model using Caltech-256 Dataset with Ultralytics HUB
## Key Features
@@ -47,7 +47,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="caltech256", epochs=100, imgsz=416)
@@ -57,7 +57,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416
+ yolo classify train data=caltech256 model=yolo11n-cls.pt epochs=100 imgsz=416
```
## Sample Images and Annotations
@@ -106,7 +106,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 [epochs](https://www.ul
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model
# Train the model
results = model.train(data="caltech256", epochs=100, imgsz=416)
@@ -116,7 +116,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 [epochs](https://www.ul
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416
+ yolo classify train data=caltech256 model=yolo11n-cls.pt epochs=100 imgsz=416
```
### What are the most common use cases for the Caltech-256 dataset?
@@ -141,6 +141,6 @@ Ultralytics YOLO models offer several advantages for training on the Caltech-256
- **High Accuracy**: YOLO models are known for their state-of-the-art performance in object detection tasks.
- **Speed**: They provide real-time inference capabilities, making them suitable for applications requiring quick predictions.
- **Ease of Use**: With Ultralytics HUB, users can train, validate, and deploy models without extensive coding.
-- **Pretrained Models**: Starting from pretrained models, like `yolov8n-cls.pt`, can significantly reduce training time and improve model [accuracy](https://www.ultralytics.com/glossary/accuracy).
+- **Pretrained Models**: Starting from pretrained models, like `yolo11n-cls.pt`, can significantly reduce training time and improve model [accuracy](https://www.ultralytics.com/glossary/accuracy).
For more details, explore our [comprehensive training guide](../../modes/train.md).
diff --git a/docs/en/datasets/classify/cifar10.md b/docs/en/datasets/classify/cifar10.md
index 7bae78b38a3..e081bc16813 100644
--- a/docs/en/datasets/classify/cifar10.md
+++ b/docs/en/datasets/classify/cifar10.md
@@ -16,7 +16,7 @@ The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute
allowfullscreen>
- Watch: How to Train an [Image Classification](https://www.ultralytics.com/glossary/image-classification) Model with CIFAR-10 Dataset using Ultralytics YOLOv8
+ Watch: How to Train an Image Classification Model with CIFAR-10 Dataset using Ultralytics YOLO11
## Key Features
@@ -50,7 +50,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="cifar10", epochs=100, imgsz=32)
@@ -60,7 +60,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32
+ yolo classify train data=cifar10 model=yolo11n-cls.pt epochs=100 imgsz=32
```
## Sample Images and Annotations
@@ -104,7 +104,7 @@ To train a YOLO model on the CIFAR-10 dataset using Ultralytics, you can follow
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="cifar10", epochs=100, imgsz=32)
@@ -114,7 +114,7 @@ To train a YOLO model on the CIFAR-10 dataset using Ultralytics, you can follow
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32
+ yolo classify train data=cifar10 model=yolo11n-cls.pt epochs=100 imgsz=32
```
For more details, refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/classify/cifar100.md b/docs/en/datasets/classify/cifar100.md
index a6735bbcc4a..1f4713c458e 100644
--- a/docs/en/datasets/classify/cifar100.md
+++ b/docs/en/datasets/classify/cifar100.md
@@ -8,6 +8,17 @@ keywords: CIFAR-100, dataset, machine learning, computer vision, image classific
The [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a significant extension of the CIFAR-10 dataset, composed of 60,000 32x32 color images in 100 different classes. It was developed by researchers at the CIFAR institute, offering a more challenging dataset for more complex machine learning and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
+
+
+
+
+        Watch: How to Train Ultralytics YOLO11 on CIFAR-100 | Step-by-Step Image Classification Tutorial 🚀
+
+
## Key Features
- The CIFAR-100 dataset consists of 60,000 images, divided into 100 classes.
@@ -39,7 +50,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 [epochs](https://www.ultr
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="cifar100", epochs=100, imgsz=32)
@@ -49,7 +60,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 [epochs](https://www.ultr
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32
+ yolo classify train data=cifar100 model=yolo11n-cls.pt epochs=100 imgsz=32
```
## Sample Images and Annotations
@@ -97,7 +108,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="cifar100", epochs=100, imgsz=32)
@@ -107,7 +118,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32
+ yolo classify train data=cifar100 model=yolo11n-cls.pt epochs=100 imgsz=32
```
For a comprehensive list of available arguments, please refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/classify/fashion-mnist.md b/docs/en/datasets/classify/fashion-mnist.md
index 531cd2c1bd8..6c49ceebb5e 100644
--- a/docs/en/datasets/classify/fashion-mnist.md
+++ b/docs/en/datasets/classify/fashion-mnist.md
@@ -16,7 +16,7 @@ The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is
allowfullscreen>
- Watch: How to do [Image Classification](https://www.ultralytics.com/glossary/image-classification) on Fashion MNIST Dataset using Ultralytics YOLOv8
+ Watch: How to do Image Classification on Fashion MNIST Dataset using Ultralytics YOLO11
## Key Features
@@ -37,6 +37,7 @@ The Fashion-MNIST dataset is split into two subsets:
Each training and test example is assigned to one of the following labels:
+```
0. T-shirt/top
1. Trouser
2. Pullover
@@ -47,6 +48,7 @@ Each training and test example is assigned to one of the following labels:
7. Sneaker
8. Bag
9. Ankle boot
+```
## Applications
@@ -64,7 +66,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 [epochs](https://www.u
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="fashion-mnist", epochs=100, imgsz=28)
@@ -74,7 +76,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 [epochs](https://www.u
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28
+ yolo classify train data=fashion-mnist model=yolo11n-cls.pt epochs=100 imgsz=28
```
## Sample Images and Annotations
@@ -107,7 +109,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot
from ultralytics import YOLO
# Load a pretrained model
- model = YOLO("yolov8n-cls.pt")
+ model = YOLO("yolo11n-cls.pt")
# Train the model on Fashion-MNIST
results = model.train(data="fashion-mnist", epochs=100, imgsz=28)
@@ -117,7 +119,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot
=== "CLI"
```bash
- yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28
+ yolo classify train data=fashion-mnist model=yolo11n-cls.pt epochs=100 imgsz=28
```
For more detailed training parameters, refer to the [Training page](../../modes/train.md).
@@ -128,7 +130,7 @@ The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is
### Can I use Ultralytics YOLO for image classification tasks like Fashion-MNIST?
-Yes, Ultralytics YOLO models can be used for image classification tasks, including those involving the Fashion-MNIST dataset. YOLOv8, for example, supports various vision tasks such as detection, segmentation, and classification. To get started with image classification tasks, refer to the [Classification page](https://docs.ultralytics.com/tasks/classify/).
+Yes, Ultralytics YOLO models can be used for image classification tasks, including those involving the Fashion-MNIST dataset. YOLO11, for example, supports various vision tasks such as detection, segmentation, and classification. To get started with image classification tasks, refer to the [Classification page](https://docs.ultralytics.com/tasks/classify/).
### What are the key features and structure of the Fashion-MNIST dataset?
diff --git a/docs/en/datasets/classify/imagenet.md b/docs/en/datasets/classify/imagenet.md
index 76e59b3f183..72c2e2a3b5b 100644
--- a/docs/en/datasets/classify/imagenet.md
+++ b/docs/en/datasets/classify/imagenet.md
@@ -10,13 +10,7 @@ keywords: ImageNet, deep learning, visual recognition, computer vision, pretrain
## ImageNet Pretrained Models
-| Model | size (pixels) | acc top1 | acc top5 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) at 640 |
-| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ |
-| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 |
-| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 |
-| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 |
-| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-cls.pt) | 224 | 76.8 | 93.5 | 163.0 | 0.87 | 37.5 | 99.7 |
-| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-cls.pt) | 224 | 79.0 | 94.6 | 232.0 | 1.01 | 57.4 | 154.8 |
+{% include "macros/yolo-cls-perf.md" %}
## Key Features
@@ -49,7 +43,7 @@ To train a deep learning model on the ImageNet dataset for 100 [epochs](https://
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenet", epochs=100, imgsz=224)
@@ -59,7 +53,7 @@ To train a deep learning model on the ImageNet dataset for 100 [epochs](https://
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagenet model=yolo11n-cls.pt epochs=100 imgsz=224
```
## Sample Images and Annotations
@@ -110,7 +104,7 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenet", epochs=100, imgsz=224)
@@ -120,14 +114,14 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagenet model=yolo11n-cls.pt epochs=100 imgsz=224
```
For more in-depth training instruction, refer to our [Training page](../../modes/train.md).
-### Why should I use the Ultralytics YOLOv8 pretrained models for my ImageNet dataset projects?
+### Why should I use the Ultralytics YOLO11 pretrained models for my ImageNet dataset projects?
-Ultralytics YOLOv8 pretrained models offer state-of-the-art performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for various computer vision tasks. For example, the YOLOv8n-cls model, with a top-1 accuracy of 69.0% and a top-5 accuracy of 88.3%, is optimized for real-time applications. Pretrained models reduce the computational resources required for training from scratch and accelerate development cycles. Learn more about the performance metrics of YOLOv8 models in the [ImageNet Pretrained Models section](#imagenet-pretrained-models).
+Ultralytics YOLO11 pretrained models offer state-of-the-art performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for various computer vision tasks. For example, the YOLO11n-cls model, with a top-1 accuracy of 69.0% and a top-5 accuracy of 88.3%, is optimized for real-time applications. Pretrained models reduce the computational resources required for training from scratch and accelerate development cycles. Learn more about the performance metrics of YOLO11 models in the [ImageNet Pretrained Models section](#imagenet-pretrained-models).
### How is the ImageNet dataset structured, and why is it important?
diff --git a/docs/en/datasets/classify/imagenet10.md b/docs/en/datasets/classify/imagenet10.md
index 4e40e6655f8..217d56c54b9 100644
--- a/docs/en/datasets/classify/imagenet10.md
+++ b/docs/en/datasets/classify/imagenet10.md
@@ -35,7 +35,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenet10", epochs=5, imgsz=224)
@@ -45,7 +45,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224
+ yolo classify train data=imagenet10 model=yolo11n-cls.pt epochs=5 imgsz=224
```
## Sample Images and Annotations
@@ -94,7 +94,7 @@ To test your deep learning model on the ImageNet10 dataset with an image size of
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenet10", epochs=5, imgsz=224)
@@ -104,7 +104,7 @@ To test your deep learning model on the ImageNet10 dataset with an image size of
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224
+ yolo classify train data=imagenet10 model=yolo11n-cls.pt epochs=5 imgsz=224
```
Refer to the [Training](../../modes/train.md) page for a comprehensive list of available arguments.
diff --git a/docs/en/datasets/classify/imagenette.md b/docs/en/datasets/classify/imagenette.md
index bf371502ad5..dd2af1c3d1b 100644
--- a/docs/en/datasets/classify/imagenette.md
+++ b/docs/en/datasets/classify/imagenette.md
@@ -37,7 +37,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenette", epochs=100, imgsz=224)
@@ -47,7 +47,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagenette model=yolo11n-cls.pt epochs=100 imgsz=224
```
## Sample Images and Annotations
@@ -72,7 +72,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model with ImageNette160
results = model.train(data="imagenette160", epochs=100, imgsz=160)
@@ -82,7 +82,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
```bash
# Start training from a pretrained *.pt model with ImageNette160
- yolo classify train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160
+ yolo classify train data=imagenette160 model=yolo11n-cls.pt epochs=100 imgsz=160
```
!!! example "Train Example with ImageNette320"
@@ -93,7 +93,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model with ImageNette320
results = model.train(data="imagenette320", epochs=100, imgsz=320)
@@ -103,7 +103,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
```bash
# Start training from a pretrained *.pt model with ImageNette320
- yolo classify train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320
+ yolo classify train data=imagenette320 model=yolo11n-cls.pt epochs=100 imgsz=320
```
These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks.
@@ -130,7 +130,7 @@ To train a YOLO model on the ImageNette dataset for 100 [epochs](https://www.ult
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagenette", epochs=100, imgsz=224)
@@ -140,7 +140,7 @@ To train a YOLO model on the ImageNette dataset for 100 [epochs](https://www.ult
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagenette model=yolo11n-cls.pt epochs=100 imgsz=224
```
For more details, see the [Training](../../modes/train.md) documentation page.
@@ -167,7 +167,7 @@ Yes, the ImageNette dataset is also available in two resized versions: ImageNett
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt")
+ model = YOLO("yolo11n-cls.pt")
# Train the model with ImageNette160
results = model.train(data="imagenette160", epochs=100, imgsz=160)
@@ -177,7 +177,7 @@ Yes, the ImageNette dataset is also available in two resized versions: ImageNett
```bash
# Start training from a pretrained *.pt model with ImageNette160
- yolo detect train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160
+ yolo detect train data=imagenette160 model=yolo11n-cls.pt epochs=100 imgsz=160
```
For more information, refer to [Training with ImageNette160 and ImageNette320](#imagenette160-and-imagenette320).
diff --git a/docs/en/datasets/classify/imagewoof.md b/docs/en/datasets/classify/imagewoof.md
index 2ed0273b605..2e33f44542d 100644
--- a/docs/en/datasets/classify/imagewoof.md
+++ b/docs/en/datasets/classify/imagewoof.md
@@ -34,7 +34,7 @@ To train a CNN model on the ImageWoof dataset for 100 [epochs](https://www.ultra
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="imagewoof", epochs=100, imgsz=224)
@@ -44,7 +44,7 @@ To train a CNN model on the ImageWoof dataset for 100 [epochs](https://www.ultra
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagewoof model=yolo11n-cls.pt epochs=100 imgsz=224
```
## Dataset Variants
@@ -67,7 +67,7 @@ To use these variants in your training, simply replace 'imagewoof' in the datase
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# For medium-sized dataset
model.train(data="imagewoof320", epochs=100, imgsz=224)
@@ -80,7 +80,7 @@ To use these variants in your training, simply replace 'imagewoof' in the datase
```bash
# Load a pretrained model and train on the small-sized dataset
- yolo classify train model=yolov8n-cls.pt data=imagewoof320 epochs=100 imgsz=224
+ yolo classify train model=yolo11n-cls.pt data=imagewoof320 epochs=100 imgsz=224
```
It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping.
@@ -116,7 +116,7 @@ To train a [Convolutional Neural Network](https://www.ultralytics.com/glossary/c
```python
from ultralytics import YOLO
- model = YOLO("yolov8n-cls.pt") # Load a pretrained model
+ model = YOLO("yolo11n-cls.pt") # Load a pretrained model
results = model.train(data="imagewoof", epochs=100, imgsz=224)
```
@@ -124,7 +124,7 @@ To train a [Convolutional Neural Network](https://www.ultralytics.com/glossary/c
=== "CLI"
```bash
- yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224
+ yolo classify train data=imagewoof model=yolo11n-cls.pt epochs=100 imgsz=224
```
For more details on available training arguments, refer to the [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/classify/index.md b/docs/en/datasets/classify/index.md
index 3567d6a2952..e8876ce9ebf 100644
--- a/docs/en/datasets/classify/index.md
+++ b/docs/en/datasets/classify/index.md
@@ -86,7 +86,7 @@ This structured approach ensures that the model can effectively learn from well-
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="path/to/dataset", epochs=100, imgsz=640)
@@ -96,7 +96,7 @@ This structured approach ensures that the model can effectively learn from well-
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640
+ yolo detect train data=path/to/data model=yolo11n-cls.pt epochs=100 imgsz=640
```
## Supported Datasets
@@ -113,6 +113,7 @@ Ultralytics supports the following datasets with automatic download:
- [Imagenette](imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing.
- [Imagewoof](imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks.
- [MNIST](mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks.
+- [MNIST160](mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total.
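For the newly listed MNIST160 subset, a quick smoke-test run might look like the sketch below; it assumes `mnist160` is accepted as a dataset key in the same way as the other auto-download classification datasets.

```python
# One-epoch smoke test on the tiny 160-image MNIST160 subset (dataset key assumed).
from ultralytics import YOLO

model = YOLO("yolo11n-cls.pt")
results = model.train(data="mnist160", epochs=1, imgsz=32)
```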
### Adding your own dataset
@@ -170,7 +171,7 @@ To use your own dataset with Ultralytics YOLO, ensure it follows the specified d
from ultralytics import YOLO
# Load a model
-model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="path/to/your/dataset", epochs=100, imgsz=640)
@@ -182,7 +183,7 @@ More details can be found in the [Adding your own dataset](#adding-your-own-data
Ultralytics YOLO offers several benefits for image classification, including:
-- **Pretrained Models**: Load pretrained models like `yolov8n-cls.pt` to jump-start your training process.
+- **Pretrained Models**: Load pretrained models like `yolo11n-cls.pt` to jump-start your training process.
- **Ease of Use**: Simple API and CLI commands for training and evaluation.
- **High Performance**: State-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, ideal for real-time applications.
- **Support for Multiple Datasets**: Seamless integration with various popular datasets like CIFAR-10, ImageNet, and more.
@@ -202,7 +203,7 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model
# Train the model
results = model.train(data="path/to/dataset", epochs=100, imgsz=640)
@@ -213,7 +214,7 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640
+ yolo detect train data=path/to/data model=yolo11n-cls.pt epochs=100 imgsz=640
```
These examples demonstrate the straightforward process of training a YOLO model using either approach. For more information, visit the [Usage](#usage) section.
diff --git a/docs/en/datasets/classify/mnist.md b/docs/en/datasets/classify/mnist.md
index 07f0a70a1d6..356fdc4f75c 100644
--- a/docs/en/datasets/classify/mnist.md
+++ b/docs/en/datasets/classify/mnist.md
@@ -6,7 +6,7 @@ keywords: MNIST, dataset, handwritten digits, image classification, deep learnin
# MNIST Dataset
-The [MNIST](http://yann.lecun.com/exdb/mnist/) (Modified National Institute of Standards and Technology) dataset is a large database of handwritten digits that is commonly used for training various image processing systems and machine learning models. It was created by "re-mixing" the samples from NIST's original datasets and has become a benchmark for evaluating the performance of image classification algorithms.
+The [MNIST](https://en.wikipedia.org/wiki/MNIST_database) (Modified National Institute of Standards and Technology) dataset is a large database of handwritten digits that is commonly used for training various image processing systems and machine learning models. It was created by "re-mixing" the samples from NIST's original datasets and has become a benchmark for evaluating the performance of image classification algorithms.
## Key Features
@@ -42,7 +42,7 @@ To train a CNN model on the MNIST dataset for 100 [epochs](https://www.ultralyti
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="mnist", epochs=100, imgsz=32)
@@ -52,7 +52,7 @@ To train a CNN model on the MNIST dataset for 100 [epochs](https://www.ultralyti
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28
+ yolo classify train data=mnist model=yolo11n-cls.pt epochs=100 imgsz=28
```
## Sample Images and Annotations
@@ -83,13 +83,13 @@ research or development work, please cite the following paper:
}
```
-We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](http://yann.lecun.com/exdb/mnist/).
+We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](https://en.wikipedia.org/wiki/MNIST_database).
## FAQ
### What is the MNIST dataset, and why is it important in machine learning?
-The [MNIST](http://yann.lecun.com/exdb/mnist/) dataset, or Modified National Institute of Standards and Technology dataset, is a widely-used collection of handwritten digits designed for training and testing image classification systems. It includes 60,000 training images and 10,000 testing images, all of which are grayscale and 28x28 pixels in size. The dataset's importance lies in its role as a standard benchmark for evaluating image classification algorithms, helping researchers and engineers to compare methods and track progress in the field.
+The [MNIST](https://en.wikipedia.org/wiki/MNIST_database) dataset, or Modified National Institute of Standards and Technology dataset, is a widely-used collection of handwritten digits designed for training and testing image classification systems. It includes 60,000 training images and 10,000 testing images, all of which are grayscale and 28x28 pixels in size. The dataset's importance lies in its role as a standard benchmark for evaluating image classification algorithms, helping researchers and engineers to compare methods and track progress in the field.
### How can I use Ultralytics YOLO to train a model on the MNIST dataset?
@@ -103,7 +103,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="mnist", epochs=100, imgsz=32)
@@ -113,7 +113,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the
```bash
# Start training from a pretrained *.pt model
- yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28
+ yolo classify train data=mnist model=yolo11n-cls.pt epochs=100 imgsz=28
```
For a detailed list of available training arguments, refer to the [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/african-wildlife.md b/docs/en/datasets/detect/african-wildlife.md
index 14a066b14b7..519b8cfa672 100644
--- a/docs/en/datasets/detect/african-wildlife.md
+++ b/docs/en/datasets/detect/african-wildlife.md
@@ -1,13 +1,24 @@
---
comments: true
description: Explore our African Wildlife Dataset featuring images of buffalo, elephant, rhino, and zebra for training computer vision models. Ideal for research and conservation.
-keywords: African Wildlife Dataset, South African animals, object detection, computer vision, YOLOv8, wildlife research, conservation, dataset
+keywords: African Wildlife Dataset, South African animals, object detection, computer vision, YOLO11, wildlife research, conservation, dataset
---
# African Wildlife Dataset
This dataset showcases four common animal classes typically found in South African nature reserves. It includes images of African wildlife such as buffalo, elephant, rhino, and zebra, providing valuable insights into their characteristics. Essential for training [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) algorithms, this dataset aids in identifying animals in various habitats, from zoos to forests, and supports wildlife research.
+
+
## Dataset Structure
The African wildlife objects detection dataset is split into three subsets:
@@ -32,7 +43,7 @@ A YAML (Yet Another Markup Language) file defines the dataset configuration, inc
## Usage
-To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
+To train a YOLO11n model on the African wildlife dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -42,7 +53,7 @@ To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https:
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="african-wildlife.yaml", epochs=100, imgsz=640)
@@ -52,7 +63,7 @@ To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https:
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=african-wildlife.yaml model=yolo11n.pt epochs=100 imgsz=640
```
!!! example "Inference Example"
@@ -96,9 +107,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git
The African Wildlife Dataset includes images of four common animal species found in South African nature reserves: buffalo, elephant, rhino, and zebra. It is a valuable resource for training computer vision algorithms in object detection and animal identification. The dataset supports various tasks like object tracking, research, and conservation efforts. For more information on its structure and applications, refer to the [Dataset Structure](#dataset-structure) section and [Applications](#applications) of the dataset.
-### How do I train a YOLOv8 model using the African Wildlife Dataset?
+### How do I train a YOLO11 model using the African Wildlife Dataset?
-You can train a YOLOv8 model on the African Wildlife Dataset by using the `african-wildlife.yaml` configuration file. Below is an example of how to train the YOLOv8n model for 100 epochs with an image size of 640:
+You can train a YOLO11 model on the African Wildlife Dataset by using the `african-wildlife.yaml` configuration file. Below is an example of how to train the YOLO11n model for 100 epochs with an image size of 640:
!!! example
@@ -108,7 +119,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="african-wildlife.yaml", epochs=100, imgsz=640)
@@ -118,7 +129,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=african-wildlife.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For additional training parameters and options, refer to the [Training](../../modes/train.md) documentation.
diff --git a/docs/en/datasets/detect/argoverse.md b/docs/en/datasets/detect/argoverse.md
index a834be90edd..4280b09a25f 100644
--- a/docs/en/datasets/detect/argoverse.md
+++ b/docs/en/datasets/detect/argoverse.md
@@ -43,7 +43,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the Argoverse dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -53,7 +53,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.u
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="Argoverse.yaml", epochs=100, imgsz=640)
@@ -63,7 +63,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.u
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=Argoverse.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -104,7 +104,7 @@ The [Argoverse](https://www.argoverse.org/) dataset, developed by Argo AI, suppo
### How can I train an Ultralytics YOLO model using the Argoverse dataset?
-To train a YOLOv8 model with the Argoverse dataset, use the provided YAML configuration file and the following code:
+To train a YOLO11 model with the Argoverse dataset, use the provided YAML configuration file and the following code:
!!! example "Train Example"
@@ -114,7 +114,7 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="Argoverse.yaml", epochs=100, imgsz=640)
@@ -125,7 +125,7 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=Argoverse.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For a detailed explanation of the arguments, refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/brain-tumor.md b/docs/en/datasets/detect/brain-tumor.md
index 9f108e73882..cb06b9c09e7 100644
--- a/docs/en/datasets/detect/brain-tumor.md
+++ b/docs/en/datasets/detect/brain-tumor.md
@@ -6,6 +6,8 @@ keywords: brain tumor dataset, MRI scans, CT scans, brain tumor detection, medic
# Brain Tumor Dataset
+
+
A brain tumor detection dataset consists of medical images from MRI or CT scans, containing information about brain tumor presence, location, and characteristics. This dataset is essential for training [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) algorithms to automate brain tumor identification, aiding in early diagnosis and treatment planning.
@@ -42,7 +44,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page.
+To train a YOLO11n model on the brain tumor dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -52,7 +54,7 @@ To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="brain-tumor.yaml", epochs=100, imgsz=640)
@@ -62,7 +64,7 @@ To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=brain-tumor.yaml model=yolo11n.pt epochs=100 imgsz=640
```
!!! example "Inference Example"
@@ -106,9 +108,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git
The brain tumor dataset is divided into two subsets: the **training set** consists of 893 images with corresponding annotations, while the **testing set** comprises 223 images with paired annotations. This structured division aids in developing robust and accurate computer vision models for detecting brain tumors. For more information on the dataset structure, visit the [Dataset Structure](#dataset-structure) section.
-### How can I train a YOLOv8 model on the brain tumor dataset using Ultralytics?
+### How can I train a YOLO11 model on the brain tumor dataset using Ultralytics?
-You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an image size of 640px using both Python and CLI methods. Below are the examples for both:
+You can train a YOLO11 model on the brain tumor dataset for 100 epochs with an image size of 640px using both Python and CLI methods. Below are the examples for both:
!!! example "Train Example"
@@ -118,7 +120,7 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="brain-tumor.yaml", epochs=100, imgsz=640)
@@ -129,7 +131,7 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=brain-tumor.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For a detailed list of available arguments, refer to the [Training](../../modes/train.md) page.
@@ -138,9 +140,9 @@ For a detailed list of available arguments, refer to the [Training](../../modes/
Using the brain tumor dataset in AI projects enables early diagnosis and treatment planning for brain tumors. It helps in automating brain tumor identification through computer vision, facilitating accurate and timely medical interventions, and supporting personalized treatment strategies. This application holds significant potential in improving patient outcomes and medical efficiencies.
-### How do I perform inference using a fine-tuned YOLOv8 model on the brain tumor dataset?
+### How do I perform inference using a fine-tuned YOLO11 model on the brain tumor dataset?
-Inference using a fine-tuned YOLOv8 model can be performed with either Python or CLI approaches. Here are the examples:
+Inference using a fine-tuned YOLO11 model can be performed with either Python or CLI approaches. Here are the examples:
!!! example "Inference Example"
diff --git a/docs/en/datasets/detect/coco.md b/docs/en/datasets/detect/coco.md
index d0901428387..9af5207d618 100644
--- a/docs/en/datasets/detect/coco.md
+++ b/docs/en/datasets/detect/coco.md
@@ -21,13 +21,7 @@ The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is
## COCO Pretrained Models
-| Model | size (pixels) | mAPval 50-95 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
-| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 |
-| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 |
-| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 |
-| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 |
-| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 |
+{% include "macros/yolo-det-perf.md" %}
## Key Features
@@ -60,7 +54,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the COCO dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -70,7 +64,7 @@ To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultral
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco.yaml", epochs=100, imgsz=640)
@@ -80,7 +74,7 @@ To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultral
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=coco.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -122,7 +116,7 @@ The [COCO dataset](https://cocodataset.org/#home) (Common Objects in Context) is
### How can I train a YOLO model using the COCO dataset?
-To train a YOLOv8 model using the COCO dataset, you can use the following code snippets:
+To train a YOLO11 model using the COCO dataset, you can use the following code snippets:
!!! example "Train Example"
@@ -132,7 +126,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco.yaml", epochs=100, imgsz=640)
@@ -142,7 +136,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=coco.yaml model=yolo11n.pt epochs=100 imgsz=640
```
Refer to the [Training page](../../modes/train.md) for more details on available arguments.
@@ -156,13 +150,15 @@ The COCO dataset includes:
- Standardized evaluation metrics for object detection (mAP) and segmentation (mean Average Recall, mAR).
- **Mosaicing** technique in training batches to enhance model generalization across various object sizes and contexts.
-### Where can I find pretrained YOLOv8 models trained on the COCO dataset?
+### Where can I find pretrained YOLO11 models trained on the COCO dataset?
-Pretrained YOLOv8 models on the COCO dataset can be downloaded from the links provided in the documentation. Examples include:
+Pretrained YOLO11 models on the COCO dataset can be downloaded from the links provided in the documentation. Examples include:
-- [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt)
-- [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt)
-- [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt)
+- [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt)
+- [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt)
+- [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt)
+- [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt)
+- [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt)
These models vary in size, mAP, and inference speed, providing options for different performance and resource requirements.
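+
+As a quick sanity check on any of these checkpoints, you can validate a downloaded model directly against COCO. The snippet below is a minimal sketch (it assumes the standard `coco.yaml` configuration and lets Ultralytics auto-download the `yolo11n.pt` weights and dataset); the printed `map` value corresponds to mAP50-95.
+
+```python
+from ultralytics import YOLO
+
+# Load a COCO-pretrained checkpoint (auto-downloads if not cached locally)
+model = YOLO("yolo11n.pt")
+
+# Validate on the COCO val split defined in coco.yaml to reproduce the reported accuracy
+metrics = model.val(data="coco.yaml", imgsz=640)
+print(metrics.box.map)  # mAP50-95
+```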
diff --git a/docs/en/datasets/detect/coco8.md b/docs/en/datasets/detect/coco8.md
index 4a8ad5a8522..b6b7a5a0f12 100644
--- a/docs/en/datasets/detect/coco8.md
+++ b/docs/en/datasets/detect/coco8.md
@@ -1,7 +1,7 @@
---
comments: true
description: Explore the Ultralytics COCO8 dataset, a versatile and manageable set of 8 images perfect for testing object detection models and training pipelines.
-keywords: COCO8, Ultralytics, dataset, object detection, YOLOv8, training, validation, machine learning, computer vision
+keywords: COCO8, Ultralytics, dataset, object detection, YOLO11, training, validation, machine learning, computer vision
---
# COCO8 Dataset
@@ -21,7 +21,7 @@ keywords: COCO8, Ultralytics, dataset, object detection, YOLOv8, training, valid
Watch: Ultralytics COCO Dataset Overview
-This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
## Dataset YAML
@@ -35,7 +35,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -45,7 +45,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultra
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
@@ -55,7 +55,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultra
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -95,9 +95,9 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th
The Ultralytics COCO8 dataset is a compact yet versatile object detection dataset consisting of the first 8 images from the COCO train 2017 set, with 4 images for training and 4 for validation. It is designed for testing and debugging object detection models and experimentation with new detection approaches. Despite its small size, COCO8 offers enough diversity to act as a sanity check for your training pipelines before deploying larger datasets. For more details, view the [COCO8 dataset](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8.yaml).
-### How do I train a YOLOv8 model using the COCO8 dataset?
+### How do I train a YOLO11 model using the COCO8 dataset?
-To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or CLI commands. Here's how you can start:
+To train a YOLO11 model using the COCO8 dataset, you can employ either Python or CLI commands. Here's how you can start:
!!! example "Train Example"
@@ -107,7 +107,7 @@ To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
@@ -117,19 +117,19 @@ To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
### Why should I use Ultralytics HUB for managing my COCO8 training?
-Ultralytics HUB is an all-in-one web tool designed to simplify the training and deployment of YOLO models, including the Ultralytics YOLOv8 models on the COCO8 dataset. It offers cloud training, real-time tracking, and seamless dataset management. HUB allows you to start training with a single click and avoids the complexities of manual setups. Discover more about [Ultralytics HUB](https://hub.ultralytics.com/) and its benefits.
+Ultralytics HUB is an all-in-one web tool designed to simplify the training and deployment of YOLO models, including the Ultralytics YOLO11 models on the COCO8 dataset. It offers cloud training, real-time tracking, and seamless dataset management. HUB allows you to start training with a single click and avoids the complexities of manual setups. Discover more about [Ultralytics HUB](https://hub.ultralytics.com/) and its benefits.
### What are the benefits of using mosaic augmentation in training with the COCO8 dataset?
Mosaic augmentation, demonstrated in the COCO8 dataset, combines multiple images into a single image during training. This technique increases the variety of objects and scenes in each training batch, improving the model's ability to generalize across different object sizes, aspect ratios, and contexts. This results in a more robust object detection model. For more details, refer to the [training guide](#usage).
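+
+Mosaic is applied through standard train-time augmentation arguments. The snippet below is a minimal sketch showing how the mosaic-related arguments can be adjusted (`mosaic` is the application probability and `close_mosaic` disables mosaic for the last N epochs); the values shown are illustrative defaults, not a recommendation.
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Keep mosaic enabled for most of training, then switch it off for the final 10 epochs
+results = model.train(data="coco8.yaml", epochs=100, imgsz=640, mosaic=1.0, close_mosaic=10)
+```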
-### How can I validate my YOLOv8 model trained on the COCO8 dataset?
+### How can I validate my YOLO11 model trained on the COCO8 dataset?
-Validation of your YOLOv8 model trained on the COCO8 dataset can be performed using the model's validation commands. You can invoke the validation mode via CLI or Python script to evaluate the model's performance using precise metrics. For detailed instructions, visit the [Validation](../../modes/val.md) page.
+Validation of your YOLO11 model trained on the COCO8 dataset can be performed using the model's validation commands. You can invoke the validation mode via CLI or Python script to evaluate the model's performance using precise metrics. For detailed instructions, visit the [Validation](../../modes/val.md) page.
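+
+As a rough illustration (assuming your fine-tuned weights were saved to the default `runs/detect/train/weights/best.pt` location), validation looks like this:
+
+```python
+from ultralytics import YOLO
+
+# Load the fine-tuned checkpoint (path is the default save location and may differ on your system)
+model = YOLO("runs/detect/train/weights/best.pt")
+
+# Evaluate on the COCO8 validation split defined in coco8.yaml
+metrics = model.val(data="coco8.yaml", imgsz=640)
+print(metrics.box.map50)  # mAP at IoU threshold 0.50
+```
+
+The equivalent CLI call is `yolo detect val model=runs/detect/train/weights/best.pt data=coco8.yaml imgsz=640`.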
diff --git a/docs/en/datasets/detect/globalwheat2020.md b/docs/en/datasets/detect/globalwheat2020.md
index ef7ff7ac310..e744b8d666a 100644
--- a/docs/en/datasets/detect/globalwheat2020.md
+++ b/docs/en/datasets/detect/globalwheat2020.md
@@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the Global Wheat Head Dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -48,7 +48,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="GlobalWheat2020.yaml", epochs=100, imgsz=640)
@@ -58,7 +58,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=GlobalWheat2020.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=GlobalWheat2020.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -96,9 +96,9 @@ We would like to acknowledge the researchers and institutions that contributed t
The Global Wheat Head Dataset is primarily used for developing and training deep learning models aimed at wheat head detection. This is crucial for applications in wheat phenotyping and crop management, allowing for more accurate estimations of wheat head density, size, and overall crop yield potential. Accurate detection methods help in assessing crop health and maturity, essential for efficient crop management.
-### How do I train a YOLOv8n model on the Global Wheat Head Dataset?
+### How do I train a YOLO11n model on the Global Wheat Head Dataset?
-To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the following code snippets. Make sure you have the `GlobalWheat2020.yaml` configuration file specifying dataset paths and classes:
+To train a YOLO11n model on the Global Wheat Head Dataset, you can use the following code snippets. Make sure you have the `GlobalWheat2020.yaml` configuration file specifying dataset paths and classes:
!!! example "Train Example"
@@ -108,7 +108,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the follo
from ultralytics import YOLO
# Load a pre-trained model (recommended for training)
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Train the model
results = model.train(data="GlobalWheat2020.yaml", epochs=100, imgsz=640)
@@ -118,7 +118,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the follo
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=GlobalWheat2020.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=GlobalWheat2020.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/index.md b/docs/en/datasets/detect/index.md
index 61640480f61..d5408112b26 100644
--- a/docs/en/datasets/detect/index.md
+++ b/docs/en/datasets/detect/index.md
@@ -16,7 +16,7 @@ The Ultralytics YOLO format is a dataset configuration format that allows you to
```yaml
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8 # dataset root dir
+path: ../datasets/coco8 # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir)
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
@@ -56,7 +56,7 @@ Here's how you can use these formats to train your model:
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
@@ -66,7 +66,7 @@ Here's how you can use these formats to train your model:
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Supported Datasets
@@ -89,6 +89,7 @@ Here is a list of the supported datasets and a brief description for each:
- [Brain-tumor](brain-tumor.md): A dataset for detecting brain tumors includes MRI or CT scan images with details on tumor presence, location, and characteristics.
- [African-wildlife](african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebras.
- [Signature](signature.md): A dataset featuring images of various documents with annotated signatures, supporting document verification and fraud detection research.
+- [Medical-pills](medical-pills.md): A dataset featuring images of medical-pills, annotated for applications such as pharmaceutical quality assurance, pill sorting, and regulatory compliance.
### Adding your own dataset
@@ -158,11 +159,11 @@ Ultralytics YOLO supports a wide range of datasets, including:
- [Objects365](objects365.md)
- [OpenImagesV7](open-images-v7.md)
-Each dataset page provides detailed information on the structure and usage tailored for efficient YOLOv8 training. Explore the full list in the [Supported Datasets](#supported-datasets) section.
+Each dataset page provides detailed information on the structure and usage tailored for efficient YOLO11 training. Explore the full list in the [Supported Datasets](#supported-datasets) section.
-### How do I start training a YOLOv8 model using my dataset?
+### How do I start training a YOLO11 model using my dataset?
-To start training a YOLOv8 model, ensure your dataset is formatted correctly and the paths are defined in a YAML file. Use the following script to begin training:
+To start training a YOLO11 model, ensure your dataset is formatted correctly and the paths are defined in a YAML file. Use the following script to begin training:
!!! example
@@ -171,18 +172,18 @@ To start training a YOLOv8 model, ensure your dataset is formatted correctly and
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt") # Load a pretrained model
+ model = YOLO("yolo11n.pt") # Load a pretrained model
results = model.train(data="path/to/your_dataset.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
- yolo detect train data=path/to/your_dataset.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=path/to/your_dataset.yaml model=yolo11n.pt epochs=100 imgsz=640
```
Refer to the [Usage](#usage) section for more details on utilizing different modes, including CLI commands.
### Where can I find practical examples of using Ultralytics YOLO for object detection?
-Ultralytics provides numerous examples and practical guides for using YOLOv8 in diverse applications. For a comprehensive overview, visit the [Ultralytics Blog](https://www.ultralytics.com/blog) where you can find case studies, detailed tutorials, and community stories showcasing object detection, segmentation, and more with YOLOv8. For specific examples, check the [Usage](../../modes/predict.md) section in the documentation.
+Ultralytics provides numerous examples and practical guides for using YOLO11 in diverse applications. For a comprehensive overview, visit the [Ultralytics Blog](https://www.ultralytics.com/blog) where you can find case studies, detailed tutorials, and community stories showcasing object detection, segmentation, and more with YOLO11. For specific examples, check the [Usage](../../modes/predict.md) section in the documentation.
diff --git a/docs/en/datasets/detect/lvis.md b/docs/en/datasets/detect/lvis.md
index c4a4ff76ed3..7bcfd088eb0 100644
--- a/docs/en/datasets/detect/lvis.md
+++ b/docs/en/datasets/detect/lvis.md
@@ -56,7 +56,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the LVIS dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -66,7 +66,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultral
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="lvis.yaml", epochs=100, imgsz=640)
@@ -76,7 +76,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultral
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=lvis.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=lvis.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -114,9 +114,9 @@ We would like to acknowledge the LVIS Consortium for creating and maintaining th
The [LVIS dataset](https://www.lvisdataset.org/) is a large-scale dataset with fine-grained vocabulary-level annotations developed by Facebook AI Research (FAIR). It is primarily used for object detection and instance segmentation, featuring over 1203 object categories and 2 million instance annotations. Researchers and practitioners use it to train and benchmark models like Ultralytics YOLO for advanced computer vision tasks. The dataset's extensive size and diversity make it an essential resource for pushing the boundaries of model performance in detection and segmentation.
-### How can I train a YOLOv8n model using the LVIS dataset?
+### How can I train a YOLO11n model using the LVIS dataset?
-To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size of 640, follow the example below. This process utilizes Ultralytics' framework, which offers comprehensive training features.
+To train a YOLO11n model on the LVIS dataset for 100 epochs with an image size of 640, follow the example below. This process utilizes Ultralytics' framework, which offers comprehensive training features.
!!! example "Train Example"
@@ -126,7 +126,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="lvis.yaml", epochs=100, imgsz=640)
@@ -137,7 +137,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=lvis.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=lvis.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For detailed training configurations, refer to the [Training](../../modes/train.md) documentation.
@@ -148,7 +148,7 @@ The images in the LVIS dataset are the same as those in the [COCO dataset](./coc
### Why should I use Ultralytics YOLO for training on the LVIS dataset?
-Ultralytics YOLO models, including the latest YOLOv8, are optimized for real-time object detection with state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment.
+Ultralytics YOLO models, including the latest YOLO11, are optimized for real-time object detection with state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment.
### Can I see some sample annotations from the LVIS dataset?
diff --git a/docs/en/datasets/detect/medical-pills.md b/docs/en/datasets/detect/medical-pills.md
new file mode 100644
index 00000000000..c32aabf2f72
--- /dev/null
+++ b/docs/en/datasets/detect/medical-pills.md
@@ -0,0 +1,147 @@
+---
+comments: true
+description: Explore the medical-pills detection dataset with labeled images. Essential for training AI models for pharmaceutical identification and automation.
+keywords: medical-pills dataset, pill detection, pharmaceutical imaging, AI in healthcare, computer vision, object detection, medical automation, dataset for training
+---
+
+# Medical Pills Dataset
+
+
+
+The medical-pills detection dataset is a proof-of-concept (POC) dataset, carefully curated to demonstrate the potential of AI in pharmaceutical applications. It contains labeled images specifically designed to train [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) [models](https://docs.ultralytics.com/models/) for identifying medical-pills.
+
+
+
+
+
+ Watch: How to train Ultralytics YOLO11 Model on Medical Pills Detection Dataset in Google Colab
+
+
+This dataset serves as a foundational resource for automating essential [tasks](https://docs.ultralytics.com/tasks/) such as quality control, packaging automation, and efficient sorting in pharmaceutical workflows. By integrating this dataset into projects, researchers and developers can explore innovative [solutions](https://docs.ultralytics.com/solutions/) that enhance [accuracy](https://www.ultralytics.com/glossary/accuracy), streamline operations, and ultimately contribute to improved healthcare outcomes.
+
+## Dataset Structure
+
+The medical-pills dataset is divided into two subsets:
+
+- **Training set**: Consisting of 92 images, each annotated with the class `pill`.
+- **Validation set**: Comprising 23 images with corresponding annotations.
+
+## Applications
+
+Using computer vision for medical-pills detection enables automation in the pharmaceutical industry, supporting tasks like:
+
+- **Pharmaceutical Sorting**: Automating the sorting of pills based on size, shape, or color to enhance production efficiency.
+- **AI Research and Development**: Serving as a benchmark for developing and testing computer vision algorithms in pharmaceutical use cases.
+- **Digital Inventory Systems**: Powering smart inventory solutions by integrating automated pill recognition for real-time stock monitoring and replenishment planning.
+
+## Dataset YAML
+
+A YAML configuration file is provided to define the dataset's structure, including paths and classes. For the medical-pills dataset, the `medical-pills.yaml` file can be accessed at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml).
+
+!!! example "ultralytics/cfg/datasets/medical-pills.yaml"
+
+ ```yaml
+ --8<-- "ultralytics/cfg/datasets/medical-pills.yaml"
+ ```
+
+## Usage
+
+To train a YOLO11n model on the medical-pills dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the following examples. For detailed arguments, refer to the model's [Training](../../modes/train.md) page.
+
+!!! example "Train Example"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a model
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
+
+ # Train the model
+ results = model.train(data="medical-pills.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Start training from a pretrained *.pt model
+ yolo detect train data=medical-pills.yaml model=yolo11n.pt epochs=100 imgsz=640
+ ```
+
+!!! example "Inference Example"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a model
+ model = YOLO("path/to/best.pt") # load a fine-tuned model
+
+ # Inference using the model
+ results = model.predict("https://ultralytics.com/assets/medical-pills-sample.jpg")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Start prediction with a fine-tuned *.pt model
+ yolo detect predict model='path/to/best.pt' imgsz=640 source="https://ultralytics.com/assets/medical-pills-sample.jpg"
+ ```
+
+## Sample Images and Annotations
+
+The medical-pills dataset features labeled images showcasing the diversity of pills. Below is an example of a labeled image from the dataset:
+
+
+
+- **Mosaiced Image**: Displayed is a training batch comprising mosaiced dataset images. Mosaicing enhances training diversity by consolidating multiple images into one, improving model generalization.
+
+## Citations and Acknowledgments
+
+The dataset is available under the [AGPL-3.0 License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE).
+
+If you use the Medical-pills dataset in your research or development work, please cite it using the following details:
+
+!!! quote ""
+
+ === "BibTeX"
+
+ ```bibtex
+ @dataset{Jocher_Ultralytics_Datasets_2024,
+ author = {Jocher, Glenn and Rizwan, Muhammad},
+ license = {AGPL-3.0},
+ month = {Dec},
+ title = {Ultralytics Datasets: Medical-pills Detection Dataset},
+ url = {https://docs.ultralytics.com/datasets/detect/medical-pills/},
+ version = {1.0.0},
+ year = {2024}
+ }
+ ```
+
+## FAQ
+
+### What is the structure of the medical-pills dataset?
+
+The dataset includes 92 images for training and 23 images for validation. Each image is annotated with the class `pill`, enabling effective training and evaluation of models.
+
+### How can I train a YOLO11 model on the medical-pills dataset?
+
+You can train a YOLO11 model for 100 epochs with an image size of 640px using the Python or CLI methods provided. Refer to the [Training Example](#usage) section for detailed instructions.
+
+### What are the benefits of using the medical-pills dataset in AI projects?
+
+The dataset enables automation in pill detection, contributing to counterfeit prevention, quality assurance, and pharmaceutical process optimization.
+
+### How do I perform inference on the medical-pills dataset?
+
+Inference can be done using Python or CLI methods with a fine-tuned YOLO11 model. Refer to the [Inference Example](#usage) section for code snippets.
+
+### Where can I find the YAML configuration file for the medical-pills dataset?
+
+The YAML file is available at [medical-pills.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml), containing dataset paths, classes, and additional configuration details.
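+
+The actual file is embedded in the [Dataset YAML](#dataset-yaml) section above; for orientation, a single-class detection config of this kind generally follows the layout below (the paths shown here are illustrative, not the real values):
+
+```yaml
+# Illustrative layout only; see the linked medical-pills.yaml for the actual paths and download link
+path: ../datasets/medical-pills # dataset root dir
+train: images/train # train images (relative to 'path')
+val: images/val # val images (relative to 'path')
+
+# Classes
+names:
+  0: pill
+```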
diff --git a/docs/en/datasets/detect/objects365.md b/docs/en/datasets/detect/objects365.md
index 49947617afe..96e6f3140c7 100644
--- a/docs/en/datasets/detect/objects365.md
+++ b/docs/en/datasets/detect/objects365.md
@@ -1,7 +1,7 @@
---
comments: true
description: Explore the Objects365 Dataset with 2M images and 30M bounding boxes across 365 categories. Enhance your object detection models with diverse, high-quality data.
-keywords: Objects365 dataset, object detection, machine learning, deep learning, computer vision, annotated images, bounding boxes, YOLOv8, high-resolution images, dataset configuration
+keywords: Objects365 dataset, object detection, machine learning, deep learning, computer vision, annotated images, bounding boxes, YOLO11, high-resolution images, dataset configuration
---
# Objects365 Dataset
@@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the Objects365 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -48,7 +48,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www.
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="Objects365.yaml", epochs=100, imgsz=640)
@@ -58,7 +58,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www.
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=Objects365.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=Objects365.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -97,9 +97,9 @@ We would like to acknowledge the team of researchers who created and maintain th
The [Objects365 dataset](https://www.objects365.org/) is designed for object detection tasks in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and computer vision. It provides a large-scale, high-quality dataset with 2 million annotated images and 30 million bounding boxes across 365 categories. Leveraging such a diverse dataset helps improve the performance and generalization of object detection models, making it invaluable for research and development in the field.
-### How can I train a YOLOv8 model on the Objects365 dataset?
+### How can I train a YOLO11 model on the Objects365 dataset?
-To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an image size of 640, follow these instructions:
+To train a YOLO11n model using the Objects365 dataset for 100 epochs with an image size of 640, follow these instructions:
!!! example "Train Example"
@@ -109,7 +109,7 @@ To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an ima
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="Objects365.yaml", epochs=100, imgsz=640)
@@ -119,7 +119,7 @@ To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an ima
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=Objects365.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=Objects365.yaml model=yolo11n.pt epochs=100 imgsz=640
```
Refer to the [Training](../../modes/train.md) page for a comprehensive list of available arguments.
diff --git a/docs/en/datasets/detect/open-images-v7.md b/docs/en/datasets/detect/open-images-v7.md
index 1e6f1f7e4fb..1751a2d0a43 100644
--- a/docs/en/datasets/detect/open-images-v7.md
+++ b/docs/en/datasets/detect/open-images-v7.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Explore the comprehensive Open Images V7 dataset by Google. Learn about its annotations, applications, and use YOLOv8 pretrained models for computer vision tasks.
-keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object detection, image segmentation, visual relationships, AI research, Ultralytics
+description: Explore the comprehensive Open Images V7 dataset by Google. Learn about its annotations, applications, and use YOLO11 pretrained models for computer vision tasks.
+keywords: Open Images V7, Google dataset, computer vision, YOLO11 models, object detection, image segmentation, visual relationships, AI research, Ultralytics
---
# Open Images V7 Dataset
@@ -16,7 +16,7 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object
allowfullscreen>
- Watch: [Object Detection](https://www.ultralytics.com/glossary/object-detection) using OpenImagesV7 Pretrained Model
+ Watch: Object Detection using OpenImagesV7 Pretrained Model
## Open Images V7 Pretrained Models
@@ -29,6 +29,35 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object
| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 |
| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 |
+You can use these pretrained models for inference or fine-tuning as follows.
+
+!!! example "Pretrained Model Usage Example"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load an Open Images Dataset V7 pretrained YOLOv8n model
+ model = YOLO("yolov8n-oiv7.pt")
+
+ # Run prediction
+ results = model.predict(source="image.jpg")
+
+ # Start training from the pretrained checkpoint
+ results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Predict using an Open Images Dataset V7 pretrained model
+ yolo detect predict source=image.jpg model=yolov8n-oiv7.pt
+
+ # Start training from an Open Images Dataset V7 pretrained checkpoint
+ yolo detect train data=coco8.yaml model=yolov8n-oiv7.pt epochs=100 imgsz=640
+ ```
+

## Key Features
@@ -69,7 +98,7 @@ Typically, datasets come with a YAML (Yet Another Markup Language) file that del
## Usage
-To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the Open Images V7 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! warning
@@ -87,8 +116,8 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https://
```python
from ultralytics import YOLO
- # Load a COCO-pretrained YOLOv8n model
- model = YOLO("yolov8n.pt")
+ # Load a COCO-pretrained YOLO11n model
+ model = YOLO("yolo11n.pt")
# Train the model on the Open Images V7 dataset
results = model.train(data="open-images-v7.yaml", epochs=100, imgsz=640)
@@ -97,8 +126,8 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https://
=== "CLI"
```bash
- # Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset
- yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640
+ # Train a COCO-pretrained YOLO11n model on the Open Images V7 dataset
+ yolo detect train data=open-images-v7.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -136,9 +165,9 @@ A heartfelt acknowledgment goes out to the Google AI team for creating and maint
Open Images V7 is an extensive and versatile dataset created by Google, designed to advance research in computer vision. It includes image-level labels, object bounding boxes, object segmentation masks, visual relationships, and localized narratives, making it ideal for various computer vision tasks such as object detection, segmentation, and relationship detection.
-### How do I train a YOLOv8 model on the Open Images V7 dataset?
+### How do I train a YOLO11 model on the Open Images V7 dataset?
-To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python and CLI commands. Here's an example of training the YOLOv8n model for 100 epochs with an image size of 640:
+To train a YOLO11 model on the Open Images V7 dataset, you can use both Python and CLI commands. Here's an example of training the YOLO11n model for 100 epochs with an image size of 640:
!!! example "Train Example"
@@ -147,8 +176,8 @@ To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python a
```python
from ultralytics import YOLO
- # Load a COCO-pretrained YOLOv8n model
- model = YOLO("yolov8n.pt")
+ # Load a COCO-pretrained YOLO11n model
+ model = YOLO("yolo11n.pt")
# Train the model on the Open Images V7 dataset
results = model.train(data="open-images-v7.yaml", epochs=100, imgsz=640)
@@ -158,8 +187,8 @@ To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python a
=== "CLI"
```bash
- # Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset
- yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640
+ # Train a COCO-pretrained YOLO11n model on the Open Images V7 dataset
+ yolo detect train data=open-images-v7.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For more details on arguments and settings, refer to the [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/roboflow-100.md b/docs/en/datasets/detect/roboflow-100.md
index 6b3c540e030..ba228242a65 100644
--- a/docs/en/datasets/detect/roboflow-100.md
+++ b/docs/en/datasets/detect/roboflow-100.md
@@ -67,7 +67,7 @@ Dataset benchmarking evaluates machine learning model performance on specific da
if path.exists():
# Fix YAML file and run training
benchmark.fix_yaml(str(path))
- os.system(f"yolo detect train data={path} model=yolov8s.pt epochs=1 batch=16")
+ os.system(f"yolo detect train data={path} model=yolo11s.pt epochs=1 batch=16")
# Run validation and evaluate
os.system(f"yolo detect val data={path} model=runs/detect/train/weights/best.pt > {val_log_file} 2>&1")
@@ -165,7 +165,7 @@ To use the Roboflow 100 dataset for benchmarking, you can implement the RF100Ben
if path.exists():
# Fix YAML file and run training
benchmark.fix_yaml(str(path))
- os.system(f"yolo detect train data={path} model=yolov8s.pt epochs=1 batch=16")
+ os.system(f"yolo detect train data={path} model=yolo11n.pt epochs=1 batch=16")
# Run validation and evaluate
os.system(f"yolo detect val data={path} model=runs/detect/train/weights/best.pt > {val_log_file} 2>&1")
diff --git a/docs/en/datasets/detect/signature.md b/docs/en/datasets/detect/signature.md
index 5746d57e026..834e711a45b 100644
--- a/docs/en/datasets/detect/signature.md
+++ b/docs/en/datasets/detect/signature.md
@@ -1,7 +1,7 @@
---
comments: true
description: Discover the Signature Detection Dataset for training models to identify and verify human signatures in various documents. Perfect for document verification and fraud prevention.
-keywords: Signature Detection Dataset, document verification, fraud detection, computer vision, YOLOv8, Ultralytics, annotated signatures, training dataset
+keywords: Signature Detection Dataset, document verification, fraud detection, computer vision, YOLO11, Ultralytics, annotated signatures, training dataset
---
# Signature Detection Dataset
@@ -31,7 +31,7 @@ A YAML (Yet Another Markup Language) file defines the dataset configuration, inc
## Usage
-To train a YOLOv8n model on the signature detection dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
+To train a YOLO11n model on the signature detection dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -41,7 +41,7 @@ To train a YOLOv8n model on the signature detection dataset for 100 [epochs](htt
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="signature.yaml", epochs=100, imgsz=640)
@@ -51,7 +51,7 @@ To train a YOLOv8n model on the signature detection dataset for 100 [epochs](htt
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=signature.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=signature.yaml model=yolo11n.pt epochs=100 imgsz=640
```
!!! example "Inference Example"
@@ -95,9 +95,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git
The Signature Detection Dataset is a collection of annotated images aimed at detecting human signatures within various document types. It can be applied in computer vision tasks such as [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking, primarily for document verification, fraud detection, and archival research. This dataset helps train models to recognize signatures in different contexts, making it valuable for both research and practical applications.
-### How do I train a YOLOv8n model on the Signature Detection Dataset?
+### How do I train a YOLO11n model on the Signature Detection Dataset?
-To train a YOLOv8n model on the Signature Detection Dataset, follow these steps:
+To train a YOLO11n model on the Signature Detection Dataset, follow these steps:
1. Download the `signature.yaml` dataset configuration file from [signature.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/signature.yaml).
2. Use the following Python script or CLI command to start training:
@@ -110,7 +110,7 @@ To train a YOLOv8n model on the Signature Detection Dataset, follow these steps:
from ultralytics import YOLO
# Load a pretrained model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Train the model
results = model.train(data="signature.yaml", epochs=100, imgsz=640)
@@ -119,7 +119,7 @@ To train a YOLOv8n model on the Signature Detection Dataset, follow these steps:
=== "CLI"
```bash
- yolo detect train data=signature.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=signature.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For more details, refer to the [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/sku-110k.md b/docs/en/datasets/detect/sku-110k.md
index c6cddc483fa..90557bb6b22 100644
--- a/docs/en/datasets/detect/sku-110k.md
+++ b/docs/en/datasets/detect/sku-110k.md
@@ -6,7 +6,7 @@ keywords: SKU-110k, dataset, object detection, retail shelf images, deep learnin
# SKU-110k Dataset
-The [SKU-110k](https://github.com/eg4000/SKU110K_CVPR19) dataset is a collection of densely packed retail shelf images, designed to support research in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks. Developed by Eran Goldman et al., the dataset contains over 110,000 unique store keeping unit (SKU) categories with densely packed objects, often looking similar or even identical, positioned in close proximity.
+The [SKU-110k](https://github.com/eg4000/SKU110K_CVPR19) dataset is a collection of densely packed retail shelf images, designed to support research in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks. Developed by Eran Goldman et al., the dataset contains over 110,000 unique stock keeping unit (SKU) categories with densely packed objects, often looking similar or even identical, positioned in proximity.
@@ -51,7 +51,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the SKU-110K dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -61,7 +61,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ul
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="SKU-110K.yaml", epochs=100, imgsz=640)
@@ -71,7 +71,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ul
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=SKU-110K.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -107,11 +107,11 @@ We would like to acknowledge Eran Goldman et al. for creating and maintaining th
### What is the SKU-110k dataset and why is it important for object detection?
-The SKU-110k dataset consists of densely packed retail shelf images designed to aid research in object detection tasks. Developed by Eran Goldman et al., it includes over 110,000 unique SKU categories. Its importance lies in its ability to challenge state-of-the-art object detectors with diverse object appearances and close proximity, making it an invaluable resource for researchers and practitioners in computer vision. Learn more about the dataset's structure and applications in our [SKU-110k Dataset](#sku-110k-dataset) section.
+The SKU-110k dataset consists of densely packed retail shelf images designed to aid research in object detection tasks. Developed by Eran Goldman et al., it includes over 110,000 unique SKU categories. Its importance lies in its ability to challenge state-of-the-art object detectors with diverse object appearances and proximity, making it an invaluable resource for researchers and practitioners in computer vision. Learn more about the dataset's structure and applications in our [SKU-110k Dataset](#sku-110k-dataset) section.
-### How do I train a YOLOv8 model using the SKU-110k dataset?
+### How do I train a YOLO11 model using the SKU-110k dataset?
-Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an example to train a YOLOv8n model for 100 epochs with an image size of 640:
+Training a YOLO11 model on the SKU-110k dataset is straightforward. Here's an example to train a YOLO11n model for 100 epochs with an image size of 640:
!!! example "Train Example"
@@ -121,7 +121,7 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an ex
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="SKU-110K.yaml", epochs=100, imgsz=640)
@@ -132,7 +132,7 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an ex
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=SKU-110K.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/visdrone.md b/docs/en/datasets/detect/visdrone.md
index 99b182cb4e0..cbc89d835c8 100644
--- a/docs/en/datasets/detect/visdrone.md
+++ b/docs/en/datasets/detect/visdrone.md
@@ -47,7 +47,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the VisDrone dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -57,7 +57,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ul
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="VisDrone.yaml", epochs=100, imgsz=640)
@@ -67,7 +67,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ul
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=VisDrone.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -113,9 +113,9 @@ The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-
- **Diversity**: Collected across 14 cities, in urban and rural settings, under different weather and lighting conditions.
- **Tasks**: Split into five main tasks: object detection in images and videos, single-object and multi-object tracking, and crowd counting.
-### How can I use the VisDrone Dataset to train a YOLOv8 model with Ultralytics?
+### How can I use the VisDrone Dataset to train a YOLO11 model with Ultralytics?
-To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image size of 640, you can follow these steps:
+To train a YOLO11 model on the VisDrone dataset for 100 epochs with an image size of 640, you can follow these steps:
!!! example "Train Example"
@@ -125,7 +125,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz
from ultralytics import YOLO
# Load a pretrained model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Train the model
results = model.train(data="VisDrone.yaml", epochs=100, imgsz=640)
@@ -135,7 +135,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=VisDrone.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For additional configuration options, please refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/detect/voc.md b/docs/en/datasets/detect/voc.md
index 7dc67fb5a44..449810e6980 100644
--- a/docs/en/datasets/detect/voc.md
+++ b/docs/en/datasets/detect/voc.md
@@ -39,7 +39,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n model on the VOC dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -49,7 +49,7 @@ To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultraly
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="VOC.yaml", epochs=100, imgsz=640)
@@ -59,7 +59,7 @@ To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultraly
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=VOC.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=VOC.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -99,9 +99,9 @@ We would like to acknowledge the PASCAL VOC Consortium for creating and maintain
The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes) dataset is a renowned benchmark for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification in computer vision. It includes comprehensive annotations like bounding boxes, class labels, and segmentation masks across 20 different object categories. Researchers use it widely to evaluate the performance of models like Faster R-CNN, YOLO, and Mask R-CNN due to its standardized evaluation metrics such as mean Average Precision (mAP).
-### How do I train a YOLOv8 model using the VOC dataset?
+### How do I train a YOLO11 model using the VOC dataset?
-To train a YOLOv8 model with the VOC dataset, you need the dataset configuration in a YAML file. Here's an example to start training a YOLOv8n model for 100 epochs with an image size of 640:
+To train a YOLO11 model with the VOC dataset, you need the dataset configuration in a YAML file. Here's an example to start training a YOLO11n model for 100 epochs with an image size of 640:
!!! example "Train Example"
@@ -111,7 +111,7 @@ To train a YOLOv8 model with the VOC dataset, you need the dataset configuration
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="VOC.yaml", epochs=100, imgsz=640)
@@ -121,7 +121,7 @@ To train a YOLOv8 model with the VOC dataset, you need the dataset configuration
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=VOC.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=VOC.yaml model=yolo11n.pt epochs=100 imgsz=640
```
### What are the primary challenges included in the VOC dataset?
diff --git a/docs/en/datasets/detect/xview.md b/docs/en/datasets/detect/xview.md
index df8e493357f..41b6c20ad8e 100644
--- a/docs/en/datasets/detect/xview.md
+++ b/docs/en/datasets/detect/xview.md
@@ -52,7 +52,7 @@ To train a model on the xView dataset for 100 [epochs](https://www.ultralytics.c
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="xView.yaml", epochs=100, imgsz=640)
@@ -62,7 +62,7 @@ To train a model on the xView dataset for 100 [epochs](https://www.ultralytics.c
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=xView.yaml model=yolo11n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -114,7 +114,7 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="xView.yaml", epochs=100, imgsz=640)
@@ -125,7 +125,7 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps
```bash
# Start training from a pretrained *.pt model
- yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640
+ yolo detect train data=xView.yaml model=yolo11n.pt epochs=100 imgsz=640
```
For detailed arguments and settings, refer to the model [Training](../../modes/train.md) page.
diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md
index 4c550991367..f20489d444b 100644
--- a/docs/en/datasets/explorer/api.md
+++ b/docs/en/datasets/explorer/api.md
@@ -6,6 +6,10 @@ keywords: Ultralytics, Explorer API, dataset exploration, SQL queries, similarit
# Ultralytics Explorer API
+!!! warning "Community Note โ ๏ธ"
+
+ As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!๐
+
## Introduction
@@ -36,7 +40,7 @@ pip install ultralytics[explorer]
from ultralytics import Explorer
# Create an Explorer object
-explorer = Explorer(data="coco128.yaml", model="yolov8n.pt")
+explorer = Explorer(data="coco128.yaml", model="yolo11n.pt")
# Create embeddings for your dataset
explorer.create_embeddings_table()
@@ -75,7 +79,7 @@ You get a pandas dataframe with the `limit` number of most similar data points t
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
similar = exp.get_similar(img="https://ultralytics.com/images/bus.jpg", limit=10)
@@ -95,7 +99,7 @@ You get a pandas dataframe with the `limit` number of most similar data points t
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
similar = exp.get_similar(idx=1, limit=10)
@@ -118,7 +122,7 @@ You can also plot the similar images using the `plot_similar` method. This metho
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
plt = exp.plot_similar(img="https://ultralytics.com/images/bus.jpg", limit=10)
@@ -131,7 +135,7 @@ You can also plot the similar images using the `plot_similar` method. This metho
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
plt = exp.plot_similar(idx=1, limit=10)
@@ -150,7 +154,7 @@ Note: This works using LLMs under the hood so the results are probabilistic and
from ultralytics.data.explorer import plot_query_result
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
df = exp.ask_ai("show me 100 images with exactly one person and 2 dogs. There can be other objects too")
@@ -171,7 +175,7 @@ You can run SQL queries on your dataset using the `sql_query` method. This metho
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
df = exp.sql_query("WHERE labels LIKE '%person%' AND labels LIKE '%dog%'")
@@ -188,7 +192,7 @@ You can also plot the results of a SQL query using the `plot_sql_query` method.
from ultralytics import Explorer
# create an Explorer object
- exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
+ exp = Explorer(data="coco128.yaml", model="yolo11n.pt")
exp.create_embeddings_table()
# plot the SQL Query
@@ -235,7 +239,7 @@ Here are some examples of what you can do with the table:
```python
from ultralytics import Explorer
- exp = Explorer(model="yolov8n.pt")
+ exp = Explorer(model="yolo11n.pt")
exp.create_embeddings_table()
table = exp.table
@@ -361,7 +365,7 @@ You can use the Ultralytics Explorer API to perform similarity searches by creat
from ultralytics import Explorer
# Create an Explorer object
-explorer = Explorer(data="coco128.yaml", model="yolov8n.pt")
+explorer = Explorer(data="coco128.yaml", model="yolo11n.pt")
explorer.create_embeddings_table()
# Search for similar images to a given image
@@ -383,7 +387,7 @@ The Ask AI feature allows users to filter datasets using natural language querie
from ultralytics import Explorer
# Create an Explorer object
-explorer = Explorer(data="coco128.yaml", model="yolov8n.pt")
+explorer = Explorer(data="coco128.yaml", model="yolo11n.pt")
explorer.create_embeddings_table()
# Query with natural language
diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md
index 3bc3a21e469..92c1ba78b35 100644
--- a/docs/en/datasets/explorer/dashboard.md
+++ b/docs/en/datasets/explorer/dashboard.md
@@ -6,6 +6,10 @@ keywords: Ultralytics Explorer GUI, semantic search, vector similarity, SQL quer
# Explorer GUI
+!!! warning "Community Note โ ๏ธ"
+
+ As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!๐
+
Explorer GUI is like a playground built using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries and even natural language search using our ask AI feature powered by LLMs.
\n",
- "\n",
- " \n",
- " \n",
- "\n",
- " [ไธญๆ](https://docs.ultralytics.com/zh/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/)\n",
- "\n",
- " \n",
- " \n",
- " \n",
- "\n",
- "Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets using with the power of semantic search. You can utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.\n",
- "\n",
- "We hope that the resources in this notebook will help you get the most out of Ultralytics. Please browse the Explorer Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
- "\n",
- "Try `yolo explorer` powered by Exlorer API\n",
- "\n",
- "Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!\n",
- "\n",
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f",
- "metadata": {
- "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f"
- },
- "source": [
- "## Setup\n",
- "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41",
- "metadata": {
- "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41"
- },
- "outputs": [],
- "source": [
- "%pip install ultralytics[explorer] openai\n",
- "import ultralytics\n",
- "\n",
- "ultralytics.checks()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ae602549-3419-4909-9f82-35cba515483f",
- "metadata": {
- "id": "ae602549-3419-4909-9f82-35cba515483f"
- },
- "outputs": [],
- "source": [
- "from ultralytics import Explorer"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c",
- "metadata": {
- "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c"
- },
- "source": [
- "## Similarity search\n",
- "Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once and it is reused automatically.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "334619da-6deb-4b32-9fe0-74e0a79cee20",
- "metadata": {
- "id": "334619da-6deb-4b32-9fe0-74e0a79cee20"
- },
- "outputs": [],
- "source": [
- "exp = Explorer(\"VOC.yaml\", model=\"yolov8n.pt\")\n",
- "exp.create_embeddings_table()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d",
- "metadata": {
- "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d"
- },
- "source": [
- "One the embeddings table is built, you can get run semantic search in any of the following ways:\n",
- "- On a given index / list of indices in the dataset like - `exp.get_similar(idx=[1,10], limit=10)`\n",
- "- On any image/ list of images not in the dataset - `exp.get_similar(img=[\"path/to/img1\", \"path/to/img2\"], limit=10)`\n",
- "In case of multiple inputs, the aggregade of their embeddings is used.\n",
- "\n",
- "You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b485f05b-d92d-42bc-8da7-5e361667b341",
- "metadata": {
- "id": "b485f05b-d92d-42bc-8da7-5e361667b341"
- },
- "outputs": [],
- "source": [
- "similar = exp.get_similar(idx=1, limit=10)\n",
- "similar.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "acf4b489-2161-4176-a1fe-d1d067d8083d",
- "metadata": {
- "id": "acf4b489-2161-4176-a1fe-d1d067d8083d"
- },
- "source": [
- "You can use the also plot the similar samples directly using the `plot_similar` util\n",
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553",
- "metadata": {
- "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553"
- },
- "source": [
- "## 2. Ask AI: Search or filter with Natural Language\n",
- "You can prompt the Explorer object with the kind of data points you want to see and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.\n",
- "
\n",
- "\n",
- "\n",
- "
\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c",
- "metadata": {
- "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c"
- },
- "outputs": [],
- "source": [
- "df = exp.ask_ai(\"show me images containing more than 10 objects with at least 2 persons\")\n",
- "df.head(5)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f",
- "metadata": {
- "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f"
- },
- "source": [
- "for plotting these results you can use `plot_query_result` util\n",
- "Example:\n",
- "```\n",
- "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n",
- "Image.fromarray(plt)\n",
- "```\n",
- "
\n",
- " \n",
- "\n",
- "
"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511",
- "metadata": {
- "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511"
- },
- "outputs": [],
- "source": [
- "# plot\n",
- "from PIL import Image\n",
- "\n",
- "from ultralytics.data.explorer import plot_query_result\n",
- "\n",
- "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n",
- "Image.fromarray(plt)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "35315ae6-d827-40e4-8813-279f97a83b34",
- "metadata": {
- "id": "35315ae6-d827-40e4-8813-279f97a83b34"
- },
- "source": [
- "## 3. Run SQL queries on your Dataset!\n",
- "Sometimes you might want to investigate a certain type of entries in your dataset. For this Explorer allows you to execute SQL queries.\n",
- "It accepts either of the formats:\n",
- "- Queries beginning with \"WHERE\" will automatically select all columns. This can be thought of as a short-hand query\n",
- "- You can also write full queries where you can specify which columns to select\n",
- "\n",
- "This can be used to investigate model performance and specific data points. For example:\n",
- "- let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog.\n",
- "\n",
- "You can combine SQL query and semantic search to filter down to specific type of results\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf",
- "metadata": {
- "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf"
- },
- "outputs": [],
- "source": [
- "table = exp.sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\")\n",
- "table"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab",
- "metadata": {
- "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab"
- },
- "source": [
- "Just like similarity search, you also get a util to directly plot the sql queries using `exp.plot_sql_query`\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "18b977e7-d048-4b22-b8c4-084a03b04f23",
- "metadata": {
- "id": "18b977e7-d048-4b22-b8c4-084a03b04f23"
- },
- "outputs": [],
- "source": [
- "exp.plot_sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\", labels=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f26804c5-840b-4fd1-987f-e362f29e3e06",
- "metadata": {
- "id": "f26804c5-840b-4fd1-987f-e362f29e3e06"
- },
- "source": [
- "## 3. Working with embeddings Table (Advanced)\n",
- "Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre and post filters, etc."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a",
- "metadata": {
- "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a"
- },
- "outputs": [],
- "source": [
- "table = exp.table\n",
- "table.schema"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "238db292-8610-40b3-9af7-dfd6be174892",
- "metadata": {
- "id": "238db292-8610-40b3-9af7-dfd6be174892"
- },
- "source": [
- "### Run raw queries\n",
- "Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. In LLM and other AI applications, each data point can be presented by the embeddings generated from some models, it returns the most relevant features.\n",
- "\n",
- "A search in high-dimensional vector space, is to find K-Nearest-Neighbors (KNN) of the query vector.\n",
- "\n",
- "Metric\n",
- "In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:\n",
- "- L2\n",
- "- Cosine\n",
- "- Dot\n",
- "Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/)\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d74430fe-5aee-45a1-8863-3f2c31338792",
- "metadata": {
- "id": "d74430fe-5aee-45a1-8863-3f2c31338792"
- },
- "outputs": [],
- "source": [
- "dummy_img_embedding = [i for i in range(256)]\n",
- "table.search(dummy_img_embedding).limit(5).to_pandas()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "587486b4-0d19-4214-b994-f032fb2e8eb5",
- "metadata": {
- "id": "587486b4-0d19-4214-b994-f032fb2e8eb5"
- },
- "source": [
- "### Inter-conversion to popular data formats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c",
- "metadata": {
- "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c"
- },
- "outputs": [],
- "source": [
- "df = table.to_pandas()\n",
- "pa_table = table.to_arrow()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "42659d63-ad76-49d6-8dfc-78d77278db72",
- "metadata": {
- "id": "42659d63-ad76-49d6-8dfc-78d77278db72"
- },
- "source": [
- "### Work with Embeddings\n",
- "You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca",
- "metadata": {
- "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "\n",
- "embeddings = table.to_pandas()[\"vector\"].tolist()\n",
- "embeddings = np.array(embeddings)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602",
- "metadata": {
- "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602"
- },
- "source": [
- "### Scatterplot\n",
- "One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. Let's try an example\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8",
- "metadata": {
- "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8"
- },
- "outputs": [],
- "source": [
- "!pip install scikit-learn --q"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "196079c3-45a9-4325-81ab-af79a881e37a",
- "metadata": {
- "id": "196079c3-45a9-4325-81ab-af79a881e37a"
- },
- "outputs": [],
- "source": [
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "from sklearn.decomposition import PCA\n",
- "\n",
- "# Reduce dimensions using PCA to 3 components for visualization in 3D\n",
- "pca = PCA(n_components=3)\n",
- "reduced_data = pca.fit_transform(embeddings)\n",
- "\n",
- "# Create a 3D scatter plot using Matplotlib's Axes3D\n",
- "fig = plt.figure(figsize=(8, 6))\n",
- "ax = fig.add_subplot(111, projection=\"3d\")\n",
- "\n",
- "# Scatter plot\n",
- "ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)\n",
- "ax.set_title(\"3D Scatter Plot of Reduced 256-Dimensional Data (PCA)\")\n",
- "ax.set_xlabel(\"Component 1\")\n",
- "ax.set_ylabel(\"Component 2\")\n",
- "ax.set_zlabel(\"Component 3\")\n",
- "\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1c843c23-e3f2-490e-8d6c-212fa038a149",
- "metadata": {
- "id": "1c843c23-e3f2-490e-8d6c-212fa038a149"
- },
- "source": [
- "## 4. Similarity Index\n",
- "Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation-\n",
- "* It tries to estimate how similar each data point is with the rest of the dataset.\n",
- "* It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.\n",
- "\n",
- "For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case, your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.\n",
- "Similar to vector and SQL search, this also comes with a util to directly plot it. Let's look at the plot first\n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc",
- "metadata": {
- "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc"
- },
- "outputs": [],
- "source": [
- "exp.plot_similarity_index(max_dist=0.2, top_k=0.01)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "28228a9a-b727-45b5-8ca7-8db662c0b937",
- "metadata": {
- "id": "28228a9a-b727-45b5-8ca7-8db662c0b937"
- },
- "source": [
- "Now let's look at the output of the operation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a",
- "metadata": {
- "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "\n",
- "sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d",
- "metadata": {
- "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d"
- },
- "outputs": [],
- "source": [
- "sim_idx"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4",
- "metadata": {
- "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4"
- },
- "source": [
- "Let's create a query to see what data points have similarity count of more than 30 and plot images similar to them."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "58d2557b-d401-43cf-937d-4f554c7bc808",
- "metadata": {
- "id": "58d2557b-d401-43cf-937d-4f554c7bc808"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "\n",
- "sim_count = np.array(sim_idx[\"count\"])\n",
- "sim_idx[\"im_file\"][sim_count > 30]"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e",
- "metadata": {
- "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e"
- },
- "source": [
- "You should see something like this\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2",
- "metadata": {
- "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2"
- },
- "outputs": [],
- "source": [
- "exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/docs/en/datasets/explorer/explorer.md b/docs/en/datasets/explorer/explorer.md
new file mode 100644
index 00000000000..cc4a1e7e849
--- /dev/null
+++ b/docs/en/datasets/explorer/explorer.md
@@ -0,0 +1,278 @@
+---
+comments: true
+description: Dive into advanced data exploration with Ultralytics Explorer. Perform semantic searches, execute SQL queries, and leverage AI-powered natural language insights for seamless data analysis.
+keywords: Ultralytics Explorer, data exploration, semantic search, vector similarity, SQL queries, AI, natural language queries, machine learning, OpenAI, LLMs, Ultralytics HUB
+---
+
+# VOC Exploration Example
+
+
+
+Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets with the power of semantic search. You get utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.
+
+Try `yolo explorer` powered by Explorer API
+
+Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!
+
+!!! warning "Community Note ⚠️"
+
+    As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
+
+## Setup
+
+Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.
+
+```bash
+pip install ultralytics[explorer] openai
+yolo checks
+```
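+
+Equivalently, you can run the check from Python, as the original notebook did:
+
+```python
+import ultralytics
+
+ultralytics.checks()
+```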
+
+## Similarity Search
+
+Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once, and it is reused automatically.
+
+```python
+from ultralytics import Explorer
+
+# Create an Explorer object for a dataset-model pair
+exp = Explorer("VOC.yaml", model="yolo11n.pt")
+exp.create_embeddings_table()
+```
+
+Once the embeddings table is built, you can run semantic search in any of the following ways:
+
+- On a given index or list of indices in the dataset, e.g. `exp.get_similar(idx=[1, 10], limit=10)`
+- On any image or list of images not in the dataset, e.g. `exp.get_similar(img=["path/to/img1", "path/to/img2"], limit=10)`
+
+In case of multiple inputs, the aggregate of their embeddings is used. You get a pandas dataframe with the `limit` most similar data points to the input, along with their distance in the embedding space. You can use this dataframe to perform further filtering.
+
+
+
+```python
+# Search dataset by index
+similar = exp.get_similar(idx=1, limit=10)
+similar.head()
+```
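+
+The same call also works for images that are not in the dataset, mirroring the `get_similar(img=...)` usage shown in the Explorer API docs; a minimal sketch (the example URL is purely illustrative):
+
+```python
+# Search the dataset by an external image (local path or URL)
+similar = exp.get_similar(img="https://ultralytics.com/images/bus.jpg", limit=10)
+similar.head()
+```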
+
+You can also plot the similar samples directly using the `plot_similar` util:
+
+
+
+```python
+exp.plot_similar(idx=6500, limit=20)
+exp.plot_similar(idx=[100, 101], limit=10) # Can also pass list of idxs or imgs
+
+exp.plot_similar(img="https://ultralytics.com/images/bus.jpg", limit=10, labels=False) # Can also pass external images
+```
+
+
+
+## Ask AI: Search or filter with Natural Language
+
+You can prompt the Explorer object with the kind of data points you want to see, and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.
+
+
+
+```python
+df = exp.ask_ai("show me images containing more than 10 objects with at least 2 persons")
+df.head(5)
+```
+
+For plotting these results you can use the `plot_query_result` util. For example:
+
+```python
+plt = plot_query_result(exp.ask_ai("show me 10 images containing exactly 2 persons"))
+Image.fromarray(plt)
+```
+
+
+
+```python
+# plot
+from PIL import Image
+
+from ultralytics.data.explorer import plot_query_result
+
+plt = plot_query_result(exp.ask_ai("show me 10 images containing exactly 2 persons"))
+Image.fromarray(plt)
+```
+
+## Run SQL queries on your Dataset
+
+Sometimes you might want to investigate a certain type of entry in your dataset. For this, Explorer allows you to execute SQL queries. It accepts either of the following formats:
+
+- Queries beginning with "WHERE" will automatically select all columns. This can be thought of as a shorthand query.
+- You can also write full queries where you specify which columns to select (see the sketch after the examples below).
+
+This can be used to investigate model performance and specific data points. For example:
+
+- Let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog.
+
+You can combine SQL queries and semantic search to filter down to a specific type of result.
+
+```python
+table = exp.sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10")
+print(table)
+```
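+
+The second format, a full query that selects specific columns, might look like the following sketch; the `'table'` alias used in the FROM clause is an assumption for illustration rather than a documented identifier:
+
+```python
+# Full query selecting specific columns (the 'table' name is assumed for illustration)
+df = exp.sql_query("SELECT im_file, labels FROM 'table' WHERE labels LIKE '%person%' LIMIT 10")
+print(df)
+```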
+
+Just like similarity search, you also get a util to directly plot the SQL queries using `exp.plot_sql_query`:
+
+
+
+```python
+exp.plot_sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10", labels=True)
+```
+
+## Working with the Embeddings Table (Advanced)
+
+Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly using the `Explorer.table` object and run raw queries, push down pre- and post-filters, etc.
+
+```python
+table = exp.table
+print(table.schema)
+```
+
+### Run raw queries
+
+Vector search finds the nearest vectors in the database. In a recommendation system or search engine, you can find products similar to the one you searched for. In LLM and other AI applications, each data point can be represented by embeddings generated from a model, and the search returns the most relevant features.
+
+A search in a high-dimensional vector space is to find the K-Nearest Neighbors (KNN) of the query vector.
+
+**Metric**: In LanceDB, a metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:
+
+- L2
+- Cosine
+- Dot
+
+Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. More details on available LanceDB table ops are in the [docs](https://lancedb.github.io/lancedb/).
+
+
+
+```python
+dummy_img_embedding = [i for i in range(256)]
+table.search(dummy_img_embedding).limit(5).to_pandas()
+```
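+
+You can also query with a vector taken from the table itself; a small sketch, assuming the `vector` column holds the image embeddings (as noted in the Work with Embeddings section below):
+
+```python
+# Use a real embedding from the table as the query vector
+first_vec = table.to_pandas()["vector"][0]
+table.search(first_vec).limit(5).to_pandas()
+```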
+
+### Interconversion to popular data formats
+
+```python
+df = table.to_pandas()
+pa_table = table.to_arrow()
+```
+
+### Work with Embeddings
+
+You can access the raw embeddings from the LanceDB table and analyse them. The image embeddings are stored in the `vector` column.
+
+```python
+import numpy as np
+
+embeddings = table.to_pandas()["vector"].tolist()
+embeddings = np.array(embeddings)
+```
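+
+As one example of such analysis (a minimal sketch, assuming the `embeddings` array built above), you can compare two images directly via cosine similarity:
+
+```python
+# Cosine similarity between the first two image embeddings
+a, b = embeddings[0], embeddings[1]
+cos_sim = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+print(f"Cosine similarity between image 0 and image 1: {cos_sim:.4f}")
+```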
+
+### Scatterplot
+
+One of the preliminary steps in analysing embeddings is plotting them in a lower-dimensional space via dimensionality reduction. Let's try an example.
+
+
+
+```python
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA # pip install scikit-learn
+
+# Reduce dimensions using PCA to 3 components for visualization in 3D
+pca = PCA(n_components=3)
+reduced_data = pca.fit_transform(embeddings)
+
+# Create a 3D scatter plot using Matplotlib's Axes3D
+fig = plt.figure(figsize=(8, 6))
+ax = fig.add_subplot(111, projection="3d")
+
+# Scatter plot
+ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)
+ax.set_title("3D Scatter Plot of Reduced 256-Dimensional Data (PCA)")
+ax.set_xlabel("Component 1")
+ax.set_ylabel("Component 2")
+ax.set_zlabel("Component 3")
+
+plt.show()
+```
+
+### Similarity Index
+
+Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation:
+
+- It tries to estimate how similar each data point is to the rest of the dataset.
+- It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.
+
+For a given dataset, model, `max_dist` and `top_k`, the similarity index, once generated, will be reused. In case your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`. Similar to vector and SQL search, this also comes with a util to directly plot it. Let's look at the plot first.
+
+```python
+exp.plot_similarity_index(max_dist=0.2, top_k=0.01)
+```
+
+Now let's look at the output of the operation
+
+```python
+sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)
+
+sim_idx
+```
+
+Let's create a query to see which data points have a similarity count of more than 30 and plot images similar to them.
+
+```python
+import numpy as np
+
+sim_count = np.array(sim_idx["count"])
+sim_idx["im_file"][sim_count > 30]
+```
+
+You should see something like this
+
+
+
+```python
+exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images
+```
diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md
index d7e7ab66d4e..6db5fa16730 100644
--- a/docs/en/datasets/explorer/index.md
+++ b/docs/en/datasets/explorer/index.md
@@ -6,6 +6,10 @@ keywords: Ultralytics Explorer, CV datasets, semantic search, SQL queries, vecto
# Ultralytics Explorer
+!!! warning "Community Note ⚠️"
+
+    As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
+
diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md
index a53d2040d17..7d7a30a7139 100644
--- a/docs/en/datasets/index.md
+++ b/docs/en/datasets/index.md
@@ -19,7 +19,11 @@ Ultralytics provides support for various datasets to facilitate computer vision
Watch: Ultralytics Datasets Overview
-## NEW ๐ Ultralytics Explorer
+## Ultralytics Explorer
+
+!!! warning "Community Note ⚠️"
+
+    As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).
@@ -46,10 +50,11 @@ Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your da
- [VisDrone](detect/visdrone.md): A dataset containing object detection and multi-object tracking data from drone-captured imagery with over 10K images and video sequences.
- [VOC](detect/voc.md): The Pascal Visual Object Classes (VOC) dataset for object detection and segmentation with 20 object classes and over 11K images.
- [xView](detect/xview.md): A dataset for object detection in overhead imagery with 60 object categories and over 1 million annotated objects.
-- [Roboflow 100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation.
+- [RF100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation.
- [Brain-tumor](detect/brain-tumor.md): A dataset for detecting brain tumors includes MRI or CT scan images with details on tumor presence, location, and characteristics.
- [African-wildlife](detect/african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebras.
- [Signature](detect/signature.md): A dataset featuring images of various documents with annotated signatures, supporting document verification and fraud detection research.
+- [Medical-pills](detect/medical-pills.md): A dataset containing labeled images of medical pills, designed to aid in tasks like pharmaceutical quality control, sorting, and ensuring compliance with industry standards.
## [Instance Segmentation](segment/index.md)
@@ -70,6 +75,7 @@ Pose estimation is a technique used to determine the pose of the object relative
- [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations.
- [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks.
- [Hand-Keypoints](pose/hand-keypoints.md): A concise dataset featuring over 26,000 images centered on human hands, annotated with 21 keypoints per hand, designed for pose estimation tasks.
+- [Dog-pose](pose/dog-pose.md): A comprehensive dataset featuring approximately 6,000 images focused on dogs, annotated with 24 keypoints per dog, tailored for pose estimation tasks.
## [Classification](classify/index.md)
@@ -85,6 +91,7 @@ Pose estimation is a technique used to determine the pose of the object relative
- [Imagenette](classify/imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing.
- [Imagewoof](classify/imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks.
- [MNIST](classify/mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks.
+- [MNIST160](classify/mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total.
## [Oriented Bounding Boxes (OBB)](obb/index.md)
@@ -104,6 +111,17 @@ Multi-object tracking is a computer vision technique that involves detecting and
Contributing a new dataset involves several steps to ensure that it aligns well with the existing infrastructure. Below are the necessary steps:
+
+Watch: How to Contribute to Ultralytics Datasets
+
### Steps to Contribute a New Dataset
1. **Collect Images**: Gather the images that belong to the dataset. These could be collected from various sources, such as public databases or your own collection.
diff --git a/docs/en/datasets/obb/dota-v2.md b/docs/en/datasets/obb/dota-v2.md
index 76024cac105..a2c739479fd 100644
--- a/docs/en/datasets/obb/dota-v2.md
+++ b/docs/en/datasets/obb/dota-v2.md
@@ -31,7 +31,7 @@ keywords: DOTA dataset, object detection, aerial images, oriented bounding boxes
- Very small instances (less than 10 pixels) are also annotated.
- Addition of a new category: "container crane".
- A total of 403,318 instances.
-- Released for the DOAI Challenge 2019 on Object Detection in Aerial Images.
+- Released for the [DOAI Challenge 2019 on Object Detection in Aerial Images](https://captain-whu.github.io/DOAI2019/challenge.html).
### DOTA-v2.0
@@ -108,8 +108,8 @@ To train a model on the DOTA v1 dataset, you can utilize the following code snip
```python
from ultralytics import YOLO
- # Create a new YOLOv8n-OBB model from scratch
- model = YOLO("yolov8n-obb.yaml")
+ # Create a new YOLO11n-OBB model from scratch
+ model = YOLO("yolo11n-obb.yaml")
# Train the model on the DOTAv1 dataset
results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
@@ -118,8 +118,8 @@ To train a model on the DOTA v1 dataset, you can utilize the following code snip
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the DOTAv1 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
+ # Train a new YOLO11n-OBB model on the DOTAv1 dataset
+ yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024
```
## Sample Data and Annotations
@@ -176,8 +176,8 @@ To train a model on the DOTA dataset, you can use the following example with Ult
```python
from ultralytics import YOLO
- # Create a new YOLOv8n-OBB model from scratch
- model = YOLO("yolov8n-obb.yaml")
+ # Create a new YOLO11n-OBB model from scratch
+ model = YOLO("yolo11n-obb.yaml")
# Train the model on the DOTAv1 dataset
results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
@@ -186,8 +186,8 @@ To train a model on the DOTA dataset, you can use the following example with Ult
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the DOTAv1 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
+ # Train a new YOLO11n-OBB model on the DOTAv1 dataset
+ yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024
```
For more details on how to split and preprocess the DOTA images, refer to the [split DOTA images section](#split-dota-images).
diff --git a/docs/en/datasets/obb/dota8.md b/docs/en/datasets/obb/dota8.md
index f24ea5bce2e..199c91bf06e 100644
--- a/docs/en/datasets/obb/dota8.md
+++ b/docs/en/datasets/obb/dota8.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Explore the DOTA8 dataset - a small, versatile oriented object detection dataset ideal for testing and debugging object detection models using Ultralytics YOLOv8.
-keywords: DOTA8 dataset, Ultralytics, YOLOv8, object detection, debugging, training models, oriented object detection, dataset YAML
+description: Explore the DOTA8 dataset - a small, versatile oriented object detection dataset ideal for testing and debugging object detection models using Ultralytics YOLO11.
+keywords: DOTA8 dataset, Ultralytics, YOLO11, object detection, debugging, training models, oriented object detection, dataset YAML
---
# DOTA8 Dataset
@@ -10,7 +10,7 @@ keywords: DOTA8 dataset, Ultralytics, YOLOv8, object detection, debugging, train
[Ultralytics](https://www.ultralytics.com/) DOTA8 is a small, but versatile oriented [object detection](https://www.ultralytics.com/glossary/object-detection) dataset composed of the first 8 images of the split DOTAv1 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
-This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
## Dataset YAML
@@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-obb model on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -34,7 +34,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.u
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-obb.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-obb.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="dota8.yaml", epochs=100, imgsz=640)
@@ -44,7 +44,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.u
```bash
# Start training from a pretrained *.pt model
- yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
+ yolo obb train data=dota8.yaml model=yolo11n-obb.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -84,11 +84,11 @@ A special note of gratitude to the team behind the DOTA datasets for their comme
### What is the DOTA8 dataset and how can it be used?
-The DOTA8 dataset is a small, versatile oriented object detection dataset made up of the first 8 images from the DOTAv1 split set, with 4 images designated for training and 4 for validation. It's ideal for testing and debugging object detection models like Ultralytics YOLOv8. Due to its manageable size and diversity, it helps in identifying pipeline errors and running sanity checks before deploying larger datasets. Learn more about object detection with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics).
+The DOTA8 dataset is a small, versatile oriented object detection dataset made up of the first 8 images from the DOTAv1 split set, with 4 images designated for training and 4 for validation. It's ideal for testing and debugging object detection models like Ultralytics YOLO11. Due to its manageable size and diversity, it helps in identifying pipeline errors and running sanity checks before deploying larger datasets. Learn more about object detection with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics).
-### How do I train a YOLOv8 model using the DOTA8 dataset?
+### How do I train a YOLO11 model using the DOTA8 dataset?
-To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For comprehensive argument options, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For comprehensive argument options, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -98,7 +98,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-obb.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-obb.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="dota8.yaml", epochs=100, imgsz=640)
@@ -108,7 +108,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s
```bash
# Start training from a pretrained *.pt model
- yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
+ yolo obb train data=dota8.yaml model=yolo11n-obb.pt epochs=100 imgsz=640
```
### What are the key features of the DOTA dataset and where can I access the YAML file?
@@ -119,6 +119,6 @@ The DOTA dataset is known for its large-scale benchmark and the challenges it pr
Mosaicing combines multiple images into one during training, increasing the variety of objects and contexts within each batch. This improves a model's ability to generalize to different object sizes, aspect ratios, and scenes. This technique can be visually demonstrated through a training batch composed of mosaiced DOTA8 dataset images, helping in robust model development. Explore more about mosaicing and training techniques on our [Training](../../modes/train.md) page.
-### Why should I use Ultralytics YOLOv8 for object detection tasks?
+### Why should I use Ultralytics YOLO11 for object detection tasks?
-Ultralytics YOLOv8 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLOv8 documentation](https://github.com/ultralytics/ultralytics).
+Ultralytics YOLO11 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLO11 documentation](https://github.com/ultralytics/ultralytics).
diff --git a/docs/en/datasets/obb/index.md b/docs/en/datasets/obb/index.md
index edeffb83aff..7ecaf3450e4 100644
--- a/docs/en/datasets/obb/index.md
+++ b/docs/en/datasets/obb/index.md
@@ -39,8 +39,8 @@ To train a model using these OBB formats:
```python
from ultralytics import YOLO
- # Create a new YOLOv8n-OBB model from scratch
- model = YOLO("yolov8n-obb.yaml")
+ # Create a new YOLO11n-OBB model from scratch
+ model = YOLO("yolo11n-obb.yaml")
# Train the model on the DOTAv1 dataset
results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024)
@@ -49,8 +49,8 @@ To train a model using these OBB formats:
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the DOTAv1 dataset
- yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024
+ # Train a new YOLO11n-OBB model on the DOTAv1 dataset
+ yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024
```
## Supported Datasets
@@ -92,7 +92,7 @@ It's imperative to validate the compatibility of the dataset with your model and
Oriented Bounding Boxes (OBB) are a type of bounding box annotation where the box can be rotated to align more closely with the object being detected, rather than just being axis-aligned. This is particularly useful in aerial or satellite imagery where objects might not be aligned with the image axes. In Ultralytics YOLO models, OBBs are represented by their four corner points in the YOLO OBB format. This allows for more accurate object detection since the bounding boxes can rotate to fit the objects better.
-### How do I convert my existing DOTA dataset labels to YOLO OBB format for use with Ultralytics YOLOv8?
+### How do I convert my existing DOTA dataset labels to YOLO OBB format for use with Ultralytics YOLO11?
You can convert DOTA dataset labels to YOLO OBB format using the `convert_dota_to_yolo_obb` function from Ultralytics. This conversion ensures compatibility with the Ultralytics YOLO models, enabling you to leverage the OBB capabilities for enhanced object detection. Here's a quick example:
@@ -104,9 +104,9 @@ convert_dota_to_yolo_obb("path/to/DOTA")
This script will reformat your DOTA annotations into a YOLO-compatible format.
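For completeness, here is the conversion call together with its import; the `path/to/DOTA` root is a placeholder and is assumed to contain the standard DOTA `images/` and original `labels/` folders:

```python
from ultralytics.data.converter import convert_dota_to_yolo_obb

# Rewrite the original DOTA annotations under the given root into YOLO OBB label files
convert_dota_to_yolo_obb("path/to/DOTA")
```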
-### How do I train a YOLOv8 model with oriented bounding boxes (OBB) on my dataset?
+### How do I train a YOLO11 model with oriented bounding boxes (OBB) on my dataset?
-Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO OBB format and then using the Ultralytics API to train the model. Here's an example in both Python and CLI:
+Training a YOLO11 model with OBBs involves ensuring your dataset is in the YOLO OBB format and then using the Ultralytics API to train the model. Here's an example in both Python and CLI:
!!! example
@@ -115,8 +115,8 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO
```python
from ultralytics import YOLO
- # Create a new YOLOv8n-OBB model from scratch
- model = YOLO("yolov8n-obb.yaml")
+ # Create a new YOLO11n-OBB model from scratch
+ model = YOLO("yolo11n-obb.yaml")
# Train the model on the custom dataset
results = model.train(data="your_dataset.yaml", epochs=100, imgsz=640)
@@ -125,8 +125,8 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO
=== "CLI"
```bash
- # Train a new YOLOv8n-OBB model on the custom dataset
- yolo obb train data=your_dataset.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640
+ # Train a new YOLO11n-OBB model on the custom dataset
+ yolo obb train data=your_dataset.yaml model=yolo11n-obb.yaml epochs=100 imgsz=640
```
This ensures your model leverages the detailed OBB annotations for improved detection [accuracy](https://www.ultralytics.com/glossary/accuracy).
@@ -142,6 +142,6 @@ Currently, Ultralytics supports the following datasets for OBB training:
These datasets are tailored for scenarios where OBBs offer a significant advantage, such as aerial and satellite image analysis.
-### Can I use my own dataset with oriented bounding boxes for YOLOv8 training, and if so, how?
+### Can I use my own dataset with oriented bounding boxes for YOLO11 training, and if so, how?
-Yes, you can use your own dataset with oriented bounding boxes for YOLOv8 training. Ensure your dataset annotations are converted to the YOLO OBB format, which involves defining bounding boxes by their four corner points. You can then create a YAML configuration file specifying the dataset paths, classes, and other necessary details. For more information on creating and configuring your datasets, refer to the [Supported Datasets](#supported-datasets) section.
+Yes, you can use your own dataset with oriented bounding boxes for YOLO11 training. Ensure your dataset annotations are converted to the YOLO OBB format, which involves defining bounding boxes by their four corner points. You can then create a YAML configuration file specifying the dataset paths, classes, and other necessary details. For more information on creating and configuring your datasets, refer to the [Supported Datasets](#supported-datasets) section.
diff --git a/docs/en/datasets/pose/coco.md b/docs/en/datasets/pose/coco.md
index 20042b40e25..81226c77ae7 100644
--- a/docs/en/datasets/pose/coco.md
+++ b/docs/en/datasets/pose/coco.md
@@ -12,14 +12,7 @@ The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialize
## COCO-Pose Pretrained Models
-| Model | size (pixels) | mAPpose 50-95 | mAPpose 50 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
-| ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 |
-| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 |
-| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 |
-| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 |
-| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 |
-| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 |
+{% include "macros/yolo-pose-perf.md" %}
## Key Features
@@ -31,8 +24,8 @@ The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialize
The COCO-Pose dataset is split into three subsets:
-1. **Train2017**: This subset contains a portion of the 118K images from the COCO dataset, annotated for training pose estimation models.
-2. **Val2017**: This subset has a selection of images used for validation purposes during model training.
+1. **Train2017**: This subset contains 56599 images from the COCO dataset, annotated for training pose estimation models.
+2. **Val2017**: This subset has 2346 images used for validation purposes during model training.
3. **Test2017**: This subset consists of images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation.
## Applications
@@ -51,7 +44,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-pose model on the COCO-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -61,7 +54,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https://
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640)
@@ -71,7 +64,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https://
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=coco-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -109,11 +102,11 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th
### What is the COCO-Pose dataset and how is it used with Ultralytics YOLO for pose estimation?
-The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset designed for pose estimation tasks. It builds upon the COCO Keypoints 2017 images and annotations, allowing for the training of models like Ultralytics YOLO for detailed pose estimation. For instance, you can use the COCO-Pose dataset to train a YOLOv8n-pose model by loading a pretrained model and training it with a YAML configuration. For training examples, refer to the [Training](../../modes/train.md) documentation.
+The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset designed for pose estimation tasks. It builds upon the COCO Keypoints 2017 images and annotations, allowing for the training of models like Ultralytics YOLO for detailed pose estimation. For instance, you can use the COCO-Pose dataset to train a YOLO11n-pose model by loading a pretrained model and training it with a YAML configuration. For training examples, refer to the [Training](../../modes/train.md) documentation.
-### How can I train a YOLOv8 model on the COCO-Pose dataset?
+### How can I train a YOLO11 model on the COCO-Pose dataset?
-Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using either Python or CLI commands. For example, to train a YOLOv8n-pose model for 100 epochs with an image size of 640, you can follow the steps below:
+Training a YOLO11 model on the COCO-Pose dataset can be accomplished using either Python or CLI commands. For example, to train a YOLO11n-pose model for 100 epochs with an image size of 640, you can follow the steps below:
!!! example "Train Example"
@@ -123,7 +116,7 @@ Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using eithe
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640)
@@ -133,27 +126,27 @@ Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using eithe
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=coco-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
For more details on the training process and available arguments, check the [training page](../../modes/train.md).
### What are the different metrics provided by the COCO-Pose dataset for evaluating model performance?
-The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the [accuracy](https://www.ultralytics.com/glossary/accuracy) of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. For instance, the COCO-Pose pretrained models such as YOLOv8n-pose, YOLOv8s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50.
+The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the [accuracy](https://www.ultralytics.com/glossary/accuracy) of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. For instance, the COCO-Pose pretrained models such as YOLO11n-pose, YOLO11s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50.
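To reproduce these metrics locally, a minimal validation sketch is shown below; it assumes the `yolo11n-pose.pt` weights and the standard `coco-pose.yaml` configuration, and the metric attribute names follow the current Ultralytics results API:

```python
from ultralytics import YOLO

# Validate a pretrained pose model on COCO-Pose and read the key pose metrics
model = YOLO("yolo11n-pose.pt")
metrics = model.val(data="coco-pose.yaml", imgsz=640)

print(metrics.pose.map)  # mAP(pose) 50-95
print(metrics.pose.map50)  # mAP(pose) 50
```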
### How is the dataset structured and split for the COCO-Pose dataset?
The COCO-Pose dataset is split into three subsets:
-1. **Train2017**: Contains a portion of the 118K COCO images, annotated for training pose estimation models.
-2. **Val2017**: Selected images for validation purposes during model training.
-3. **Test2017**: Images used for testing and benchmarking trained models. Ground truth annotations for this subset are not publicly available; results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation.
+1. **Train2017**: Contains 56599 COCO images, annotated for training pose estimation models.
+2. **Val2017**: 2346 images for validation purposes during model training.
+3. **Test2017**: Images used for testing and benchmarking trained models. Ground truth annotations for this subset are not publicly available; results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7403) for performance evaluation.
These subsets help organize the training, validation, and testing phases effectively. For configuration details, explore the `coco-pose.yaml` file available on [GitHub](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml).
### What are the key features and applications of the COCO-Pose dataset?
-The COCO-Pose dataset extends the COCO Keypoints 2017 annotations to include 17 keypoints for human figures, enabling detailed pose estimation. Standardized evaluation metrics (e.g., OKS) facilitate comparisons across different models. Applications of the COCO-Pose dataset span various domains, such as sports analytics, healthcare, and human-computer interaction, wherever detailed pose estimation of human figures is required. For practical use, leveraging pretrained models like those provided in the documentation (e.g., YOLOv8n-pose) can significantly streamline the process ([Key Features](#key-features)).
+The COCO-Pose dataset extends the COCO Keypoints 2017 annotations to include 17 keypoints for human figures, enabling detailed pose estimation. Standardized evaluation metrics (e.g., OKS) facilitate comparisons across different models. Applications of the COCO-Pose dataset span various domains, such as sports analytics, healthcare, and human-computer interaction, wherever detailed pose estimation of human figures is required. For practical use, leveraging pretrained models like those provided in the documentation (e.g., YOLO11n-pose) can significantly streamline the process ([Key Features](#key-features)).
If you use the COCO-Pose dataset in your research or development work, please cite the paper with the following [BibTeX entry](#citations-and-acknowledgments).
diff --git a/docs/en/datasets/pose/coco8-pose.md b/docs/en/datasets/pose/coco8-pose.md
index 95157b794e6..ab24f862072 100644
--- a/docs/en/datasets/pose/coco8-pose.md
+++ b/docs/en/datasets/pose/coco8-pose.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Explore the compact, versatile COCO8-Pose dataset for testing and debugging object detection models. Ideal for quick experiments with YOLOv8.
-keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOLOv8, machine learning, computer vision, training data
+description: Explore the compact, versatile COCO8-Pose dataset for testing and debugging object detection models. Ideal for quick experiments with YOLO11.
+keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOLO11, machine learning, computer vision, training data
---
# COCO8-Pose Dataset
@@ -10,7 +10,7 @@ keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOL
[Ultralytics](https://www.ultralytics.com/) COCO8-Pose is a small, but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging [object detection](https://www.ultralytics.com/glossary/object-detection) models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
-This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
## Dataset YAML
@@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-pose model on the COCO8-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -34,7 +34,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https:/
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640)
@@ -44,7 +44,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https:/
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -80,13 +80,13 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th
## FAQ
-### What is the COCO8-Pose dataset, and how is it used with Ultralytics YOLOv8?
+### What is the COCO8-Pose dataset, and how is it used with Ultralytics YOLO11?
-The COCO8-Pose dataset is a small, versatile pose detection dataset that includes the first 8 images from the COCO train 2017 set, with 4 images for training and 4 for validation. It's designed for testing and debugging object detection models and experimenting with new detection approaches. This dataset is ideal for quick experiments with [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/). For more details on dataset configuration, check out the dataset YAML file [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml).
+The COCO8-Pose dataset is a small, versatile pose detection dataset that includes the first 8 images from the COCO train 2017 set, with 4 images for training and 4 for validation. It's designed for testing and debugging object detection models and experimenting with new detection approaches. This dataset is ideal for quick experiments with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/). For more details on dataset configuration, check out the dataset YAML file [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml).
-### How do I train a YOLOv8 model using the COCO8-Pose dataset in Ultralytics?
+### How do I train a YOLO11 model using the COCO8-Pose dataset in Ultralytics?
-To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, follow these examples:
+To train a YOLO11n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, follow these examples:
!!! example "Train Example"
@@ -96,7 +96,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt")
+ model = YOLO("yolo11n-pose.pt")
# Train the model
results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640)
@@ -105,7 +105,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i
=== "CLI"
```bash
- yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page.
@@ -120,12 +120,12 @@ The COCO8-Pose dataset offers several benefits:
For more about its features and usage, see the [Dataset Introduction](#introduction) section.
-### How does mosaicing benefit the YOLOv8 training process using the COCO8-Pose dataset?
+### How does mosaicing benefit the YOLO11 training process using the COCO8-Pose dataset?
Mosaicing, demonstrated in the sample images of the COCO8-Pose dataset, combines multiple images into one, increasing the variety of objects and scenes within each training batch. This technique helps improve the model's ability to generalize across various object sizes, aspect ratios, and contexts, ultimately enhancing model performance. See the [Sample Images and Annotations](#sample-images-and-annotations) section for example images.
### Where can I find the COCO8-Pose dataset YAML file and how do I use it?
-The COCO8-Pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLOv8 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolov8-model-using-the-coco8-pose-dataset-in-ultralytics) section.
+The COCO8-Pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-coco8-pose-dataset-in-ultralytics) section.
For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/).
diff --git a/docs/en/datasets/pose/dog-pose.md b/docs/en/datasets/pose/dog-pose.md
new file mode 100644
index 00000000000..fa6acb0755f
--- /dev/null
+++ b/docs/en/datasets/pose/dog-pose.md
@@ -0,0 +1,141 @@
+---
+comments: true
+description: Discover the Dog-Pose dataset for pose detection. Featuring 6,773 training and 1,703 test images, it's a robust dataset for training YOLO11 models.
+keywords: Dog-Pose, Ultralytics, pose detection dataset, YOLO11, machine learning, computer vision, training data
+---
+
+# Dog-Pose Dataset
+
+## Introduction
+
+The [Ultralytics](https://www.ultralytics.com/) Dog-pose dataset is a high-quality and extensive dataset specifically curated for dog keypoint estimation. With 6,773 training images and 1,703 test images, this dataset provides a solid foundation for training robust pose estimation models. Each annotated image includes 24 keypoints with 3 dimensions per keypoint (x, y, visibility), making it a valuable resource for advanced research and development in computer vision.
+
+
+
+This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
+
+## Dataset YAML
+
+A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It includes paths, keypoint details, and other relevant information. In the case of the Dog-pose dataset, the `dog-pose.yaml` file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml).
+
+!!! example "ultralytics/cfg/datasets/dog-pose.yaml"
+
+ ```yaml
+ --8<-- "ultralytics/cfg/datasets/dog-pose.yaml"
+ ```
+
+## Usage
+
+To train a YOLO11n-pose model on the Dog-pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+
+!!! example "Train Example"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a model
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
+
+ # Train the model
+ results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Start training from a pretrained *.pt model
+ yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
+ ```
+
+## Sample Images and Annotations
+
+Here are some examples of images from the Dog-pose dataset, along with their corresponding annotations:
+
+
+
+- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
+
+The example showcases the variety and complexity of the images in the Dog-pose dataset and the benefits of using mosaicing during the training process.
+
+## Citations and Acknowledgments
+
+If you use the Dog-pose dataset in your research or development work, please cite the following paper:
+
+!!! quote ""
+
+ === "BibTeX"
+
+ ```bibtex
+ @inproceedings{khosla2011fgvc,
+ title={Novel dataset for Fine-Grained Image Categorization},
+ author={Aditya Khosla and Nityananda Jayadevaprakash and Bangpeng Yao and Li Fei-Fei},
+ booktitle={First Workshop on Fine-Grained Visual Categorization (FGVC), IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year={2011}
+ }
+ @inproceedings{deng2009imagenet,
+ title={ImageNet: A Large-Scale Hierarchical Image Database},
+ author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
+ booktitle={IEEE Computer Vision and Pattern Recognition (CVPR)},
+ year={2009}
+ }
+ ```
+
+We would like to acknowledge the Stanford team for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the Dog-pose dataset and its creators, visit the [Stanford Dogs Dataset website](http://vision.stanford.edu/aditya86/ImageNetDogs/).
+
+## FAQ
+
+### What is the Dog-pose dataset, and how is it used with Ultralytics YOLO11?
+
+The Dog-Pose dataset features 6,773 training and 1,703 test images annotated with 24 keypoints for dog pose estimation. Ideal for training and validating models with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/), it supports applications like animal behavior analysis and veterinary studies.
+
+### How do I train a YOLO11 model using the Dog-pose dataset in Ultralytics?
+
+To train a YOLO11n-pose model on the Dog-pose dataset for 100 epochs with an image size of 640, follow these examples:
+
+!!! example "Train Example"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a model
+ model = YOLO("yolo11n-pose.pt")
+
+ # Train the model
+ results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
+ ```
+
+For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page.
+
+### What are the benefits of using the Dog-pose dataset?
+
+The Dog-pose dataset offers several benefits:
+
+**Large and Diverse Dataset**: With 6,773 training and 1,703 test images, it provides a substantial amount of data covering a wide range of dog poses, breeds, and contexts, enabling robust model training and evaluation.
+
+**Pose-specific Annotations**: Offers detailed annotations for pose estimation, ensuring high-quality data for training pose detection models.
+
+**Real-World Scenarios**: Includes images from varied environments, enhancing the model's ability to generalize to real-world applications.
+
+**Model Performance Improvement**: The diversity and scale of the dataset help improve model accuracy and robustness, particularly for tasks involving fine-grained pose estimation.
+
+For more about its features and usage, see the [Dataset Introduction](#introduction) section.
+
+### How does mosaicing benefit the YOLO11 training process using the Dog-pose dataset?
+
+Mosaicing, as illustrated in the sample images from the Dog-pose dataset, merges multiple images into a single composite, enriching the diversity of objects and scenes in each training batch. This approach enhances the model's capacity to generalize across different object sizes, aspect ratios, and contexts, leading to improved performance. For example images, refer to the [Sample Images and Annotations](#sample-images-and-annotations) section.
+
+### Where can I find the Dog-pose dataset YAML file and how do I use it?
+
+The Dog-pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-dog-pose-dataset-in-ultralytics) section.
+
+For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/).
diff --git a/docs/en/datasets/pose/hand-keypoints.md b/docs/en/datasets/pose/hand-keypoints.md
index c14bd5c5302..559cdcec657 100644
--- a/docs/en/datasets/pose/hand-keypoints.md
+++ b/docs/en/datasets/pose/hand-keypoints.md
@@ -8,7 +8,18 @@ keywords: Hand KeyPoints, pose estimation, dataset, keypoints, MediaPipe, YOLO,
## Introduction
-The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high accuracy and consistency, and the dataset is compatible [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) formats.
+The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high [accuracy](https://www.ultralytics.com/glossary/accuracy) and consistency, and the dataset is compatible with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) formats.
+
+ Watch: Hand Keypoints Estimation with Ultralytics YOLO11 | Human Hand Pose Estimation Tutorial
+
## Hand Landmarks
@@ -30,7 +41,7 @@ Each hand has a total of 21 keypoints.
## Key Features
- **Large Dataset**: 26,768 images with hand keypoint annotations.
-- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models.
+- **YOLO11 Compatibility**: Ready for use with YOLO11 models.
- **21 Keypoints**: Detailed hand pose representation.
## Dataset Structure
@@ -56,7 +67,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-pose model on the Hand Keypoints dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -66,7 +77,7 @@ To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](htt
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640)
@@ -76,7 +87,7 @@ To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](htt
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=hand-keypoints.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -109,9 +120,9 @@ We would also like to acknowledge the creator of this dataset, [Rion Dsilva](htt
## FAQ
-### How do I train a YOLOv8 model on the Hand Keypoints dataset?
+### How do I train a YOLO11 model on the Hand Keypoints dataset?
-To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python or the command line interface (CLI). Here's an example for training a YOLOv8n-pose model for 100 epochs with an image size of 640:
+To train a YOLO11 model on the Hand Keypoints dataset, you can use either Python or the command line interface (CLI). Here's an example for training a YOLO11n-pose model for 100 epochs with an image size of 640:
!!! Example
@@ -121,7 +132,7 @@ To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640)
@@ -131,7 +142,7 @@ To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=hand-keypoints.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
@@ -141,7 +152,7 @@ For a comprehensive list of available arguments, refer to the model [Training](.
The Hand Keypoints dataset is designed for advanced pose estimation tasks and includes several key features:
- **Large Dataset**: Contains 26,768 images with hand keypoint annotations.
-- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models.
+- **YOLO11 Compatibility**: Ready for use with YOLO11 models.
- **21 Keypoints**: Detailed hand pose representation, including wrist and finger joints.
For more details, you can explore the [Hand Keypoints Dataset](#introduction) section.
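To make the 21-keypoint output concrete, here is a small inference sketch; the weights file and image path are placeholders for a model you have trained on this dataset:

```python
from ultralytics import YOLO

# Run a hand-pose model on an image and read the 21 keypoints per detected hand
model = YOLO("path/to/hand-keypoints-best.pt")  # placeholder weights
results = model("hand.jpg")  # placeholder image

for result in results:
    kpts = result.keypoints  # Keypoints object
    print(kpts.xy.shape)  # (num_hands, 21, 2) pixel coordinates
    print(kpts.conf)  # per-keypoint confidence, if available
```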
diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md
index a8bfd222963..0473b310553 100644
--- a/docs/en/datasets/pose/index.md
+++ b/docs/en/datasets/pose/index.md
@@ -42,7 +42,7 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode
```yaml
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8-pose # dataset root dir
+path: ../datasets/coco8-pose # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir)
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
@@ -72,7 +72,7 @@ The `train` and `val` fields specify the paths to the directories containing the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640)
@@ -82,7 +82,7 @@ The `train` and `val` fields specify the paths to the directories containing the
```bash
# Start training from a pretrained *.pt model
- yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Supported Datasets
@@ -127,6 +127,15 @@ This section outlines the datasets that are compatible with Ultralytics YOLO for
- **Usage**: Great for human hand pose estimation.
- [Read more about Hand Keypoints](hand-keypoints.md)
+### Dog-Pose
+
+- **Description**: The Dog-Pose dataset contains 6,773 training and 1,703 test images, providing a diverse and extensive resource for training and validation of dog pose estimation models.
+- **Label Format**: Follows the Ultralytics YOLO format, with annotations for multiple keypoints specific to dog anatomy.
+- **Number of Classes**: 1 (Dog).
+- **Keypoints**: Includes 24 keypoints tailored to dog poses, such as limbs, joints, and head positions.
+- **Usage**: Ideal for training models to estimate dog poses in various scenarios, from research to real-world applications.
+- [Read more about Dog-Pose](dog-pose.md)
+
### Adding your own dataset
If you have your own dataset and would like to use it for training pose estimation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file.
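As a rough sketch of that configuration step, you could generate the YAML from Python and train on it; the file names, paths, class names, and the 2-keypoint skeleton below are purely illustrative assumptions:

```python
import yaml

from ultralytics import YOLO

# Minimal pose dataset config: paths, keypoint shape, and class names (all values are placeholders)
data = {
    "path": "../datasets/my-pose",  # dataset root
    "train": "images/train",
    "val": "images/val",
    "kpt_shape": [2, 3],  # 2 keypoints, each with (x, y, visibility)
    "names": {0: "object"},
}
with open("my-pose.yaml", "w") as f:
    yaml.safe_dump(data, f, sort_keys=False)

# Train a pose model on the custom dataset definition
model = YOLO("yolo11n-pose.pt")
results = model.train(data="my-pose.yaml", epochs=100, imgsz=640)
```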
@@ -171,7 +180,7 @@ To use the COCO-Pose dataset with Ultralytics YOLO:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n-pose.pt") # load pretrained model
+ model = YOLO("yolo11n-pose.pt") # load pretrained model
results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640)
```
@@ -188,7 +197,7 @@ To add your dataset:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n-pose.pt")
+ model = YOLO("yolo11n-pose.pt")
results = model.train(data="your-dataset.yaml", epochs=100, imgsz=640)
```
diff --git a/docs/en/datasets/pose/tiger-pose.md b/docs/en/datasets/pose/tiger-pose.md
index 06333b345b5..d97d9996640 100644
--- a/docs/en/datasets/pose/tiger-pose.md
+++ b/docs/en/datasets/pose/tiger-pose.md
@@ -1,7 +1,7 @@
---
comments: true
description: Explore Ultralytics Tiger-Pose dataset with 263 diverse images. Ideal for testing, training, and refining pose estimation algorithms.
-keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLOv8, training data, machine learning, neural networks
+keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLO11, training data, machine learning, neural networks
---
# Tiger-Pose Dataset
@@ -12,7 +12,7 @@ keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLOv8, training da
Despite its manageable size of 210 images, tiger-pose dataset offers diversity, making it suitable for assessing training pipelines, identifying potential errors, and serving as a valuable preliminary step before working with larger datasets for pose estimation.
-This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
@@ -22,7 +22,7 @@ This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.
allowfullscreen>
- Watch: Train YOLOv8 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB
+ Watch: Train YOLO11 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB
## Dataset YAML
@@ -37,7 +37,7 @@ A YAML (Yet Another Markup Language) file serves as the means to specify the con
## Usage
-To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-pose model on the Tiger-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -47,7 +47,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https:/
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="tiger-pose.yaml", epochs=100, imgsz=640)
@@ -57,7 +57,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https:/
```bash
# Start training from a pretrained *.pt model
- yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo task=pose mode=train data=tiger-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -101,11 +101,11 @@ The dataset has been released available under the [AGPL-3.0 License](https://git
### What is the Ultralytics Tiger-Pose dataset used for?
-The Ultralytics Tiger-Pose dataset is designed for pose estimation tasks, consisting of 263 images sourced from a [YouTube video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0). The dataset is divided into 210 training images and 53 validation images. It is particularly useful for testing, training, and refining pose estimation algorithms using [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+The Ultralytics Tiger-Pose dataset is designed for pose estimation tasks, consisting of 263 images sourced from a [YouTube video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0). The dataset is divided into 210 training images and 53 validation images. It is particularly useful for testing, training, and refining pose estimation algorithms using [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
-### How do I train a YOLOv8 model on the Tiger-Pose dataset?
+### How do I train a YOLO11 model on the Tiger-Pose dataset?
-To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an image size of 640, use the following code snippets. For more details, visit the [Training](../../modes/train.md) page:
+To train a YOLO11n-pose model on the Tiger-Pose dataset for 100 epochs with an image size of 640, use the following code snippets. For more details, visit the [Training](../../modes/train.md) page:
!!! example "Train Example"
@@ -115,7 +115,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="tiger-pose.yaml", epochs=100, imgsz=640)
@@ -126,16 +126,16 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i
```bash
# Start training from a pretrained *.pt model
- yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
+ yolo task=pose mode=train data=tiger-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
### What configurations does the `tiger-pose.yaml` file include?
The `tiger-pose.yaml` file is used to specify the configuration details of the Tiger-Pose dataset. It includes crucial data such as file paths and class definitions. To see the exact configuration, you can check out the [Ultralytics Tiger-Pose Dataset Configuration File](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/tiger-pose.yaml).
-### How can I run inference using a YOLOv8 model trained on the Tiger-Pose dataset?
+### How can I run inference using a YOLO11 model trained on the Tiger-Pose dataset?
-To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you can use the following code snippets. For a detailed guide, visit the [Prediction](../../modes/predict.md) page:
+To perform inference using a YOLO11 model trained on the Tiger-Pose dataset, you can use the following code snippets. For a detailed guide, visit the [Prediction](../../modes/predict.md) page:
!!! example "Inference Example"
@@ -161,4 +161,4 @@ To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you
### What are the benefits of using the Tiger-Pose dataset for pose estimation?
-The Tiger-Pose dataset, despite its manageable size of 210 images for training, provides a diverse collection of images that are ideal for testing pose estimation pipelines. The dataset helps identify potential errors and acts as a preliminary step before working with larger datasets. Additionally, the dataset supports the training and refinement of pose estimation algorithms using advanced tools like [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics), enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy).
+The Tiger-Pose dataset, despite its manageable size of 210 images for training, provides a diverse collection of images that are ideal for testing pose estimation pipelines. The dataset helps identify potential errors and acts as a preliminary step before working with larger datasets. Additionally, the dataset supports the training and refinement of pose estimation algorithms using advanced tools like [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics), enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy).
diff --git a/docs/en/datasets/segment/carparts-seg.md b/docs/en/datasets/segment/carparts-seg.md
index b798cacad15..e3437c5107b 100644
--- a/docs/en/datasets/segment/carparts-seg.md
+++ b/docs/en/datasets/segment/carparts-seg.md
@@ -6,19 +6,21 @@ keywords: Carparts Segmentation Dataset, Roboflow, computer vision, automotive A
# Roboflow Universe Carparts Segmentation Dataset
+
+
The [Roboflow](https://roboflow.com/?ref=ultralytics) [Carparts Segmentation Dataset](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm?ref=ultralytics) is a curated collection of images and videos designed for [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications, specifically focusing on segmentation tasks related to car parts. This dataset provides a diverse set of visuals captured from multiple perspectives, offering valuable annotated examples for training and testing segmentation models.
Whether you're working on automotive research, developing AI solutions for vehicle maintenance, or exploring computer vision applications, the Carparts Segmentation Dataset serves as a valuable resource for enhancing accuracy and efficiency in your projects.
-
- Watch: Carparts [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation) Using Ultralytics HUB
+ Watch: Carparts Instance Segmentation with Ultralytics YOLO11
## Dataset Structure
@@ -45,7 +47,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train Ultralytics YOLO11n model on the Carparts Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -55,7 +57,7 @@ To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="carparts-seg.yaml", epochs=100, imgsz=640)
@@ -65,7 +67,7 @@ To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=carparts-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=carparts-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -108,9 +110,9 @@ We extend our thanks to the Roboflow team for their dedication in developing and
The [Roboflow Carparts Segmentation Dataset](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm?ref=ultralytics) is a curated collection of images and videos specifically designed for car part segmentation tasks in computer vision. This dataset includes a diverse range of visuals captured from multiple perspectives, making it an invaluable resource for training and testing segmentation models for automotive applications.
-### How can I use the Carparts Segmentation Dataset with Ultralytics YOLOv8?
+### How can I use the Carparts Segmentation Dataset with Ultralytics YOLO11?
-To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow these steps:
+To train a YOLO11 model on the Carparts Segmentation dataset, you can follow these steps:
!!! example "Train Example"
@@ -120,7 +122,7 @@ To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="carparts-seg.yaml", epochs=100, imgsz=640)
@@ -130,7 +132,7 @@ To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow the
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=carparts-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=carparts-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
For more details, refer to the [Training](../../modes/train.md) documentation.
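After training, the predicted car-part masks can be inspected directly from the results objects; a minimal sketch follows, where the weights file and image path are placeholders:

```python
from ultralytics import YOLO

# Run a trained carparts segmentation model and read the predicted masks
model = YOLO("path/to/carparts-best.pt")  # placeholder weights
results = model("car.jpg")  # placeholder image

for result in results:
    if result.masks is not None:
        print(len(result.masks.xy))  # number of segmented parts
        print(result.names[int(result.boxes.cls[0])])  # class name of the first detection
```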
diff --git a/docs/en/datasets/segment/coco.md b/docs/en/datasets/segment/coco.md
index 0f403c69afa..2dd8a0f53a4 100644
--- a/docs/en/datasets/segment/coco.md
+++ b/docs/en/datasets/segment/coco.md
@@ -1,7 +1,7 @@
---
comments: true
description: Explore the COCO-Seg dataset, an extension of COCO, with detailed segmentation annotations. Learn how to train YOLO models with COCO-Seg.
-keywords: COCO-Seg, dataset, YOLO models, instance segmentation, object detection, COCO dataset, YOLOv8, computer vision, Ultralytics, machine learning
+keywords: COCO-Seg, dataset, YOLO models, instance segmentation, object detection, COCO dataset, YOLO11, computer vision, Ultralytics, machine learning
---
# COCO-Seg Dataset
@@ -10,13 +10,7 @@ The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO
## COCO-Seg Pretrained Models
-| Model | size (pixels) | mAPbox 50-95 | mAPmask 50-95 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
-| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 |
-| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 |
-| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 |
-| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 |
-| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 |
+{% include "macros/yolo-seg-perf.md" %}
## Key Features
@@ -49,7 +43,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-seg model on the COCO-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -59,17 +53,17 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 [epochs](https://ww
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
- results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640)
+ results = model.train(data="coco.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -109,9 +103,9 @@ We extend our thanks to the COCO Consortium for creating and maintaining this in
The [COCO-Seg](https://cocodataset.org/#home) dataset is an extension of the original COCO (Common Objects in Context) dataset, specifically designed for instance segmentation tasks. While it uses the same images as the COCO dataset, COCO-Seg includes more detailed segmentation annotations, making it a powerful resource for researchers and developers focusing on object instance segmentation.
-### How can I train a YOLOv8 model using the COCO-Seg dataset?
+### How can I train a YOLO11 model using the COCO-Seg dataset?
-To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a detailed list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a detailed list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -121,17 +115,17 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
- results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640)
+ results = model.train(data="coco.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
### What are the key features of the COCO-Seg dataset?
@@ -145,15 +139,9 @@ The COCO-Seg dataset includes several key features:
### What pretrained models are available for COCO-Seg, and what are their performance metrics?
-The COCO-Seg dataset supports multiple pretrained YOLOv8 segmentation models with varying performance metrics. Here's a summary of the available models and their key metrics:
+The COCO-Seg dataset supports multiple pretrained YOLO11 segmentation models with varying performance metrics. Here's a summary of the available models and their key metrics:
-| Model | size (pixels) | mAPbox 50-95 | mAPmask 50-95 | Speed CPU ONNX (ms) | Speed A100 TensorRT (ms) | params (M) | FLOPs (B) |
-| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
-| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 |
-| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 |
-| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 |
-| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 |
-| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 |
+{% include "macros/yolo-seg-perf.md" %}
### How is the COCO-Seg dataset structured and what subsets does it contain?
diff --git a/docs/en/datasets/segment/coco8-seg.md b/docs/en/datasets/segment/coco8-seg.md
index 21abf3d8029..3fc69745138 100644
--- a/docs/en/datasets/segment/coco8-seg.md
+++ b/docs/en/datasets/segment/coco8-seg.md
@@ -1,7 +1,7 @@
---
comments: true
description: Discover the versatile and manageable COCO8-Seg dataset by Ultralytics, ideal for testing and debugging segmentation models or new detection approaches.
-keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLOv8, COCO 2017, model training, computer vision, dataset configuration
+keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLO11, COCO 2017, model training, computer vision, dataset configuration
---
# COCO8-Seg Dataset
@@ -10,7 +10,7 @@ keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLOv8, COCO 2017, model
[Ultralytics](https://www.ultralytics.com/) COCO8-Seg is a small, but versatile [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging segmentation models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
-This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics).
+This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
## Dataset YAML
@@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train a YOLO11n-seg model on the COCO8-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -34,7 +34,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://w
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640)
@@ -44,7 +44,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://w
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
@@ -80,13 +80,13 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th
## FAQ
-### What is the COCO8-Seg dataset, and how is it used in Ultralytics YOLOv8?
+### What is the COCO8-Seg dataset, and how is it used in Ultralytics YOLO11?
-The **COCO8-Seg dataset** is a compact instance segmentation dataset by Ultralytics, consisting of the first 8 images from the COCO train 2017 set: 4 images for training and 4 for validation. This dataset is tailored for testing and debugging segmentation models or experimenting with new detection methods. It is particularly useful with Ultralytics [YOLOv8](https://github.com/ultralytics/ultralytics) and [HUB](https://hub.ultralytics.com/) for rapid iteration and pipeline error-checking before scaling to larger datasets. For detailed usage, refer to the model [Training](../../modes/train.md) page.
+The **COCO8-Seg dataset** is a compact instance segmentation dataset by Ultralytics, consisting of the first 8 images from the COCO train 2017 set: 4 images for training and 4 for validation. This dataset is tailored for testing and debugging segmentation models or experimenting with new detection methods. It is particularly useful with Ultralytics [YOLO11](https://github.com/ultralytics/ultralytics) and [HUB](https://hub.ultralytics.com/) for rapid iteration and pipeline error-checking before scaling to larger datasets. For detailed usage, refer to the model [Training](../../modes/train.md) page.
-### How can I train a YOLOv8n-seg model using the COCO8-Seg dataset?
+### How can I train a YOLO11n-seg model using the COCO8-Seg dataset?
-To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use Python or CLI commands. Here's a quick example:
+To train a **YOLO11n-seg** model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use Python or CLI commands. Here's a quick example:
!!! example "Train Example"
@@ -96,7 +96,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # Load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # Load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640)
@@ -106,7 +106,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
For a thorough explanation of available arguments and configuration options, you can check the [Training](../../modes/train.md) documentation.
diff --git a/docs/en/datasets/segment/crack-seg.md b/docs/en/datasets/segment/crack-seg.md
index f5ffbe92e0a..1526fa5e90a 100644
--- a/docs/en/datasets/segment/crack-seg.md
+++ b/docs/en/datasets/segment/crack-seg.md
@@ -34,7 +34,7 @@ A YAML (Yet Another Markup Language) file is employed to outline the configurati
## Usage
-To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train an Ultralytics YOLO11n model on the Crack Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -44,7 +44,7 @@ To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [ep
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="crack-seg.yaml", epochs=100, imgsz=640)
@@ -54,7 +54,7 @@ To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [ep
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=crack-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=crack-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -98,9 +98,9 @@ We would like to acknowledge the Roboflow team for creating and maintaining the
The [Roboflow Crack Segmentation Dataset](https://universe.roboflow.com/university-bswxt/crack-bphdr?ref=ultralytics) is a comprehensive collection of 4029 static images designed specifically for transportation and public safety studies. It is ideal for tasks such as self-driving car model development and infrastructure maintenance. The dataset includes training, testing, and validation sets, aiding in accurate crack detection and segmentation.
-### How do I train a model using the Crack Segmentation Dataset with Ultralytics YOLOv8?
+### How do I train a model using the Crack Segmentation Dataset with Ultralytics YOLO11?
-To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the following code snippets. Detailed instructions and further parameters can be found on the model [Training](../../modes/train.md) page.
+To train an Ultralytics YOLO11 model on the Crack Segmentation dataset, use the following code snippets. Detailed instructions and further parameters can be found on the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -110,7 +110,7 @@ To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="crack-seg.yaml", epochs=100, imgsz=640)
@@ -120,7 +120,7 @@ To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=crack-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=crack-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
### Why should I use the Crack Segmentation Dataset for my self-driving car project?
diff --git a/docs/en/datasets/segment/index.md b/docs/en/datasets/segment/index.md
index 52b19781640..9f88aea3c48 100644
--- a/docs/en/datasets/segment/index.md
+++ b/docs/en/datasets/segment/index.md
@@ -44,7 +44,7 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode
```yaml
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8-seg # dataset root dir
+path: ../datasets/coco8-seg # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir)
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
@@ -74,7 +74,7 @@ The `train` and `val` fields specify the paths to the directories containing the
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640)
@@ -84,7 +84,7 @@ The `train` and `val` fields specify the paths to the directories containing the
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Supported Datasets
@@ -137,18 +137,12 @@ To auto-annotate your dataset using the Ultralytics framework, you can use the `
```python
from ultralytics.data.annotator import auto_annotate
- auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam_b.pt")
+ auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam_b.pt")
```
-| Argument | Type | Description | Default |
-| ------------ | ----------------------- | ----------------------------------------------------------------------------------------------------------- | -------------- |
-| `data` | `str` | Path to a folder containing images to be annotated. | `None` |
-| `det_model` | `str, optional` | Pre-trained YOLO detection model. Defaults to `'yolov8x.pt'`. | `'yolov8x.pt'` |
-| `sam_model` | `str, optional` | Pre-trained SAM segmentation model. Defaults to `'sam_b.pt'`. | `'sam_b.pt'` |
-| `device` | `str, optional` | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | `''` |
-| `output_dir` | `str or None, optional` | Directory to save the annotated results. Defaults to a `'labels'` folder in the same directory as `'data'`. | `None` |
+{% include "macros/sam-auto-annotate.md" %}
-The `auto_annotate` function takes the path to your images, along with optional arguments for specifying the pre-trained detection and [SAM segmentation models](../../models/sam.md), the device to run the models on, and the output directory for saving the annotated results.
+The `auto_annotate` function takes the path to your images, along with optional arguments for specifying the pre-trained detection model (e.g. [YOLO11](../../models/yolo11.md), [YOLOv8](../../models/yolov8.md), or other [models](../../models/index.md)), the segmentation model (e.g. [SAM](../../models/sam.md), [SAM2](../../models/sam-2.md), or [MobileSAM](../../models/mobile-sam.md)), the device to run the models on, and the output directory for saving the annotated results.
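As a quick sketch of how those optional arguments combine (the data and output paths here are placeholders, not values from this guide):

```python
from ultralytics.data.annotator import auto_annotate

# Sketch using the optional arguments described above; paths are placeholders
auto_annotate(
    data="path/to/images",  # folder of images to annotate
    det_model="yolo11x.pt",  # detection model that proposes bounding boxes
    sam_model="sam_b.pt",  # segmentation model that converts boxes into masks
    device="cpu",  # or a GPU device string such as "0"
    output_dir="path/to/labels",  # where the generated label files are written
)
```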
By leveraging the power of pre-trained models, auto-annotation can significantly reduce the time and effort required for creating high-quality segmentation datasets. This feature is particularly useful for researchers and developers working with large image collections, as it allows them to focus on model development and evaluation rather than manual annotation.
@@ -195,7 +189,7 @@ Auto-annotation in Ultralytics YOLO allows you to generate segmentation annotati
```python
from ultralytics.data.annotator import auto_annotate
-auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam_b.pt")
+auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam_b.pt") # or sam_model="mobile_sam.pt"
```
-This function automates the annotation process, making it faster and more efficient. For more details, explore the [Auto-Annotation](#auto-annotation) section.
+This function automates the annotation process, making it faster and more efficient. For more details, explore the [Auto-Annotate Reference](https://docs.ultralytics.com/reference/data/annotator/#ultralytics.data.annotator.auto_annotate).
diff --git a/docs/en/datasets/segment/package-seg.md b/docs/en/datasets/segment/package-seg.md
index 477072fb574..bd6446983c5 100644
--- a/docs/en/datasets/segment/package-seg.md
+++ b/docs/en/datasets/segment/package-seg.md
@@ -6,6 +6,8 @@ keywords: Roboflow, Package Segmentation Dataset, computer vision, package ident
# Roboflow Universe Package Segmentation Dataset
+
+
The [Roboflow](https://roboflow.com/?ref=ultralytics) [Package Segmentation Dataset](https://universe.roboflow.com/factorypackage/factory_package?ref=ultralytics) is a curated collection of images specifically tailored for tasks related to package segmentation in the field of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This dataset is designed to assist researchers, developers, and enthusiasts working on projects related to package identification, sorting, and handling.
Containing a diverse set of images showcasing various packages in different contexts and environments, the dataset serves as a valuable resource for training and evaluating segmentation models. Whether you are engaged in logistics, warehouse automation, or any application requiring precise package analysis, the Package Segmentation Dataset provides a targeted and comprehensive set of images to enhance the performance of your computer vision algorithms.
@@ -34,7 +36,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
-To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
+To train an Ultralytics YOLO11n model on the Package Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -44,7 +46,7 @@ To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="package-seg.yaml", epochs=100, imgsz=640)
@@ -54,7 +56,7 @@ To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=package-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
@@ -97,9 +99,9 @@ We express our gratitude to the Roboflow team for their efforts in creating and
The [Roboflow Package Segmentation Dataset](https://universe.roboflow.com/factorypackage/factory_package?ref=ultralytics) is a curated collection of images tailored for tasks involving package segmentation. It includes diverse images of packages in various contexts, making it invaluable for training and evaluating segmentation models. This dataset is particularly useful for applications in logistics, warehouse automation, and any project requiring precise package analysis. It helps optimize logistics and enhance vision models for accurate package identification and sorting.
-### How do I train an Ultralytics YOLOv8 model on the Package Segmentation Dataset?
+### How do I train an Ultralytics YOLO11 model on the Package Segmentation Dataset?
-You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Use the snippets below:
+You can train an Ultralytics YOLO11n model using either the Python API or the CLI. Use the snippets below:
!!! example "Train Example"
@@ -109,7 +111,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt") # load a pretrained model
+ model = YOLO("yolo11n-seg.pt") # load a pretrained model
# Train the model
results = model.train(data="package-seg.yaml", epochs=100, imgsz=640)
@@ -119,7 +121,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us
```bash
# Start training from a pretrained *.pt model
- yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
+ yolo segment train data=package-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640
```
Refer to the model [Training](../../modes/train.md) page for more details.
@@ -134,9 +136,9 @@ The dataset is structured into three main components:
This structure ensures a balanced dataset for thorough model training, validation, and testing, enhancing the performance of segmentation algorithms.
-### Why should I use Ultralytics YOLOv8 with the Package Segmentation Dataset?
+### Why should I use Ultralytics YOLO11 with the Package Segmentation Dataset?
-Ultralytics YOLOv8 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed for real-time object detection and segmentation tasks. Using it with the Package Segmentation Dataset allows you to leverage YOLOv8's capabilities for precise package segmentation. This combination is especially beneficial for industries like logistics and warehouse automation, where accurate package identification is critical. For more information, check out our [page on YOLOv8 segmentation](https://docs.ultralytics.com/models/yolov8/).
+Ultralytics YOLO11 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed for real-time object detection and segmentation tasks. Using it with the Package Segmentation Dataset allows you to leverage YOLO11's capabilities for precise package segmentation. This combination is especially beneficial for industries like logistics and warehouse automation, where accurate package identification is critical. For more information, check out our [page on YOLO11 segmentation](https://docs.ultralytics.com/models/yolo11/).
### How can I access and use the package-seg.yaml file for the Package Segmentation Dataset?
diff --git a/docs/en/datasets/track/index.md b/docs/en/datasets/track/index.md
index f9a8b4f81b5..0aa3d8c50bf 100644
--- a/docs/en/datasets/track/index.md
+++ b/docs/en/datasets/track/index.md
@@ -19,14 +19,14 @@ Multi-Object Detector doesn't need standalone training and directly supports pre
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)
```
=== "CLI"
```bash
- yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show
+    yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show
```
## FAQ
@@ -42,17 +42,17 @@ To use Multi-Object Tracking with Ultralytics YOLO, you can start by using the P
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt") # Load the YOLOv8 model
+ model = YOLO("yolo11n.pt") # Load the YOLO11 model
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)
```
=== "CLI"
```bash
- yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show
+ yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show
```
-These commands load the YOLOv8 model and use it for tracking objects in the given video source with specific confidence (`conf`) and [Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (`iou`) thresholds. For more details, refer to the [track mode documentation](../../modes/track.md).
+These commands load the YOLO11 model and use it for tracking objects in the given video source with specific confidence (`conf`) and [Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (`iou`) thresholds. For more details, refer to the [track mode documentation](../../modes/track.md).
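If you also want to consume the tracking output programmatically, a minimal sketch that reads the per-object track IDs might look like the following; it reuses the calls shown above, and `stream=True` is assumed here only to avoid holding every frame's results in memory:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Track objects in the video and print the track IDs assigned in each frame
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, stream=True)
for result in results:
    if result.boxes.id is not None:  # IDs are only available once tracking has started
        print(result.boxes.id.int().cpu().tolist())  # track IDs for this frame
```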
### What are the upcoming features for training trackers in Ultralytics?
diff --git a/docs/en/guides/analytics.md b/docs/en/guides/analytics.md
index 08710f522c5..cd7fc40dcfc 100644
--- a/docs/en/guides/analytics.md
+++ b/docs/en/guides/analytics.md
@@ -1,10 +1,10 @@
---
comments: true
description: Learn to create line graphs, bar plots, and pie charts using Python with guided instructions and code snippets. Maximize your data visualization skills!
-keywords: Ultralytics, YOLOv8, data visualization, line graphs, bar plots, pie charts, Python, analytics, tutorial, guide
+keywords: Ultralytics, YOLO11, data visualization, line graphs, bar plots, pie charts, Python, analytics, tutorial, guide
---
-# Analytics using Ultralytics YOLOv8
+# Analytics using Ultralytics YOLO11
## Introduction
@@ -33,263 +33,61 @@ This guide provides a comprehensive overview of three fundamental types of [data
- Bar plots, on the other hand, are suitable for comparing quantities across different categories and showing relationships between a category and its numerical value.
- Lastly, pie charts are effective for illustrating proportions among categories and showing parts of a whole.
-!!! analytics "Analytics Examples"
+!!! example "Analytics Examples"
- === "Line Graph"
+ === "CLI"
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt")
+ ```bash
+ yolo solutions analytics show=True
- cap = cv2.VideoCapture("Path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+ # pass the source
+ yolo solutions analytics source="path/to/video/file.mp4"
- out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
+ # generate the pie chart
+ yolo solutions analytics analytics_type="pie" show=True
- analytics = solutions.Analytics(
- type="line",
- writer=out,
- im0_shape=(w, h),
- view_img=True,
- )
- total_counts = 0
- frame_count = 0
-
- while cap.isOpened():
- success, frame = cap.read()
-
- if success:
- frame_count += 1
- results = model.track(frame, persist=True, verbose=True)
+ # generate the bar plots
+ yolo solutions analytics analytics_type="bar" show=True
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu()
- for box in boxes:
- total_counts += 1
-
- analytics.update_line(frame_count, total_counts)
-
- total_counts = 0
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- else:
- break
-
- cap.release()
- out.release()
- cv2.destroyAllWindows()
+ # generate the area plots
+ yolo solutions analytics analytics_type="area" show=True
```
- === "Multiple Lines"
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt")
+ from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
- out = cv2.VideoWriter("multiple_line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
- analytics = solutions.Analytics(
- type="line",
- writer=out,
- im0_shape=(w, h),
- view_img=True,
- max_points=200,
+ # Video writer
+ out = cv2.VideoWriter(
+ "ultralytics_analytics.avi",
+ cv2.VideoWriter_fourcc(*"MJPG"),
+ fps,
+ (1920, 1080), # This is fixed
)
- frame_count = 0
- data = {}
- labels = []
-
- while cap.isOpened():
- success, frame = cap.read()
-
- if success:
- frame_count += 1
-
- results = model.track(frame, persist=True)
-
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu()
- track_ids = results[0].boxes.id.int().cpu().tolist()
- clss = results[0].boxes.cls.cpu().tolist()
-
- for box, track_id, cls in zip(boxes, track_ids, clss):
- # Store each class label
- if model.names[int(cls)] not in labels:
- labels.append(model.names[int(cls)])
-
- # Store each class count
- if model.names[int(cls)] in data:
- data[model.names[int(cls)]] += 1
- else:
- data[model.names[int(cls)]] = 0
-
- # update lines every frame
- analytics.update_multiple_lines(data, labels, frame_count)
- data = {} # clear the data list for next frame
- else:
- break
-
- cap.release()
- out.release()
- cv2.destroyAllWindows()
- ```
-
- === "Pie Chart"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt")
-
- cap = cv2.VideoCapture("Path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
-
+ # Init analytics
analytics = solutions.Analytics(
- type="pie",
- writer=out,
- im0_shape=(w, h),
- view_img=True,
+ show=True, # Display the output
+ analytics_type="line", # Pass the analytics type, could be "pie", "bar" or "area".
+ model="yolo11n.pt", # Path to the YOLO11 model file
+ # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model.
)
- clswise_count = {}
-
- while cap.isOpened():
- success, frame = cap.read()
- if success:
- results = model.track(frame, persist=True, verbose=True)
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu()
- clss = results[0].boxes.cls.cpu().tolist()
- for box, cls in zip(boxes, clss):
- if model.names[int(cls)] in clswise_count:
- clswise_count[model.names[int(cls)]] += 1
- else:
- clswise_count[model.names[int(cls)]] = 1
-
- analytics.update_pie(clswise_count)
- clswise_count = {}
-
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- else:
- break
-
- cap.release()
- out.release()
- cv2.destroyAllWindows()
- ```
-
- === "Bar Plot"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt")
-
- cap = cv2.VideoCapture("Path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
-
- analytics = solutions.Analytics(
- type="bar",
- writer=out,
- im0_shape=(w, h),
- view_img=True,
- )
-
- clswise_count = {}
-
- while cap.isOpened():
- success, frame = cap.read()
- if success:
- results = model.track(frame, persist=True, verbose=True)
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu()
- clss = results[0].boxes.cls.cpu().tolist()
- for box, cls in zip(boxes, clss):
- if model.names[int(cls)] in clswise_count:
- clswise_count[model.names[int(cls)]] += 1
- else:
- clswise_count[model.names[int(cls)]] = 1
-
- analytics.update_bar(clswise_count)
- clswise_count = {}
-
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- else:
- break
-
- cap.release()
- out.release()
- cv2.destroyAllWindows()
- ```
-
- === "Area chart"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt")
-
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- out = cv2.VideoWriter("area_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
-
- analytics = solutions.Analytics(
- type="area",
- writer=out,
- im0_shape=(w, h),
- view_img=True,
- )
-
- clswise_count = {}
+ # Process video
frame_count = 0
-
while cap.isOpened():
- success, frame = cap.read()
+ success, im0 = cap.read()
if success:
frame_count += 1
- results = model.track(frame, persist=True, verbose=True)
-
- if results[0].boxes.id is not None:
- boxes = results[0].boxes.xyxy.cpu()
- clss = results[0].boxes.cls.cpu().tolist()
-
- for box, cls in zip(boxes, clss):
- if model.names[int(cls)] in clswise_count:
- clswise_count[model.names[int(cls)]] += 1
- else:
- clswise_count[model.names[int(cls)]] = 1
-
- analytics.update_area(frame_count, clswise_count)
- clswise_count = {}
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
+ im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
+ out.write(im0) # write the video file
else:
break
@@ -302,23 +100,12 @@ This guide provides a comprehensive overview of three fundamental types of [data
Here's a table with the `Analytics` arguments:
-| Name | Type | Default | Description |
-| -------------- | ----------------- | ------------- | -------------------------------------------------------------------------------- |
-| `type` | `str` | `None` | Type of data or object. |
-| `im0_shape` | `tuple` | `None` | Shape of the initial image. |
-| `writer` | `cv2.VideoWriter` | `None` | Object for writing video files. |
-| `title` | `str` | `ultralytics` | Title for the visualization. |
-| `x_label` | `str` | `x` | Label for the x-axis. |
-| `y_label` | `str` | `y` | Label for the y-axis. |
-| `bg_color` | `str` | `white` | Background color. |
-| `fg_color` | `str` | `black` | Foreground color. |
-| `line_color` | `str` | `yellow` | Color of the lines. |
-| `line_width` | `int` | `2` | Width of the lines. |
-| `fontsize` | `int` | `13` | Font size for text. |
-| `view_img` | `bool` | `False` | Flag to display the image or video. |
-| `save_img` | `bool` | `True` | Flag to save the image or video. |
-| `max_points` | `int` | `50` | For multiple lines, total points drawn on frame, before deleting initial points. |
-| `points_width` | `int` | `15` | Width of line points highlighter. |
+| Name | Type | Default | Description |
+| ---------------- | ------ | ------- | ---------------------------------------------------- |
+| `analytics_type` | `str`  | `line`  | Type of graph, i.e. "line", "bar", "area" or "pie".  |
+| `model`          | `str`  | `None`  | Path to the Ultralytics YOLO model file.              |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
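As a quick illustration of how the arguments in this table fit together, a minimal initialization sketch (the values shown are examples only) could look like:

```python
from ultralytics import solutions

# Minimal sketch combining the arguments from the table above; values are illustrative
analytics = solutions.Analytics(
    analytics_type="bar",  # "line", "bar", "area" or "pie"
    model="yolo11n.pt",  # path to an Ultralytics YOLO model file
    line_width=2,  # line thickness for bounding boxes
    show=True,  # display the video stream while processing
)
```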
### Arguments `model.track`
@@ -330,11 +117,11 @@ Understanding when and how to use different types of visualizations is crucial f
## FAQ
-### How do I create a line graph using Ultralytics YOLOv8 Analytics?
+### How do I create a line graph using Ultralytics YOLO11 Analytics?
-To create a line graph using Ultralytics YOLOv8 Analytics, follow these steps:
+To create a line graph using Ultralytics YOLO11 Analytics, follow these steps:
-1. Load a YOLOv8 model and open your video file.
+1. Load a YOLO11 model and open your video file.
2. Initialize the `Analytics` class with `analytics_type` set to "line".
3. Iterate through video frames, updating the line graph with relevant data, such as object counts per frame.
4. Save the output video displaying the line graph.
@@ -344,21 +131,33 @@ Example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8s.pt")
cap = cv2.VideoCapture("Path/to/video/file.mp4")
-out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
+assert cap.isOpened(), "Error reading video file"
+
+w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True)
+out = cv2.VideoWriter(
+ "ultralytics_analytics.avi",
+ cv2.VideoWriter_fourcc(*"MJPG"),
+ fps,
+ (1920, 1080), # This is fixed
+)
+analytics = solutions.Analytics(
+ analytics_type="line",
+ show=True,
+)
+
+frame_count = 0
while cap.isOpened():
- success, frame = cap.read()
+ success, im0 = cap.read()
if success:
- results = model.track(frame, persist=True)
- total_counts = sum([1 for box in results[0].boxes.xyxy])
- analytics.update_line(frame_count, total_counts)
- if cv2.waitKey(1) & 0xFF == ord("q"):
+ frame_count += 1
+ im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
+ out.write(im0) # write the video file
+ else:
break
cap.release()
@@ -366,11 +165,11 @@ out.release()
cv2.destroyAllWindows()
```
-For further details on configuring the `Analytics` class, visit the [Analytics using Ultralytics YOLOv8](#analytics-using-ultralytics-yolov8) section.
+For further details on configuring the `Analytics` class, visit the [Analytics using Ultralytics YOLO11](#analytics-using-ultralytics-yolo11) section.
-### What are the benefits of using Ultralytics YOLOv8 for creating bar plots?
+### What are the benefits of using Ultralytics YOLO11 for creating bar plots?
-Using Ultralytics YOLOv8 for creating bar plots offers several benefits:
+Using Ultralytics YOLO11 for creating bar plots offers several benefits:
1. **Real-time Data Visualization**: Seamlessly integrate [object detection](https://www.ultralytics.com/glossary/object-detection) results into bar plots for dynamic updates.
2. **Ease of Use**: Simple API and functions make it straightforward to implement and visualize data.
@@ -382,24 +181,33 @@ Use the following example to generate a bar plot:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8s.pt")
cap = cv2.VideoCapture("Path/to/video/file.mp4")
-out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
+assert cap.isOpened(), "Error reading video file"
+
+w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-analytics = solutions.Analytics(type="bar", writer=out, im0_shape=(w, h), view_img=True)
+out = cv2.VideoWriter(
+ "ultralytics_analytics.avi",
+ cv2.VideoWriter_fourcc(*"MJPG"),
+ fps,
+ (1920, 1080), # This is fixed
+)
+analytics = solutions.Analytics(
+ analytics_type="bar",
+ show=True,
+)
+
+frame_count = 0
while cap.isOpened():
- success, frame = cap.read()
+ success, im0 = cap.read()
if success:
- results = model.track(frame, persist=True)
- clswise_count = {
- model.names[int(cls)]: boxes.size(0)
- for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy)
- }
- analytics.update_bar(clswise_count)
- if cv2.waitKey(1) & 0xFF == ord("q"):
+ frame_count += 1
+ im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
+ out.write(im0) # write the video file
+ else:
break
cap.release()
@@ -409,9 +217,9 @@ cv2.destroyAllWindows()
To learn more, visit the [Bar Plot](#visual-samples) section in the guide.
-### Why should I use Ultralytics YOLOv8 for creating pie charts in my data visualization projects?
+### Why should I use Ultralytics YOLO11 for creating pie charts in my data visualization projects?
-Ultralytics YOLOv8 is an excellent choice for creating pie charts because:
+Ultralytics YOLO11 is an excellent choice for creating pie charts because:
1. **Integration with Object Detection**: Directly integrate object detection results into pie charts for immediate insights.
2. **User-Friendly API**: Simple to set up and use with minimal code.
@@ -423,24 +231,33 @@ Here's a quick example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8s.pt")
cap = cv2.VideoCapture("Path/to/video/file.mp4")
-out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
+assert cap.isOpened(), "Error reading video file"
+
+w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-analytics = solutions.Analytics(type="pie", writer=out, im0_shape=(w, h), view_img=True)
+out = cv2.VideoWriter(
+ "ultralytics_analytics.avi",
+ cv2.VideoWriter_fourcc(*"MJPG"),
+ fps,
+ (1920, 1080), # This is fixed
+)
+analytics = solutions.Analytics(
+ analytics_type="pie",
+ show=True,
+)
+
+frame_count = 0
while cap.isOpened():
- success, frame = cap.read()
+ success, im0 = cap.read()
if success:
- results = model.track(frame, persist=True)
- clswise_count = {
- model.names[int(cls)]: boxes.size(0)
- for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy)
- }
- analytics.update_pie(clswise_count)
- if cv2.waitKey(1) & 0xFF == ord("q"):
+ frame_count += 1
+ im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
+ out.write(im0) # write the video file
+ else:
break
cap.release()
@@ -450,30 +267,42 @@ cv2.destroyAllWindows()
For more information, refer to the [Pie Chart](#visual-samples) section in the guide.
-### Can Ultralytics YOLOv8 be used to track objects and dynamically update visualizations?
+### Can Ultralytics YOLO11 be used to track objects and dynamically update visualizations?
-Yes, Ultralytics YOLOv8 can be used to track objects and dynamically update visualizations. It supports tracking multiple objects in real-time and can update various visualizations like line graphs, bar plots, and pie charts based on the tracked objects' data.
+Yes, Ultralytics YOLO11 can be used to track objects and dynamically update visualizations. It supports tracking multiple objects in real-time and can update various visualizations like line graphs, bar plots, and pie charts based on the tracked objects' data.
Example for tracking and updating a line graph:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8s.pt")
cap = cv2.VideoCapture("Path/to/video/file.mp4")
-out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h))
+assert cap.isOpened(), "Error reading video file"
+
+w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+
+out = cv2.VideoWriter(
+ "ultralytics_analytics.avi",
+ cv2.VideoWriter_fourcc(*"MJPG"),
+ fps,
+ (1920, 1080), # This is fixed
+)
-analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True)
+analytics = solutions.Analytics(
+ analytics_type="line",
+ show=True,
+)
+frame_count = 0
while cap.isOpened():
- success, frame = cap.read()
+ success, im0 = cap.read()
if success:
- results = model.track(frame, persist=True)
- total_counts = sum([1 for box in results[0].boxes.xyxy])
- analytics.update_line(frame_count, total_counts)
- if cv2.waitKey(1) & 0xFF == ord("q"):
+ frame_count += 1
+ im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
+ out.write(im0) # write the video file
+ else:
break
cap.release()
@@ -483,11 +312,11 @@ cv2.destroyAllWindows()
To learn about the complete functionality, see the [Tracking](../modes/track.md) section.
-### What makes Ultralytics YOLOv8 different from other object detection solutions like [OpenCV](https://www.ultralytics.com/glossary/opencv) and [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)?
+### What makes Ultralytics YOLO11 different from other object detection solutions like [OpenCV](https://www.ultralytics.com/glossary/opencv) and [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)?
-Ultralytics YOLOv8 stands out from other object detection solutions like OpenCV and TensorFlow for multiple reasons:
+Ultralytics YOLO11 stands out from other object detection solutions like OpenCV and TensorFlow for multiple reasons:
-1. **State-of-the-art [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: YOLOv8 provides superior accuracy in object detection, segmentation, and classification tasks.
+1. **State-of-the-art [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: YOLO11 provides superior accuracy in object detection, segmentation, and classification tasks.
2. **Ease of Use**: User-friendly API allows for quick implementation and integration without extensive coding.
3. **Real-time Performance**: Optimized for high-speed inference, suitable for real-time applications.
4. **Diverse Applications**: Supports various tasks including multi-object tracking, custom model training, and exporting to different formats like ONNX, TensorRT, and CoreML.
diff --git a/docs/en/guides/azureml-quickstart.md b/docs/en/guides/azureml-quickstart.md
index a769eee10d2..d13f9812f74 100644
--- a/docs/en/guides/azureml-quickstart.md
+++ b/docs/en/guides/azureml-quickstart.md
@@ -1,10 +1,10 @@
---
comments: true
-description: Learn how to run YOLOv8 on AzureML. Quickstart instructions for terminal and notebooks to harness Azure's cloud computing for efficient model training.
-keywords: YOLOv8, AzureML, machine learning, cloud computing, quickstart, terminal, notebooks, model training, Python SDK, AI, Ultralytics
+description: Learn how to run YOLO11 on AzureML. Quickstart instructions for terminal and notebooks to harness Azure's cloud computing for efficient model training.
+keywords: YOLO11, AzureML, machine learning, cloud computing, quickstart, terminal, notebooks, model training, Python SDK, AI, Ultralytics
---
-# YOLOv8 🚀 on AzureML
+# YOLO11 🚀 on AzureML
## What is Azure?
@@ -22,7 +22,7 @@ For users of YOLO (You Only Look Once), AzureML provides a robust, scalable, and
- Utilize built-in tools for data preprocessing, feature selection, and model training.
- Collaborate more efficiently with capabilities for MLOps (Machine Learning Operations), including but not limited to monitoring, auditing, and versioning of models and data.
-In the subsequent sections, you will find a quickstart guide detailing how to run YOLOv8 object detection models using AzureML, either from a compute terminal or a notebook.
+In the subsequent sections, you will find a quickstart guide detailing how to run YOLO11 object detection models using AzureML, either from a compute terminal or a notebook.
## Prerequisites
@@ -46,11 +46,12 @@ Start your compute and open a Terminal:
### Create virtualenv
-Create your conda virtualenv and install pip in it:
+Create your conda virtualenv with your preferred Python version and install pip in it.
+Note that Python 3.13.1 currently has dependency issues in AzureML, so an earlier version such as 3.12 (used below) is recommended:
```bash
-conda create --name yolov8env -y
-conda activate yolov8env
+conda create --name yolo11env -y python=3.12
+conda activate yolo11env
conda install pip -y
```
@@ -63,18 +64,18 @@ pip install ultralytics
pip install onnx>=1.12.0
```
-### Perform YOLOv8 tasks
+### Perform YOLO11 tasks
Predict:
```bash
-yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg'
+yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg'
```
Train a detection model for 10 [epochs](https://www.ultralytics.com/glossary/epoch) with an initial learning_rate of 0.01:
```bash
-yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01
+yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01
```
You can find more [instructions to use the Ultralytics CLI here](../quickstart.md#use-ultralytics-with-cli).
@@ -89,14 +90,14 @@ Open the compute Terminal.
-From your compute terminal, you need to create a new ipykernel that will be used by your notebook to manage your dependencies:
+From your compute terminal, create a new ipykernel that your notebook will use to manage its dependencies. Pin a specific Python version, because Python 3.13.1 currently has dependency issues in AzureML:
```bash
-conda create --name yolov8env -y
-conda activate yolov8env
+conda create --name yolo11env -y python=3.12
+conda activate yolo11env
conda install pip -y
conda install ipykernel -y
-python -m ipykernel install --user --name yolov8env --display-name "yolov8env"
+python -m ipykernel install --user --name yolo11env --display-name "yolo11env"
```
Close your terminal and create a new notebook. From your Notebook, you can select the new kernel.
@@ -105,21 +106,21 @@ Then you can open a Notebook cell and install the required dependencies:
```bash
%%bash
-source activate yolov8env
+source activate yolo11env
cd ultralytics
pip install -r requirements.txt
pip install ultralytics
pip install onnx>=1.12.0
```
-Note that we need to use the `source activate yolov8env` for all the %%bash cells, to make sure that the %%bash cell uses environment we want.
+Note that we need to use `source activate yolo11env` in all the %%bash cells to make sure that each %%bash cell uses the environment we want.
Run some predictions using the [Ultralytics CLI](../quickstart.md#use-ultralytics-with-cli):
```bash
%%bash
-source activate yolov8env
-yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg'
+source activate yolo11env
+yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg'
```
Or with the [Ultralytics Python interface](../quickstart.md#use-ultralytics-with-python), for example to train the model:
@@ -128,7 +129,7 @@ Or with the [Ultralytics Python interface](../quickstart.md#use-ultralytics-with
from ultralytics import YOLO
# Load a model
-model = YOLO("yolov8n.pt") # load an official YOLOv8n model
+model = YOLO("yolo11n.pt") # load an official YOLO11n model
# Use the model
model.train(data="coco8.yaml", epochs=3) # train the model
@@ -137,47 +138,47 @@ results = model("https://ultralytics.com/images/bus.jpg") # predict on an image
path = model.export(format="onnx") # export the model to ONNX format
```
-You can use either the Ultralytics CLI or Python interface for running YOLOv8 tasks, as described in the terminal section above.
+You can use either the Ultralytics CLI or Python interface for running YOLO11 tasks, as described in the terminal section above.
-By following these steps, you should be able to get YOLOv8 running quickly on AzureML for quick trials. For more advanced uses, you may refer to the full AzureML documentation linked at the beginning of this guide.
+By following these steps, you should be able to get YOLO11 running quickly on AzureML for quick trials. For more advanced uses, you may refer to the full AzureML documentation linked at the beginning of this guide.
## Explore More with AzureML
-This guide serves as an introduction to get you up and running with YOLOv8 on AzureML. However, it only scratches the surface of what AzureML can offer. To delve deeper and unlock the full potential of AzureML for your machine learning projects, consider exploring the following resources:
+This guide serves as an introduction to get you up and running with YOLO11 on AzureML. However, it only scratches the surface of what AzureML can offer. To delve deeper and unlock the full potential of AzureML for your machine learning projects, consider exploring the following resources:
- [Create a Data Asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets): Learn how to set up and manage your data assets effectively within the AzureML environment.
- [Initiate an AzureML Job](https://learn.microsoft.com/azure/machine-learning/how-to-train-model): Get a comprehensive understanding of how to kickstart your machine learning training jobs on AzureML.
- [Register a Model](https://learn.microsoft.com/azure/machine-learning/how-to-manage-models): Familiarize yourself with model management practices including registration, versioning, and deployment.
-- [Train YOLOv8 with AzureML Python SDK](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azure-machine-learning-python-sdk-8268696be8ba): Explore a step-by-step guide on using the AzureML Python SDK to train your YOLOv8 models.
-- [Train YOLOv8 with AzureML CLI](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azureml-and-the-az-cli-73d3c870ba8e): Discover how to utilize the command-line interface for streamlined training and management of YOLOv8 models on AzureML.
+- [Train YOLO11 with AzureML Python SDK](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azure-machine-learning-python-sdk-8268696be8ba): Explore a step-by-step guide on using the AzureML Python SDK to train your YOLO11 models.
+- [Train YOLO11 with AzureML CLI](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azureml-and-the-az-cli-73d3c870ba8e): Discover how to utilize the command-line interface for streamlined training and management of YOLO11 models on AzureML.
## FAQ
-### How do I run YOLOv8 on AzureML for model training?
+### How do I run YOLO11 on AzureML for model training?
-Running YOLOv8 on AzureML for model training involves several steps:
+Running YOLO11 on AzureML for model training involves several steps:
1. **Create a Compute Instance**: From your AzureML workspace, navigate to Compute > Compute instances > New, and select the required instance.
-2. **Setup Environment**: Start your compute instance, open a terminal, and create a conda environment:
+2. **Setup Environment**: Start your compute instance, open a terminal, and create a conda environment, making sure to set your Python version (Python 3.13.1 is not supported yet):
```bash
- conda create --name yolov8env -y
- conda activate yolov8env
+ conda create --name yolo11env -y python=3.12
+ conda activate yolo11env
conda install pip -y
pip install ultralytics onnx>=1.12.0
```
-3. **Run YOLOv8 Tasks**: Use the Ultralytics CLI to train your model:
+3. **Run YOLO11 Tasks**: Use the Ultralytics CLI to train your model:
```bash
- yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01
+ yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01
```
For more details, you can refer to the [instructions to use the Ultralytics CLI](../quickstart.md#use-ultralytics-with-cli).
-### What are the benefits of using AzureML for YOLOv8 training?
+### What are the benefits of using AzureML for YOLO11 training?
-AzureML provides a robust and efficient ecosystem for training YOLOv8 models:
+AzureML provides a robust and efficient ecosystem for training YOLO11 models:
- **Scalability**: Easily scale your compute resources as your data and model complexity grows.
- **MLOps Integration**: Utilize features like versioning, monitoring, and auditing to streamline ML operations.
@@ -185,9 +186,9 @@ AzureML provides a robust and efficient ecosystem for training YOLOv8 models:
These advantages make AzureML an ideal platform for projects ranging from quick prototypes to large-scale deployments. For more tips, check out [AzureML Jobs](https://learn.microsoft.com/azure/machine-learning/how-to-train-model).
-### How do I troubleshoot common issues when running YOLOv8 on AzureML?
+### How do I troubleshoot common issues when running YOLO11 on AzureML?
-Troubleshooting common issues with YOLOv8 on AzureML can involve the following steps:
+Troubleshooting common issues with YOLO11 on AzureML can involve the following steps:
- **Dependency Issues**: Ensure all required packages are installed. Refer to the `requirements.txt` file for dependencies.
- **Environment Setup**: Verify that your conda environment is correctly activated before running commands.
@@ -202,7 +203,7 @@ Yes, AzureML allows you to use both the Ultralytics CLI and the Python interface
- **CLI**: Ideal for quick tasks and running standard scripts directly from the terminal.
```bash
- yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg'
```
- **Python Interface**: Useful for more complex tasks requiring custom coding and integration within notebooks.
@@ -210,18 +211,18 @@ Yes, AzureML allows you to use both the Ultralytics CLI and the Python interface
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=3)
```
Refer to the quickstart guides for more detailed instructions [here](../quickstart.md#use-ultralytics-with-cli) and [here](../quickstart.md#use-ultralytics-with-python).
-### What is the advantage of using Ultralytics YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models?
+### What is the advantage of using Ultralytics YOLO11 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models?
-Ultralytics YOLOv8 offers several unique advantages over competing object detection models:
+Ultralytics YOLO11 offers several unique advantages over competing object detection models:
- **Speed**: Faster inference and training times compared to models like Faster R-CNN and SSD.
- **[Accuracy](https://www.ultralytics.com/glossary/accuracy)**: High accuracy in detection tasks with features like anchor-free design and enhanced augmentation strategies.
- **Ease of Use**: Intuitive API and CLI for quick setup, making it accessible both to beginners and experts.
-To explore more about YOLOv8's features, visit the [Ultralytics YOLO](https://www.ultralytics.com/yolo) page for detailed insights.
+To explore more about YOLO11's features, visit the [Ultralytics YOLO](https://www.ultralytics.com/yolo) page for detailed insights.
diff --git a/docs/en/guides/conda-quickstart.md b/docs/en/guides/conda-quickstart.md
index 6b52339260e..b958c85f273 100644
--- a/docs/en/guides/conda-quickstart.md
+++ b/docs/en/guides/conda-quickstart.md
@@ -37,7 +37,7 @@ This guide provides a comprehensive introduction to setting up a Conda environme
First, let's create a new Conda environment. Open your terminal and run the following command:
```bash
-conda create --name ultralytics-env python=3.8 -y
+conda create --name ultralytics-env python=3.11 -y
```
Activate the new environment:
@@ -73,7 +73,7 @@ With Ultralytics installed, you can now start using its robust features for [obj
```python
from ultralytics import YOLO
-model = YOLO("yolov8n.pt") # initialize model
+model = YOLO("yolo11n.pt") # initialize model
results = model("path/to/image.jpg") # perform inference
results[0].show() # display results for the first image
```
@@ -135,7 +135,7 @@ Congratulations! You have successfully set up a Conda environment, installed the
Setting up a Conda environment for Ultralytics projects is straightforward and ensures smooth package management. First, create a new Conda environment using the following command:
```bash
-conda create --name ultralytics-env python=3.8 -y
+conda create --name ultralytics-env python=3.11 -y
```
Then, activate the new environment with:
diff --git a/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md b/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md
index db61c08196c..716310598b7 100644
--- a/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md
+++ b/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md
@@ -1,10 +1,10 @@
---
comments: true
-description: Learn how to boost your Raspberry Pi's ML performance using Coral Edge TPU with Ultralytics YOLOv8. Follow our detailed setup and installation guide.
-keywords: Coral Edge TPU, Raspberry Pi, YOLOv8, Ultralytics, TensorFlow Lite, ML inference, machine learning, AI, installation guide, setup tutorial
+description: Learn how to boost your Raspberry Pi's ML performance using Coral Edge TPU with Ultralytics YOLO11. Follow our detailed setup and installation guide.
+keywords: Coral Edge TPU, Raspberry Pi, YOLO11, Ultralytics, TensorFlow Lite, ML inference, machine learning, AI, installation guide, setup tutorial
---
-# Coral Edge TPU on a Raspberry Pi with Ultralytics YOLOv8 🚀
+# Coral Edge TPU on a Raspberry Pi with Ultralytics YOLO11 🚀
@@ -27,11 +27,11 @@ The Coral Edge TPU is a compact device that adds an Edge TPU coprocessor to your
## Boost Raspberry Pi Model Performance with Coral Edge TPU
-Many people want to run their models on an embedded or mobile device such as a Raspberry Pi, since they are very power efficient and can be used in many different applications. However, the inference performance on these devices is usually poor even when using formats like [onnx](../integrations/onnx.md) or [openvino](../integrations/openvino.md). The Coral Edge TPU is a great solution to this problem, since it can be used with a Raspberry Pi and accelerate inference performance greatly.
+Many people want to run their models on an embedded or mobile device such as a Raspberry Pi, since they are very power efficient and can be used in many different applications. However, the inference performance on these devices is usually poor even when using formats like [ONNX](../integrations/onnx.md) or [OpenVINO](../integrations/openvino.md). The Coral Edge TPU is a great solution to this problem, since it can be used with a Raspberry Pi and accelerate inference performance greatly.
## Edge TPU on Raspberry Pi with TensorFlow Lite (New)โญ
-The [existing guide](https://coral.ai/docs/accelerator/get-started/) by Coral on how to use the Edge TPU with a Raspberry Pi is outdated, and the current Coral Edge TPU runtime builds do not work with the current TensorFlow Lite runtime versions anymore. In addition to that, Google seems to have completely abandoned the Coral project, and there have not been any updates between 2021 and 2024. This guide will show you how to get the Edge TPU working with the latest versions of the TensorFlow Lite runtime and an updated Coral Edge TPU runtime on a Raspberry Pi single board computer (SBC).
+The [existing guide](https://coral.ai/docs/accelerator/get-started/) by Coral on how to use the Edge TPU with a Raspberry Pi is outdated, and the current Coral Edge TPU runtime builds do not work with the current TensorFlow Lite runtime versions anymore. In addition to that, Google seems to have completely abandoned the Coral project, and there have not been any updates between 2021 and 2025. This guide will show you how to get the Edge TPU working with the latest versions of the TensorFlow Lite runtime and an updated Coral Edge TPU runtime on a Raspberry Pi single board computer (SBC).
## Prerequisites
@@ -47,6 +47,7 @@ This guide assumes that you already have a working Raspberry Pi OS install and h
### Installing the Edge TPU runtime
First, we need to install the Edge TPU runtime. There are many different versions available, so you need to choose the right version for your operating system.
+The high frequency version runs the Edge TPU at a higher clock speed, which improves performance. However, it may cause the Edge TPU to thermally throttle, so some form of cooling is recommended.
| Raspberry Pi OS | High frequency mode | Version to download |
| --------------- | :-----------------: | ------------------------------------------ |
@@ -81,11 +82,11 @@ After installing the runtime, you need to plug in your Coral Edge TPU into a USB
sudo apt remove libedgetpu1-max
```
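+
+As a rough sketch of switching frequency modes later (the package name comes from the command above, and the `.deb` path is a placeholder for the file you downloaded from the table):
+
+```bash
+# Remove the currently installed runtime, then install the downloaded package
+sudo apt remove libedgetpu1-max
+sudo dpkg -i path/to/package.deb
+```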
-## Export your model to a Edge TPU compatible model
+## Export to Edge TPU
To use the Edge TPU, you need to convert your model into a compatible format. It is recommended that you run export on Google Colab, x86_64 Linux machine, using the official [Ultralytics Docker container](docker-quickstart.md), or using [Ultralytics HUB](../hub/quickstart.md), since the Edge TPU compiler is not available on ARM. See the [Export Mode](../modes/export.md) for the available arguments.
-!!! note "Exporting the model"
+!!! example "Exporting the model"
=== "Python"
@@ -105,13 +106,27 @@ To use the Edge TPU, you need to convert your model into a compatible format. It
yolo export model=path/to/model.pt format=edgetpu # Export an official model or custom model
```
-The exported model will be saved in the `_saved_model/` folder with the name `_full_integer_quant_edgetpu.tflite`.
+The exported model will be saved in the `_saved_model/` folder with the name `_full_integer_quant_edgetpu.tflite`. It is important that your model file ends with the suffix `_edgetpu.tflite`, otherwise Ultralytics will not recognize it as an Edge TPU model.
## Running the model
-After exporting your model, you can run inference with it using the following code:
+Before you can actually run the model, you will need to install the correct libraries.
-!!! note "Running the model"
+If `tensorflow` is installed, uninstall it with the following command:
+
+```bash
+pip uninstall tensorflow tensorflow-aarch64
+```
+
+Then install/update `tflite-runtime`:
+
+```bash
+pip install -U tflite-runtime
+```
+
+Now you can run inference using the following code:
+
+!!! example "Running the model"
=== "Python"
@@ -119,7 +134,7 @@ After exporting your model, you can run inference with it using the following co
from ultralytics import YOLO
# Load a model
- model = YOLO("path/to/edgetpu_model.tflite") # Load an official model or custom model
+ model = YOLO("path/to/_full_integer_quant_edgetpu.tflite") # Load an official model or custom model
# Run Prediction
model.predict("path/to/source.png")
@@ -128,33 +143,69 @@ After exporting your model, you can run inference with it using the following co
=== "CLI"
```bash
- yolo predict model=path/to/edgetpu_model.tflite source=path/to/source.png # Load an official model or custom model
+ yolo predict model=path/to/_full_integer_quant_edgetpu.tflite source=path/to/source.png # Load an official model or custom model
```
Find comprehensive information on the [Predict](../modes/predict.md) page for full prediction mode details.
-???+ warning "Important"
+!!! note "Inference with multiple Edge TPUs"
- You should run the model using `tflite-runtime` and not `tensorflow`.
- If `tensorflow` is installed, uninstall tensorflow with the following command:
+ If you have multiple Edge TPUs, you can use the following code to select a specific TPU.
- ```bash
- pip uninstall tensorflow tensorflow-aarch64
- ```
+ === "Python"
- Then install/update `tflite-runtime`:
+ ```python
+ from ultralytics import YOLO
- ```
- pip install -U tflite-runtime
- ```
+ # Load a model
+ model = YOLO("path/to/_full_integer_quant_edgetpu.tflite") # Load an official model or custom model
+
+ # Run Prediction
+ model.predict("path/to/source.png") # Inference defaults to the first TPU
+
+ model.predict("path/to/source.png", device="tpu:0") # Select the first TPU
+
+ model.predict("path/to/source.png", device="tpu:1") # Select the second TPU
+ ```
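+
+ Assuming the CLI forwards the `device` argument to the same predictor as the Python API above (an assumption rather than verified behavior), a command-line sketch might look like:
+
+ ```bash
+ # Hypothetical CLI counterpart of the Python example above
+ yolo predict model=path/to/_full_integer_quant_edgetpu.tflite source=path/to/source.png device=tpu:0
+ ```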
+
+## Benchmarks
+
+!!! tip "Benchmarks"
+
+ Tested with Raspberry Pi OS Bookworm 64-bit and a USB Coral Edge TPU.
+
+ !!! note
+ Only the inference time is shown; pre-/post-processing is not included.
+
+ === "Raspberry Pi 4B 2GB"
+
+ | Image Size | Model | Standard Inference Time (ms) | High Frequency Inference Time (ms) |
+ |------------|---------|------------------------------|------------------------------------|
+ | 320 | YOLOv8n | 32.2 | 26.7 |
+ | 320 | YOLOv8s | 47.1 | 39.8 |
+ | 512 | YOLOv8n | 73.5 | 60.7 |
+ | 512 | YOLOv8s | 149.6 | 125.3 |
+
+ === "Raspberry Pi 5 8GB"
+
+ | Image Size | Model | Standard Inference Time (ms) | High Frequency Inference Time (ms) |
+ |------------|---------|------------------------------|------------------------------------|
+ | 320 | YOLOv8n | 22.2 | 16.7 |
+ | 320 | YOLOv8s | 40.1 | 32.2 |
+ | 512 | YOLOv8n | 53.5 | 41.6 |
+ | 512 | YOLOv8s | 132.0 | 103.3 |
+
+ On average:
- If you want a `tflite-runtime` wheel for `tensorflow` 2.15.0 download it from [here](https://github.com/feranick/TFlite-builds/releases) and install it using `pip` or your package manager of choice.
+ - The Raspberry Pi 5 is 22% faster with the standard mode than the Raspberry Pi 4B.
+ - The Raspberry Pi 5 is 30.2% faster with the high frequency mode than the Raspberry Pi 4B.
+ - The high frequency mode is 28.4% faster than the standard mode.
## FAQ
-### What is a Coral Edge TPU and how does it enhance Raspberry Pi's performance with Ultralytics YOLOv8?
+### What is a Coral Edge TPU and how does it enhance Raspberry Pi's performance with Ultralytics YOLO11?
-The Coral Edge TPU is a compact device designed to add an Edge TPU coprocessor to your system. This coprocessor enables low-power, high-performance [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) inference, particularly optimized for TensorFlow Lite models. When using a Raspberry Pi, the Edge TPU accelerates ML model inference, significantly boosting performance, especially for Ultralytics YOLOv8 models. You can read more about the Coral Edge TPU on their [home page](https://coral.ai/products/accelerator).
+The Coral Edge TPU is a compact device designed to add an Edge TPU coprocessor to your system. This coprocessor enables low-power, high-performance [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) inference, particularly optimized for TensorFlow Lite models. When using a Raspberry Pi, the Edge TPU accelerates ML model inference, significantly boosting performance, especially for Ultralytics YOLO11 models. You can read more about the Coral Edge TPU on their [home page](https://coral.ai/products/accelerator).
### How do I install the Coral Edge TPU runtime on a Raspberry Pi?
@@ -166,9 +217,9 @@ sudo dpkg -i path/to/package.deb
Make sure to uninstall any previous Coral Edge TPU runtime versions by following the steps outlined in the [Installation Walkthrough](#installation-walkthrough) section.
-### Can I export my Ultralytics YOLOv8 model to be compatible with Coral Edge TPU?
+### Can I export my Ultralytics YOLO11 model to be compatible with Coral Edge TPU?
-Yes, you can export your Ultralytics YOLOv8 model to be compatible with the Coral Edge TPU. It is recommended to perform the export on Google Colab, an x86_64 Linux machine, or using the [Ultralytics Docker container](docker-quickstart.md). You can also use Ultralytics HUB for exporting. Here is how you can export your model using Python and CLI:
+Yes, you can export your Ultralytics YOLO11 model to be compatible with the Coral Edge TPU. It is recommended to perform the export on Google Colab, an x86_64 Linux machine, or using the [Ultralytics Docker container](docker-quickstart.md). You can also use Ultralytics HUB for exporting. Here is how you can export your model using Python and CLI:
!!! note "Exporting the model"
@@ -192,7 +243,7 @@ Yes, you can export your Ultralytics YOLOv8 model to be compatible with the Cora
For more information, refer to the [Export Mode](../modes/export.md) documentation.
-### What should I do if TensorFlow is already installed on my Raspberry Pi but I want to use tflite-runtime instead?
+### What should I do if TensorFlow is already installed on my Raspberry Pi, but I want to use tflite-runtime instead?
If you have TensorFlow installed on your Raspberry Pi and need to switch to `tflite-runtime`, you'll need to uninstall TensorFlow first using:
@@ -208,9 +259,9 @@ pip install -U tflite-runtime
For a specific wheel, such as TensorFlow 2.15.0 `tflite-runtime`, you can download it from [this link](https://github.com/feranick/TFlite-builds/releases) and install it using `pip`. Detailed instructions are available in the section on running the model [Running the Model](#running-the-model).
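+
+For example (the wheel filename below is a placeholder for whichever build you downloaded):
+
+```bash
+# Install a locally downloaded tflite-runtime wheel
+pip install path/to/downloaded_wheel.whl
+```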
-### How do I run inference with an exported YOLOv8 model on a Raspberry Pi using the Coral Edge TPU?
+### How do I run inference with an exported YOLO11 model on a Raspberry Pi using the Coral Edge TPU?
-After exporting your YOLOv8 model to an Edge TPU-compatible format, you can run inference using the following code snippets:
+After exporting your YOLO11 model to an Edge TPU-compatible format, you can run inference using the following code snippets:
!!! note "Running the model"
diff --git a/docs/en/guides/data-collection-and-annotation.md b/docs/en/guides/data-collection-and-annotation.md
index dce15e0a682..058323ee29e 100644
--- a/docs/en/guides/data-collection-and-annotation.md
+++ b/docs/en/guides/data-collection-and-annotation.md
@@ -136,12 +136,12 @@ Bouncing your ideas and queries off other [computer vision](https://www.ultralyt
### Where to Find Help and Support
-- **GitHub Issues:** Visit the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face.
+- **GitHub Issues:** Visit the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects.
## Conclusion
@@ -159,7 +159,7 @@ Ensuring high consistency and accuracy in data annotation involves establishing
### How many images do I need for training Ultralytics YOLO models?
-For effective [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. If training for just one class, begin with at least 100 annotated images and train for approximately 100 [epochs](https://www.ultralytics.com/glossary/epoch). More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLOv8 training guide](../modes/train.md).
+For effective [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. If training for just one class, begin with at least 100 annotated images and train for approximately 100 [epochs](https://www.ultralytics.com/glossary/epoch). More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLO11 training guide](../modes/train.md).
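+
+As a minimal sketch (the dataset path is a placeholder for your own `data.yaml`):
+
+```bash
+# Train a small YOLO11 detection model on a custom dataset for ~100 epochs
+yolo detect train data=path/to/your_dataset.yaml model=yolo11n.pt epochs=100 imgsz=640
+```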
### What are some popular tools for data annotation?
diff --git a/docs/en/guides/deepstream-nvidia-jetson.md b/docs/en/guides/deepstream-nvidia-jetson.md
index ab15009b993..1170eddc93a 100644
--- a/docs/en/guides/deepstream-nvidia-jetson.md
+++ b/docs/en/guides/deepstream-nvidia-jetson.md
@@ -1,10 +1,10 @@
---
comments: true
-description: Learn how to deploy Ultralytics YOLOv8 on NVIDIA Jetson devices using TensorRT and DeepStream SDK. Explore performance benchmarks and maximize AI capabilities.
-keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, embedded systems, deep learning, TensorRT, DeepStream SDK, computer vision
+description: Learn how to deploy Ultralytics YOLO11 on NVIDIA Jetson devices using TensorRT and DeepStream SDK. Explore performance benchmarks and maximize AI capabilities.
+keywords: Ultralytics, YOLO11, NVIDIA Jetson, JetPack, AI deployment, embedded systems, deep learning, TensorRT, DeepStream SDK, computer vision
---
-# Ultralytics YOLOv8 on NVIDIA Jetson using DeepStream SDK and TensorRT
+# Ultralytics YOLO11 on NVIDIA Jetson using DeepStream SDK and TensorRT
@@ -14,16 +14,17 @@ keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, embedded s
allowfullscreen>
- Watch: How to Run Multiple Streams with DeepStream SDK on Jetson Nano using Ultralytics YOLOv8
+ Watch: How to Run Multiple Streams with DeepStream SDK on Jetson Nano using Ultralytics YOLO11
-This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices using DeepStream SDK and TensorRT. Here we use TensorRT to maximize the inference performance on the Jetson platform.
+This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices using DeepStream SDK and TensorRT. Here we use TensorRT to maximize the inference performance on the Jetson platform.
!!! note
- This guide has been tested with both [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.4](https://developer.nvidia.com/jetpack-sdk-464). It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy.
+ This guide has been tested with [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) running the latest stable JetPack release of [JP6.1](https://developer.nvidia.com/embedded/jetpack-sdk-61),
+ [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html), which is based on NVIDIA Jetson Orin NX 16GB running JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513), and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html), which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.4](https://developer.nvidia.com/jetpack-sdk-464). It is expected to work across the entire NVIDIA Jetson hardware lineup, including the latest and legacy devices.
## What is NVIDIA DeepStream?
@@ -33,48 +34,63 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti
Before you start to follow this guide:
-- Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLOv8](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLOv8
+- Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO11](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLO11
- Install [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) according to the JetPack version
- For JetPack 4.6.4, install [DeepStream 6.0.1](https://docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html)
- For JetPack 5.1.3, install [DeepStream 6.3](https://docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html)
+ - For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_Installation.html)
!!! tip
In this guide we have used the Debian package method of installing DeepStream SDK to the Jetson device. You can also visit the [DeepStream SDK on Jetson (Archived)](https://developer.nvidia.com/embedded/deepstream-on-jetson-downloads-archived) to access legacy versions of DeepStream.
-## DeepStream Configuration for YOLOv8
+## DeepStream Configuration for YOLO11
Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) GitHub repository which includes NVIDIA DeepStream SDK support for YOLO models. We appreciate the efforts of marcoslucianops for his contributions!
-1. Install dependencies
+1. Install Ultralytics with necessary dependencies
```bash
- pip install cmake
- pip install onnxsim
+ cd ~
+ pip install -U pip
+ git clone https://github.com/ultralytics/ultralytics
+ cd ultralytics
+ pip install -e ".[export]" onnxslim
```
-2. Clone the following repository
+2. Clone the DeepStream-Yolo repository
```bash
+ cd ~
git clone https://github.com/marcoslucianops/DeepStream-Yolo
- cd DeepStream-Yolo
```
-3. Download Ultralytics YOLOv8 detection model (.pt) of your choice from [YOLOv8 releases](https://github.com/ultralytics/assets/releases). Here we use [yolov8s.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt).
+3. Copy the `export_yoloV8.py` file from the `DeepStream-Yolo/utils` directory to the `ultralytics` folder
```bash
- wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt
+ cp ~/DeepStream-Yolo/utils/export_yoloV8.py ~/ultralytics
+ cd ultralytics
```
!!! note
- You can also use a [custom trained YOLOv8 model](https://docs.ultralytics.com/modes/train/).
+ `export_yoloV8.py` works for both YOLOv8 and YOLO11 models.
-4. Convert model to ONNX
+4. Download an Ultralytics YOLO11 detection model (.pt) of your choice from [YOLO11 releases](https://github.com/ultralytics/assets/releases). Here we use [yolo11s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt).
```bash
- python3 utils/export_yoloV8.py -w yolov8s.pt
+ wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt
+ ```
+
+ !!! note
+
+ You can also use a [custom trained YOLO11 model](https://docs.ultralytics.com/modes/train/).
+
+5. Convert model to ONNX
+
+ ```bash
+ python3 export_yoloV8.py -w yolo11s.pt
```
!!! note "Pass the below arguments to the above command"
@@ -120,7 +136,14 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc
--batch 4
```
-5. Set the CUDA version according to the JetPack version installed
+6. Copy the generated `.onnx` model file and `labels.txt` file to the `DeepStream-Yolo` folder
+
+ ```bash
+ cp yolo11s.pt.onnx labels.txt ~/DeepStream-Yolo
+ cd ~/DeepStream-Yolo
+ ```
+
+7. Set the CUDA version according to the JetPack version installed
For JetPack 4.6.4:
@@ -134,24 +157,30 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc
export CUDA_VER=11.4
```
-6. Compile the library
+ For JetPack 6.1:
+
+ ```bash
+ export CUDA_VER=12.6
+ ```
+
+8. Compile the library
```bash
make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo
```
-7. Edit the `config_infer_primary_yoloV8.txt` file according to your model (for YOLOv8s with 80 classes)
+9. Edit the `config_infer_primary_yoloV8.txt` file according to your model (for YOLO11s with 80 classes)
```bash
[property]
...
- onnx-file=yolov8s.onnx
+ onnx-file=yolo11s.pt.onnx
...
num-detected-classes=80
...
```
-8. Edit the `deepstream_app_config` file
+10. Edit the `deepstream_app_config` file
```bash
...
@@ -160,7 +189,7 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc
config-file=config_infer_primary_yoloV8.txt
```
-9. You can also change the video source in `deepstream_app_config` file. Here a default video file is loaded
+11. You can also change the video source in the `deepstream_app_config` file. Here, a default video file is loaded
```bash
...
@@ -179,16 +208,20 @@ deepstream-app -c deepstream_app_config.txt
It will take a long time to generate the TensorRT engine file before starting the inference. So please be patient.
-
+
!!! tip
- If you want to convert the model to FP16 [precision](https://www.ultralytics.com/glossary/precision), simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt`
+ If you want to convert the model to FP16 precision, simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt`
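+
+ As a sketch, the relevant keys in `config_infer_primary_yoloV8.txt` would then read (other entries unchanged):
+
+ ```bash
+ [property]
+ ...
+ model-engine-file=model_b1_gpu0_fp16.engine
+ ...
+ network-mode=2
+ ...
+ ```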
## INT8 Calibration
If you want to use INT8 precision for inference, you need to follow the steps below
+!!! note
+
+ Currently, INT8 does not work with TensorRT 10.x. This section of the guide has been tested with TensorRT 8.x, which is expected to work.
+
1. Set `OPENCV` environment variable
```bash
@@ -303,50 +336,92 @@ deepstream-app -c deepstream_app_config.txt
## Benchmark Results
-The following table summarizes how YOLOv8s models perform at different TensorRT precision levels with an input size of 640x640 on NVIDIA Jetson Orin NX 16GB.
+The following benchmarks summarize how YOLO11 models perform at different TensorRT precision levels with an input size of 640x640 on NVIDIA Jetson Orin NX 16GB.
+
+### Comparison Chart
+
+
+
+### Detailed Comparison Table
+
+!!! performance
+
+ === "YOLO11n"
+
+ | Format | Status | Inference time (ms/im) |
+ |-----------------|--------|------------------------|
+ | TensorRT (FP32) | ✅ | 8.64 |
+ | TensorRT (FP16) | ✅ | 5.27 |
+ | TensorRT (INT8) | ✅ | 4.54 |
+
+ === "YOLO11s"
+
+ | Format | Status | Inference time (ms/im) |
+ |-----------------|--------|------------------------|
+ | TensorRT (FP32) | ✅ | 14.53 |
+ | TensorRT (FP16) | ✅ | 7.91 |
+ | TensorRT (INT8) | ✅ | 6.05 |
+
+ === "YOLO11m"
+
+ | Format | Status | Inference time (ms/im) |
+ |-----------------|--------|------------------------|
+ | TensorRT (FP32) | ✅ | 32.05 |
+ | TensorRT (FP16) | ✅ | 15.55 |
+ | TensorRT (INT8) | ✅ | 10.43 |
+
+ === "YOLO11l"
+
+ | Format | Status | Inference time (ms/im) |
+ |-----------------|--------|------------------------|
+ | TensorRT (FP32) | ✅ | 39.68 |
+ | TensorRT (FP16) | ✅ | 19.88 |
+ | TensorRT (INT8) | ✅ | 13.64 |
+
+ === "YOLO11x"
-| Model Name | Precision | Inference Time (ms/im) | FPS |
-| ---------- | --------- | ---------------------- | --- |
-| YOLOv8s | FP32 | 15.63 | 64 |
-| | FP16 | 7.94 | 126 |
-| | INT8 | 5.53 | 181 |
+ | Format | Status | Inference time (ms/im) |
+ |-----------------|--------|------------------------|
+ | TensorRT (FP32) | ✅ | 80.65 |
+ | TensorRT (FP16) | ✅ | 39.06 |
+ | TensorRT (INT8) | ✅ | 22.83 |
-### Acknowledgements
+## Acknowledgements
This guide was initially created by our friends at Seeed Studio, Lakshantha and Elaine.
## FAQ
-### How do I set up Ultralytics YOLOv8 on an NVIDIA Jetson device?
+### How do I set up Ultralytics YOLO11 on an NVIDIA Jetson device?
-To set up Ultralytics YOLOv8 on an [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) device, you first need to install the [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) compatible with your JetPack version. Follow the step-by-step guide in our [Quick Start Guide](nvidia-jetson.md) to configure your NVIDIA Jetson for YOLOv8 deployment.
+To set up Ultralytics YOLO11 on an [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) device, you first need to install the [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) compatible with your JetPack version. Follow the step-by-step guide in our [Quick Start Guide](nvidia-jetson.md) to configure your NVIDIA Jetson for YOLO11 deployment.
-### What is the benefit of using TensorRT with YOLOv8 on NVIDIA Jetson?
+### What is the benefit of using TensorRT with YOLO11 on NVIDIA Jetson?
-Using TensorRT with YOLOv8 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines.
+Using TensorRT with YOLO11 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines.
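+
+Outside the DeepStream pipeline described above, a quick way to try TensorRT acceleration is the standard Ultralytics export workflow (a minimal sketch; model size and source are illustrative):
+
+```bash
+# Export a TensorRT engine with FP16 precision, then run inference with it
+yolo export model=yolo11n.pt format=engine half=True
+yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg'
+```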
-### Can I run Ultralytics YOLOv8 with DeepStream SDK across different NVIDIA Jetson hardware?
+### Can I run Ultralytics YOLO11 with DeepStream SDK across different NVIDIA Jetson hardware?
-Yes, the guide for deploying Ultralytics YOLOv8 with the DeepStream SDK and TensorRT is compatible across the entire NVIDIA Jetson lineup. This includes devices like the Jetson Orin NX 16GB with [JetPack 5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and the Jetson Nano 4GB with [JetPack 4.6.4](https://developer.nvidia.com/jetpack-sdk-464). Refer to the section [DeepStream Configuration for YOLOv8](#deepstream-configuration-for-yolov8) for detailed steps.
+Yes, the guide for deploying Ultralytics YOLO11 with the DeepStream SDK and TensorRT is compatible across the entire NVIDIA Jetson lineup. This includes devices like the Jetson Orin NX 16GB with [JetPack 5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and the Jetson Nano 4GB with [JetPack 4.6.4](https://developer.nvidia.com/jetpack-sdk-464). Refer to the section [DeepStream Configuration for YOLO11](#deepstream-configuration-for-yolo11) for detailed steps.
-### How can I convert a YOLOv8 model to ONNX for DeepStream?
+### How can I convert a YOLO11 model to ONNX for DeepStream?
-To convert a YOLOv8 model to ONNX format for deployment with DeepStream, use the `utils/export_yoloV8.py` script from the [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) repository.
+To convert a YOLO11 model to ONNX format for deployment with DeepStream, use the `utils/export_yoloV8.py` script from the [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) repository.
Here's an example command:
```bash
-python3 utils/export_yoloV8.py -w yolov8s.pt --opset 12 --simplify
+python3 utils/export_yoloV8.py -w yolo11s.pt --opset 12 --simplify
```
For more details on model conversion, check out our [model export section](../modes/export.md).
-### What are the performance benchmarks for YOLOv8 on NVIDIA Jetson Orin NX?
+### What are the performance benchmarks for YOLO11 on NVIDIA Jetson Orin NX?
-The performance of YOLOv8 models on NVIDIA Jetson Orin NX 16GB varies based on TensorRT precision levels. For example, YOLOv8s models achieve:
+The performance of YOLO11 models on NVIDIA Jetson Orin NX 16GB varies based on TensorRT precision levels. For example, YOLO11s models achieve:
-- **FP32 Precision**: 15.63 ms/im, 64 FPS
+- **FP32 Precision**: 14.6 ms/im, 68.5 FPS
- **FP16 Precision**: 7.94 ms/im, 126 FPS
-- **INT8 Precision**: 5.53 ms/im, 181 FPS
+- **INT8 Precision**: 5.95 ms/im, 168 FPS
-These benchmarks underscore the efficiency and capability of using TensorRT-optimized YOLOv8 models on NVIDIA Jetson hardware. For further details, see our [Benchmark Results](#benchmark-results) section.
+These benchmarks underscore the efficiency and capability of using TensorRT-optimized YOLO11 models on NVIDIA Jetson hardware. For further details, see our [Benchmark Results](#benchmark-results) section.
diff --git a/docs/en/guides/defining-project-goals.md b/docs/en/guides/defining-project-goals.md
index c5e3c58cf32..2a5dc1b124e 100644
--- a/docs/en/guides/defining-project-goals.md
+++ b/docs/en/guides/defining-project-goals.md
@@ -1,7 +1,7 @@
---
comments: true
description: Learn how to define clear goals and objectives for your computer vision project with our practical guide. Includes tips on problem statements, measurable objectives, and key decisions.
-keywords: computer vision, project planning, problem statement, measurable objectives, dataset preparation, model selection, YOLOv8, Ultralytics
+keywords: computer vision, project planning, problem statement, measurable objectives, dataset preparation, model selection, YOLO11, Ultralytics
---
# A Practical Guide for Defining Your [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Project
@@ -30,7 +30,7 @@ Let's walk through an example.
Consider a computer vision project where you want to [estimate the speed of vehicles](./speed-estimation.md) on a highway. The core issue is that current speed monitoring methods are inefficient and error-prone due to outdated radar systems and manual processes. The project aims to develop a real-time computer vision system that can replace legacy [speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) systems.
-
+
Primary users include traffic management authorities and law enforcement, while secondary stakeholders are highway planners and the public benefiting from safer roads. Key requirements involve evaluating budget, time, and personnel, as well as addressing technical needs like high-resolution cameras and real-time data processing. Additionally, regulatory constraints on privacy and [data security](https://www.ultralytics.com/glossary/data-security) must be considered.
@@ -85,7 +85,7 @@ The most popular computer vision tasks include [image classification](https://ww
-For a detailed explanation of various tasks, please take a look at the Ultralytics Docs page on [YOLOv8 Tasks](../tasks/index.md).
+For a detailed explanation of various tasks, please take a look at the Ultralytics Docs page on [YOLO11 Tasks](../tasks/index.md).
### Can a Pre-trained Model Remember Classes It Knew Before Custom Training?
@@ -114,12 +114,12 @@ Connecting with other computer vision enthusiasts can be incredibly helpful for
### Community Support Channels
-- **GitHub Issues:** Head over to the YOLOv8 GitHub repository. You can use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers can assist with specific problems you encounter.
+- **GitHub Issues:** Head over to the YOLO11 GitHub repository. You can use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers can assist with specific problems you encounter.
- **Ultralytics Discord Server:** Become part of the [Ultralytics Discord server](https://discord.com/invite/ultralytics). Connect with fellow users and developers, seek support, exchange knowledge, and discuss ideas.
### Comprehensive Guides and Documentation
-- **Ultralytics YOLOv8 Documentation:** Explore the [official YOLOv8 documentation](./index.md) for in-depth guides and valuable tips on various computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Explore the [official YOLO11 documentation](./index.md) for in-depth guides and valuable tips on various computer vision tasks and projects.
## Conclusion
@@ -138,11 +138,11 @@ To define a clear problem statement for your Ultralytics computer vision project
Providing a well-defined problem statement ensures that the project remains focused and aligned with your objectives. For a detailed guide, refer to our [practical guide](#defining-a-clear-problem-statement).
-### Why should I use Ultralytics YOLOv8 for speed estimation in my computer vision project?
+### Why should I use Ultralytics YOLO11 for speed estimation in my computer vision project?
-Ultralytics YOLOv8 is ideal for speed estimation because of its real-time object tracking capabilities, high accuracy, and robust performance in detecting and monitoring vehicle speeds. It overcomes inefficiencies and inaccuracies of traditional radar systems by leveraging cutting-edge computer vision technology. Check out our blog on [speed estimation using YOLOv8](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) for more insights and practical examples.
+Ultralytics YOLO11 is ideal for speed estimation because of its real-time object tracking capabilities, high accuracy, and robust performance in detecting and monitoring vehicle speeds. It overcomes inefficiencies and inaccuracies of traditional radar systems by leveraging cutting-edge computer vision technology. Check out our blog on [speed estimation using YOLO11](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) for more insights and practical examples.
-### How do I set effective measurable objectives for my computer vision project with Ultralytics YOLOv8?
+### How do I set effective measurable objectives for my computer vision project with Ultralytics YOLO11?
Set effective and measurable objectives using the SMART criteria:
diff --git a/docs/en/guides/distance-calculation.md b/docs/en/guides/distance-calculation.md
index 443b208b706..c9775124d4d 100644
--- a/docs/en/guides/distance-calculation.md
+++ b/docs/en/guides/distance-calculation.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to calculate distances between objects using Ultralytics YOLOv8 for accurate spatial positioning and scene understanding.
-keywords: Ultralytics, YOLOv8, distance calculation, computer vision, object tracking, spatial positioning
+description: Learn how to calculate distances between objects using Ultralytics YOLO11 for accurate spatial positioning and scene understanding.
+keywords: Ultralytics, YOLO11, distance calculation, computer vision, object tracking, spatial positioning
---
-# Distance Calculation using Ultralytics YOLOv8
+# Distance Calculation using Ultralytics YOLO11
## What is Distance Calculation?
-Measuring the gap between two objects is known as distance calculation within a specified space. In the case of [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), the [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroid is employed to calculate the distance for bounding boxes highlighted by the user.
+Measuring the gap between two objects within a specified space is known as distance calculation. In the case of [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics), the [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroid is employed to calculate the distance for bounding boxes highlighted by the user.
@@ -18,14 +18,14 @@ Measuring the gap between two objects is known as distance calculation within a
allowfullscreen>
- Watch: Distance Calculation using Ultralytics YOLOv8
+ Watch: Distance Calculation using Ultralytics YOLO11
## Visuals
-| Distance Calculation using Ultralytics YOLOv8 |
+| Distance Calculation using Ultralytics YOLO11 |
| :---------------------------------------------------------------------------------------------------------------------------: |
-|  |
+|  |
## Advantages of Distance Calculation?
@@ -36,19 +36,16 @@ Measuring the gap between two objects is known as distance calculation within a
- Click on any two bounding boxes with Left Mouse click for distance calculation
-!!! example "Distance Calculation using YOLOv8 Example"
+!!! example "Distance Calculation using YOLO11 Example"
=== "Video Stream"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n.pt")
- names = model.model.names
-
- cap = cv2.VideoCapture("path/to/video/file.mp4")
+ cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -56,16 +53,15 @@ Measuring the gap between two objects is known as distance calculation within a
video_writer = cv2.VideoWriter("distance_calculation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init distance-calculation obj
- dist_obj = solutions.DistanceCalculation(names=names, view_img=True)
+ distance = solutions.DistanceCalculation(model="yolo11n.pt", show=True)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
-
- tracks = model.track(im0, persist=True, show=False)
- im0 = dist_obj.start_process(im0, tracks)
+ im0 = distance.calculate(im0)
video_writer.write(im0)
cap.release()
@@ -84,13 +80,11 @@ Measuring the gap between two objects is known as distance calculation within a
### Arguments `DistanceCalculation()`
-| `Name` | `Type` | `Default` | Description |
-| ---------------- | ------- | --------------- | --------------------------------------------------------- |
-| `names` | `dict` | `None` | Dictionary of classes names. |
-| `view_img` | `bool` | `False` | Flag to indicate if the video stream should be displayed. |
-| `line_thickness` | `int` | `2` | Thickness of the lines drawn on the image. |
-| `line_color` | `tuple` | `(255, 255, 0)` | Color of the lines drawn on the image (BGR format). |
-| `centroid_color` | `tuple` | `(255, 0, 255)` | Color of the centroids drawn (BGR format). |
+| `Name` | `Type` | `Default` | Description |
+| ------------ | ------ | --------- | ---------------------------------------------------- |
+| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
### Arguments `model.track`
@@ -98,34 +92,32 @@ Measuring the gap between two objects is known as distance calculation within a
## FAQ
-### How do I calculate distances between objects using Ultralytics YOLOv8?
+### How do I calculate distances between objects using Ultralytics YOLO11?
-To calculate distances between objects using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), you need to identify the bounding box centroids of the detected objects. This process involves initializing the `DistanceCalculation` class from Ultralytics' `solutions` module and using the model's tracking outputs to calculate the distances. You can refer to the implementation in the [distance calculation example](#distance-calculation-using-ultralytics-yolov8).
+To calculate distances between objects using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics), you need to identify the bounding box centroids of the detected objects. This process involves initializing the `DistanceCalculation` class from Ultralytics' `solutions` module and using the model's tracking outputs to calculate the distances. You can refer to the implementation in the [distance calculation example](#distance-calculation-using-ultralytics-yolo11).
-### What are the advantages of using distance calculation with Ultralytics YOLOv8?
+### What are the advantages of using distance calculation with Ultralytics YOLO11?
-Using distance calculation with Ultralytics YOLOv8 offers several advantages:
+Using distance calculation with Ultralytics YOLO11 offers several advantages:
- **Localization Precision:** Provides accurate spatial positioning for objects.
- **Size Estimation:** Helps estimate physical sizes, contributing to better contextual understanding.
- **Scene Understanding:** Enhances 3D scene comprehension, aiding improved decision-making in applications like autonomous driving and surveillance.
-### Can I perform distance calculation in real-time video streams with Ultralytics YOLOv8?
+### Can I perform distance calculation in real-time video streams with Ultralytics YOLO11?
-Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLOv8. The process involves capturing video frames using [OpenCV](https://www.ultralytics.com/glossary/opencv), running YOLOv8 [object detection](https://www.ultralytics.com/glossary/object-detection), and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolov8).
+Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLO11. The process involves capturing video frames using [OpenCV](https://www.ultralytics.com/glossary/opencv), running YOLO11 [object detection](https://www.ultralytics.com/glossary/object-detection), and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolo11).
-### How do I delete points drawn during distance calculation using Ultralytics YOLOv8?
+### How do I delete points drawn during distance calculation using Ultralytics YOLO11?
-To delete points drawn during distance calculation with Ultralytics YOLOv8, you can use a right mouse click. This action will clear all the points you have drawn. For more details, refer to the note section under the [distance calculation example](#distance-calculation-using-ultralytics-yolov8).
+To delete points drawn during distance calculation with Ultralytics YOLO11, you can use a right mouse click. This action will clear all the points you have drawn. For more details, refer to the note section under the [distance calculation example](#distance-calculation-using-ultralytics-yolo11).
-### What are the key arguments for initializing the DistanceCalculation class in Ultralytics YOLOv8?
+### What are the key arguments for initializing the DistanceCalculation class in Ultralytics YOLO11?
-The key arguments for initializing the `DistanceCalculation` class in Ultralytics YOLOv8 include:
+The key arguments for initializing the `DistanceCalculation` class in Ultralytics YOLO11 include:
-- `names`: Dictionary mapping class indices to class names.
-- `view_img`: Flag to indicate if the video stream should be displayed.
-- `line_thickness`: Thickness of the lines drawn on the image.
-- `line_color`: Color of the lines drawn on the image (BGR format).
-- `centroid_color`: Color of the centroids (BGR format).
+- `model`: Model file path.
+- `show`: Flag to indicate if the video stream should be displayed.
+- `line_width`: Thickness of bounding box and the lines drawn on the image.
For an exhaustive list and default values, see the [arguments of DistanceCalculation](#arguments-distancecalculation).
diff --git a/docs/en/guides/docker-quickstart.md b/docs/en/guides/docker-quickstart.md
index 3ee48946c97..f08d62ad3cd 100644
--- a/docs/en/guides/docker-quickstart.md
+++ b/docs/en/guides/docker-quickstart.md
@@ -98,7 +98,7 @@ Here's how to execute the Ultralytics Docker container:
### Using only the CPU
```bash
-# Run with all GPUs
+# Run without GPU
sudo docker run -it --ipc=host $t
```
@@ -197,10 +197,10 @@ Setup and configuration of an X11 or Wayland display server is outside the scope
### Using Docker with a GUI
-Now you can display graphical applications inside your Docker container. For example, you can run the following [CLI command](../usage/cli.md) to visualize the [predictions](../modes/predict.md) from a [YOLOv8 model](../models/yolov8.md):
+Now you can display graphical applications inside your Docker container. For example, you can run the following [CLI command](../usage/cli.md) to visualize the [predictions](../modes/predict.md) from a [YOLO11 model](../models/yolo11.md):
```bash
-yolo predict model=yolov8n.pt show=True
+yolo predict model=yolo11n.pt show=True
```
??? info "Testing"
diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md
index d2ebd4b14bc..5310eb98ca2 100644
--- a/docs/en/guides/heatmaps.md
+++ b/docs/en/guides/heatmaps.md
@@ -1,14 +1,16 @@
---
comments: true
-description: Transform complex data into insightful heatmaps using Ultralytics YOLOv8. Discover patterns, trends, and anomalies with vibrant visualizations.
-keywords: Ultralytics, YOLOv8, heatmaps, data visualization, data analysis, complex data, patterns, trends, anomalies
+description: Transform complex data into insightful heatmaps using Ultralytics YOLO11. Discover patterns, trends, and anomalies with vibrant visualizations.
+keywords: Ultralytics, YOLO11, heatmaps, data visualization, data analysis, complex data, patterns, trends, anomalies
---
-# Advanced [Data Visualization](https://www.ultralytics.com/glossary/data-visualization): Heatmaps using Ultralytics YOLOv8 🚀
+# Advanced [Data Visualization](https://www.ultralytics.com/glossary/data-visualization): Heatmaps using Ultralytics YOLO11 🚀
## Introduction to Heatmaps
-A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.
+
+
+A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.
@@ -18,7 +20,7 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult
allowfullscreen>
- Watch: Heatmaps using Ultralytics YOLOv8
+ Watch: Heatmaps using Ultralytics YOLO11
## Why Choose Heatmaps for Data Analysis?
@@ -31,237 +33,65 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult
| Transportation | Retail |
| :--------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Ultralytics YOLOv8 Transportation Heatmap | Ultralytics YOLOv8 Retail Heatmap |
-
-!!! tip "Heatmap Configuration"
-
- - `heatmap_alpha`: Ensure this value is within the range (0.0 - 1.0).
- - `decay_factor`: Used for removing heatmap after an object is no longer in the frame, its value should also be in the range (0.0 - 1.0).
-
-!!! example "Heatmaps using Ultralytics YOLOv8 Example"
-
- === "Heatmap"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Video writer
- video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Init heatmap
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- names=model.names,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- tracks = model.track(im0, persist=True, show=False)
-
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
- === "Line Counting"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Video writer
- video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- line_points = [(20, 400), (1080, 404)] # line for object counting
-
- # Init heatmap
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- count_reg_pts=line_points,
- names=model.names,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
-
- tracks = model.track(im0, persist=True, show=False)
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
- === "Polygon Counting"
+|  |  |
+| Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap |
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Video writer
- video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Define polygon points
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
-
- # Init heatmap
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- count_reg_pts=region_points,
- names=model.names,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
-
- tracks = model.track(im0, persist=True, show=False)
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
- === "Region Counting"
-
- ```python
- import cv2
+!!! example "Heatmaps using Ultralytics YOLO11 Example"
- from ultralytics import YOLO, solutions
+ === "CLI"
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Video writer
- video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Define region points
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-
- # Init heatmap
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- count_reg_pts=region_points,
- names=model.names,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
+ ```bash
+ # Run a heatmap example
+ yolo solutions heatmap show=True
- tracks = model.track(im0, persist=True, show=False)
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
+ # Pass a source video
+ yolo solutions heatmap source="path/to/video/file.mp4"
- === "Im0"
+ # Pass a custom colormap
+ yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8s.pt") # YOLOv8 custom/pretrained model
-
- im0 = cv2.imread("path/to/image.png") # path to image file
- h, w = im0.shape[:2] # image height and width
-
- # Heatmap Init
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- names=model.names,
- )
-
- results = model.track(im0, persist=True)
- im0 = heatmap_obj.generate_heatmap(im0, tracks=results)
- cv2.imwrite("ultralytics_output.png", im0)
+ # Heatmaps + object counting
+ yolo solutions heatmap region=[(20, 400), (1080, 400), (1080, 360), (20, 360)]
```
- === "Specific Classes"
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
+ cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- classes_for_heatmap = [0, 2] # classes for heatmap
+ # To combine object counting with heatmaps, pass region points (examples below).
+ # region_points = [(20, 400), (1080, 400)] # Define line points
+ # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Define region points
+ # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # Define polygon points
# Init heatmap
- heatmap_obj = solutions.Heatmap(
- colormap=cv2.COLORMAP_PARULA,
- view_img=True,
- shape="circle",
- names=model.names,
+ heatmap = solutions.Heatmap(
+ show=True, # Display the output
+ model="yolo11n.pt", # Path to the YOLO11 model file
+ colormap=cv2.COLORMAP_PARULA, # Colormap of heatmap
+ # region=region_points, # Pass region points to combine object counting with heatmaps
+ # classes=[0, 2], # Generate the heatmap only for specific classes, i.e. person and car
+ # show_in=True, # Display in counts
+ # show_out=True, # Display out counts
+ # line_width=2, # Adjust the line width for bounding boxes and text display
)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap)
-
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
+ im0 = heatmap.generate_heatmap(im0)
video_writer.write(im0)
cap.release()
@@ -271,25 +101,15 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult
### Arguments `Heatmap()`
-| Name | Type | Default | Description |
-| ------------------ | ---------------- | ------------------ | ----------------------------------------------------------------- |
-| `names` | `list` | `None` | Dictionary of class names. |
-| `imw` | `int` | `0` | Image width. |
-| `imh` | `int` | `0` | Image height. |
-| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. |
-| `heatmap_alpha` | `float` | `0.5` | Alpha blending value for heatmap overlay. |
-| `view_img` | `bool` | `False` | Whether to display the image with the heatmap overlay. |
-| `view_in_counts` | `bool` | `True` | Whether to display the count of objects entering the region. |
-| `view_out_counts` | `bool` | `True` | Whether to display the count of objects exiting the region. |
-| `count_reg_pts` | `list` or `None` | `None` | Points defining the counting region (either a line or a polygon). |
-| `count_txt_color` | `tuple` | `(0, 0, 0)` | Text color for displaying counts. |
-| `count_bg_color` | `tuple` | `(255, 255, 255)` | Background color for displaying counts. |
-| `count_reg_color` | `tuple` | `(255, 0, 255)` | Color for the counting region. |
-| `region_thickness` | `int` | `5` | Thickness of the region line. |
-| `line_dist_thresh` | `int` | `15` | Distance threshold for line-based counting. |
-| `line_thickness` | `int` | `2` | Thickness of the lines used in drawing. |
-| `decay_factor` | `float` | `0.99` | Decay factor for the heatmap to reduce intensity over time. |
-| `shape` | `str` | `"circle"` | Shape of the heatmap blobs ('circle' or 'rect'). |
+| Name | Type | Default | Description |
+| ------------ | ------ | ------------------ | ----------------------------------------------------------------- |
+| `model` | `str` | `None` | Path to the Ultralytics YOLO model file. |
+| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. |
+| `show` | `bool` | `False` | Whether to display the image with the heatmap overlay. |
+| `show_in` | `bool` | `True` | Whether to display the count of objects entering the region. |
+| `show_out` | `bool` | `True` | Whether to display the count of objects exiting the region. |
+| `region` | `list` | `None` | Points defining the counting region (either a line or a polygon). |
+| `line_width` | `int` | `2` | Thickness of the lines used in drawing. |
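
For reference, the arguments above map one-to-one onto the constructor, so a counting-region heatmap only needs a few lines. The snippet below is a minimal sketch that sticks to the parameters documented in this table; the video path and region coordinates are placeholders:

```python
import cv2

from ultralytics import solutions

# Minimal sketch: heatmap + region counting using only the arguments documented above
heatmap = solutions.Heatmap(
    model="yolo11n.pt",  # path to the YOLO model file
    colormap=cv2.COLORMAP_JET,  # default colormap
    show=False,  # set True to display the overlay while processing
    region=[(20, 400), (1080, 400), (1080, 360), (20, 360)],  # placeholder counting region
    line_width=2,
)

cap = cv2.VideoCapture("path/to/video/file.mp4")  # placeholder video path
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    im0 = heatmap.generate_heatmap(im0)  # heatmap (and counts, since a region is set) drawn on the frame
cap.release()
```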
### Arguments `model.track`
@@ -326,29 +146,27 @@ These colormaps are commonly used for visualizing data with different color repr
## FAQ
-### How does Ultralytics YOLOv8 generate heatmaps and what are their benefits?
+### How does Ultralytics YOLO11 generate heatmaps and what are their benefits?
-Ultralytics YOLOv8 generates heatmaps by transforming complex data into a color-coded matrix where different hues represent data intensities. Heatmaps make it easier to visualize patterns, correlations, and anomalies in the data. Warmer hues indicate higher values, while cooler tones represent lower values. The primary benefits include intuitive visualization of data distribution, efficient pattern detection, and enhanced spatial analysis for decision-making. For more details and configuration options, refer to the [Heatmap Configuration](#arguments-heatmap) section.
+Ultralytics YOLO11 generates heatmaps by transforming complex data into a color-coded matrix where different hues represent data intensities. Heatmaps make it easier to visualize patterns, correlations, and anomalies in the data. Warmer hues indicate higher values, while cooler tones represent lower values. The primary benefits include intuitive visualization of data distribution, efficient pattern detection, and enhanced spatial analysis for decision-making. For more details and configuration options, refer to the [Heatmap Configuration](#arguments-heatmap) section.
-### Can I use Ultralytics YOLOv8 to perform object tracking and generate a heatmap simultaneously?
+### Can I use Ultralytics YOLO11 to perform object tracking and generate a heatmap simultaneously?
-Yes, Ultralytics YOLOv8 supports object tracking and heatmap generation concurrently. This can be achieved through its `Heatmap` solution integrated with object tracking models. To do so, you need to initialize the heatmap object and use YOLOv8's tracking capabilities. Here's a simple example:
+Yes, Ultralytics YOLO11 supports object tracking and heatmap generation concurrently. This can be achieved through its `Heatmap` solution integrated with object tracking models. To do so, you need to initialize the heatmap object and use YOLO11's tracking capabilities. Here's a simple example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
-heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names)
+heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, show=True, model="yolo11n.pt")
while cap.isOpened():
success, im0 = cap.read()
if not success:
break
- tracks = model.track(im0, persist=True, show=False)
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
+ im0 = heatmap.generate_heatmap(im0)
cv2.imshow("Heatmap", im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
@@ -359,30 +177,27 @@ cv2.destroyAllWindows()
For further guidance, check the [Tracking Mode](../modes/track.md) page.
-### What makes Ultralytics YOLOv8 heatmaps different from other data visualization tools like those from [OpenCV](https://www.ultralytics.com/glossary/opencv) or Matplotlib?
+### What makes Ultralytics YOLO11 heatmaps different from other data visualization tools like those from [OpenCV](https://www.ultralytics.com/glossary/opencv) or Matplotlib?
-Ultralytics YOLOv8 heatmaps are specifically designed for integration with its [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLOv8 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLOv8's unique features, visit the [Ultralytics YOLOv8 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
+Ultralytics YOLO11 heatmaps are specifically designed for integration with its [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLO11 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLO11's unique features, visit the [Ultralytics YOLO11 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
-### How can I visualize only specific object classes in heatmaps using Ultralytics YOLOv8?
+### How can I visualize only specific object classes in heatmaps using Ultralytics YOLO11?
You can visualize specific object classes by passing the desired class indices to the `classes` argument. For instance, if you only want to visualize persons and cars (class indices 0 and 2 in COCO), you can set the `classes` parameter accordingly.
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
-heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names)
+heatmap = solutions.Heatmap(show=True, model="yolo11n.pt", classes=[0, 2])
-classes_for_heatmap = [0, 2] # Classes to visualize
while cap.isOpened():
success, im0 = cap.read()
if not success:
break
- tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap)
- im0 = heatmap_obj.generate_heatmap(im0, tracks)
+ im0 = heatmap.generate_heatmap(im0)
cv2.imshow("Heatmap", im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
@@ -391,6 +206,6 @@ cap.release()
cv2.destroyAllWindows()
```
-### Why should businesses choose Ultralytics YOLOv8 for heatmap generation in data analysis?
+### Why should businesses choose Ultralytics YOLO11 for heatmap generation in data analysis?
-Ultralytics YOLOv8 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLOv8's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans).
+Ultralytics YOLO11 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO11's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans).
diff --git a/docs/en/guides/hyperparameter-tuning.md b/docs/en/guides/hyperparameter-tuning.md
index d715820f24a..267e7eb5d10 100644
--- a/docs/en/guides/hyperparameter-tuning.md
+++ b/docs/en/guides/hyperparameter-tuning.md
@@ -10,6 +10,17 @@ keywords: Ultralytics YOLO, hyperparameter tuning, machine learning, model optim
Hyperparameter tuning is not just a one-time set-up but an iterative process aimed at optimizing the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) model's performance metrics, such as accuracy, precision, and recall. In the context of Ultralytics YOLO, these hyperparameters could range from learning rate to architectural details, such as the number of layers or types of activation functions used.
+
+ Watch: How to Tune Hyperparameters for Better Model Performance 🚀
+
### What are Hyperparameters?
Hyperparameters are high-level, structural settings for the algorithm. They are set prior to the training phase and remain constant during it. Here are some commonly tuned hyperparameters in Ultralytics YOLO:
@@ -23,7 +34,7 @@ Hyperparameters are high-level, structural settings for the algorithm. They are
-For a full list of augmentation hyperparameters used in YOLOv8 please refer to the [configurations page](../usage/cfg.md#augmentation-settings).
+For a full list of augmentation hyperparameters used in YOLO11 please refer to the [configurations page](../usage/cfg.md#augmentation-settings).
### Genetic Evolution and Mutation
@@ -65,9 +76,37 @@ It's crucial to log both the performance metrics and the corresponding hyperpara
The process is repeated until either the set number of iterations is reached or the performance metric is satisfactory.
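
To make that loop concrete, here is a rough, self-contained sketch of the mutate → train → evaluate cycle described above. It is illustrative only: `train_and_evaluate` is a stand-in for a real training run, and the bounds and mutation factors are arbitrary, not the actual `Tuner` defaults.

```python
import random

# Toy search space: (min, max) bounds per hyperparameter
space = {"lr0": (1e-5, 1e-1), "momentum": (0.6, 0.98)}


def train_and_evaluate(hyp):
    """Stand-in for a real training run; returns a toy fitness score (higher is better)."""
    return -abs(hyp["lr0"] - 0.01) - abs(hyp["momentum"] - 0.9)


best_hyp = {k: (lo + hi) / 2 for k, (lo, hi) in space.items()}  # start from mid-range values
best_fitness = train_and_evaluate(best_hyp)

for _ in range(30):  # tuning iterations
    # Mutate the current best hyperparameters and clip them back into their bounds
    candidate = {k: min(max(v * random.uniform(0.8, 1.2), space[k][0]), space[k][1]) for k, v in best_hyp.items()}
    fitness = train_and_evaluate(candidate)
    if fitness > best_fitness:  # keep the mutation only if it improves the metric
        best_fitness, best_hyp = fitness, candidate

print(best_hyp, best_fitness)
```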
-## Usage Example
-
-Here's how to use the `model.tune()` method to utilize the `Tuner` class for hyperparameter tuning of YOLOv8n on COCO8 for 30 epochs with an AdamW optimizer and skipping plotting, checkpointing and validation other than on final epoch for faster Tuning.
+## Default Search Space Description
+
+The following table lists the default search space parameters for hyperparameter tuning in YOLO11. Each parameter has a specific value range defined by a tuple `(min, max)`.
+
+| Parameter | Type | Value Range | Description |
+| ----------------- | ------- | -------------- | ---------------------------------------------------------------------------------------------------------------- |
+| `lr0` | `float` | `(1e-5, 1e-1)` | Initial learning rate at the start of training. Lower values provide more stable training but slower convergence |
+| `lrf` | `float` | `(0.01, 1.0)` | Final learning rate factor as a fraction of lr0. Controls how much the learning rate decreases during training |
+| `momentum` | `float` | `(0.6, 0.98)` | SGD momentum factor. Higher values help maintain consistent gradient direction and can speed up convergence |
+| `weight_decay` | `float` | `(0.0, 0.001)` | L2 regularization factor to prevent overfitting. Larger values enforce stronger regularization |
+| `warmup_epochs` | `float` | `(0.0, 5.0)` | Number of epochs for linear learning rate warmup. Helps prevent early training instability |
+| `warmup_momentum` | `float` | `(0.0, 0.95)` | Initial momentum during warmup phase. Gradually increases to the final momentum value |
+| `box` | `float` | `(0.02, 0.2)` | Bounding box loss weight in the total loss function. Balances box regression vs classification |
+| `cls` | `float` | `(0.2, 4.0)` | Classification loss weight in the total loss function. Higher values emphasize correct class prediction |
+| `hsv_h` | `float` | `(0.0, 0.1)` | Random hue augmentation range in HSV color space. Helps model generalize across color variations |
+| `hsv_s` | `float` | `(0.0, 0.9)` | Random saturation augmentation range in HSV space. Simulates different lighting conditions |
+| `hsv_v` | `float` | `(0.0, 0.9)` | Random value (brightness) augmentation range. Helps model handle different exposure levels |
+| `degrees` | `float` | `(0.0, 45.0)` | Maximum rotation augmentation in degrees. Helps model become invariant to object orientation |
+| `translate` | `float` | `(0.0, 0.9)` | Maximum translation augmentation as fraction of image size. Improves robustness to object position |
+| `scale` | `float` | `(0.0, 0.9)` | Random scaling augmentation range. Helps model detect objects at different sizes |
+| `shear` | `float` | `(0.0, 10.0)` | Maximum shear augmentation in degrees. Adds perspective-like distortions to training images |
+| `perspective` | `float` | `(0.0, 0.001)` | Random perspective augmentation range. Simulates different viewing angles |
+| `flipud` | `float` | `(0.0, 1.0)` | Probability of vertical image flip during training. Useful for overhead/aerial imagery |
+| `fliplr` | `float` | `(0.0, 1.0)` | Probability of horizontal image flip. Helps model become invariant to object direction |
+| `mosaic` | `float` | `(0.0, 1.0)` | Probability of using mosaic augmentation, which combines 4 images. Especially useful for small object detection |
+| `mixup` | `float` | `(0.0, 1.0)` | Probability of using mixup augmentation, which blends two images. Can improve model robustness |
+| `copy_paste` | `float` | `(0.0, 1.0)` | Probability of using copy-paste augmentation. Helps improve instance segmentation performance |
+
+## Custom Search Space Example
+
+Here's how to define a search space and use the `model.tune()` method to utilize the `Tuner` class for hyperparameter tuning of YOLO11n on COCO8 for 30 epochs with an AdamW optimizer, skipping plotting, checkpointing, and validation (except on the final epoch) for faster tuning.
!!! example
@@ -77,10 +116,25 @@ Here's how to use the `model.tune()` method to utilize the `Tuner` class for hyp
from ultralytics import YOLO
# Initialize the YOLO model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
+
+ # Define search space
+ search_space = {
+ "lr0": (1e-5, 1e-1),
+ "degrees": (0.0, 45.0),
+ }
# Tune hyperparameters on COCO8 for 30 epochs
- model.tune(data="coco8.yaml", epochs=30, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
+ model.tune(
+ data="coco8.yaml",
+ epochs=30,
+ iterations=300,
+ optimizer="AdamW",
+ space=search_space,
+ plots=False,
+ save=False,
+ val=False,
+ )
```
## Results
@@ -202,7 +256,7 @@ The hyperparameter tuning process in Ultralytics YOLO is simplified yet powerful
1. [Hyperparameter Optimization in Wikipedia](https://en.wikipedia.org/wiki/Hyperparameter_optimization)
2. [YOLOv5 Hyperparameter Evolution Guide](../yolov5/tutorials/hyperparameter_evolution.md)
-3. [Efficient Hyperparameter Tuning with Ray Tune and YOLOv8](../integrations/ray-tune.md)
+3. [Efficient Hyperparameter Tuning with Ray Tune and YOLO11](../integrations/ray-tune.md)
For deeper insights, you can explore the `Tuner` class source code and accompanying documentation. Should you have any questions, feature requests, or need further assistance, feel free to reach out to us on [GitHub](https://github.com/ultralytics/ultralytics/issues/new/choose) or [Discord](https://discord.com/invite/ultralytics).
@@ -220,7 +274,7 @@ To optimize the learning rate for Ultralytics YOLO, start by setting an initial
from ultralytics import YOLO
# Initialize the YOLO model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Tune hyperparameters on COCO8 for 30 epochs
model.tune(data="coco8.yaml", epochs=30, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
@@ -228,9 +282,9 @@ To optimize the learning rate for Ultralytics YOLO, start by setting an initial
For more details, check the [Ultralytics YOLO configuration page](../usage/cfg.md#augmentation-settings).
-### What are the benefits of using genetic algorithms for hyperparameter tuning in YOLOv8?
+### What are the benefits of using genetic algorithms for hyperparameter tuning in YOLO11?
-Genetic algorithms in Ultralytics YOLOv8 provide a robust method for exploring the hyperparameter space, leading to highly optimized model performance. Key benefits include:
+Genetic algorithms in Ultralytics YOLO11 provide a robust method for exploring the hyperparameter space, leading to highly optimized model performance. Key benefits include:
- **Efficient Search**: Genetic algorithms like mutation can quickly explore a large set of hyperparameters.
- **Avoiding Local Minima**: By introducing randomness, they help in avoiding local minima, ensuring better global optimization.
@@ -240,7 +294,7 @@ To see how genetic algorithms can optimize hyperparameters, check out the [hyper
### How long does the hyperparameter tuning process take for Ultralytics YOLO?
-The time required for hyperparameter tuning with Ultralytics YOLO largely depends on several factors such as the size of the dataset, the complexity of the model architecture, the number of iterations, and the computational resources available. For instance, tuning YOLOv8n on a dataset like COCO8 for 30 epochs might take several hours to days, depending on the hardware.
+The time required for hyperparameter tuning with Ultralytics YOLO largely depends on several factors such as the size of the dataset, the complexity of the model architecture, the number of iterations, and the computational resources available. For instance, tuning YOLO11n on a dataset like COCO8 for 30 epochs might take several hours to days, depending on the hardware.
To effectively manage tuning time, define a clear tuning budget beforehand ([internal section link](#preparing-for-hyperparameter-tuning)). This helps in balancing resource allocation and optimization goals.
diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md
index 1ad70434abc..f3deebd3e64 100644
--- a/docs/en/guides/index.md
+++ b/docs/en/guides/index.md
@@ -18,7 +18,7 @@ Whether you're a beginner or an expert in [deep learning](https://www.ultralytic
allowfullscreen>
- Watch: Ultralytics YOLOv8 Guides Overview
+ Watch: Ultralytics YOLO11 Guides Overview
## Guides
@@ -27,32 +27,32 @@ Here's a compilation of in-depth guides to help you master different aspects of
- [YOLO Common Issues](yolo-common-issues.md) ⭐ RECOMMENDED: Practical solutions and troubleshooting tips to the most frequently encountered issues when working with Ultralytics YOLO models.
- [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and [F1 score](https://www.ultralytics.com/glossary/f1-score) used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed.
+- [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions.
- [Model Deployment Options](model-deployment-options.md): Overview of YOLO [model deployment](https://www.ultralytics.com/glossary/model-deployment) formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy.
- [K-Fold Cross Validation](kfold-cross-validation.md) 🚀 NEW: Learn how to improve model generalization using K-Fold cross-validation technique.
- [Hyperparameter Tuning](hyperparameter-tuning.md) 🚀 NEW: Discover how to optimize your YOLO models by fine-tuning hyperparameters using the Tuner class and genetic evolution algorithms.
-- [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLOv8 for object detection in high-resolution images.
+- [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLO11 for object detection in high-resolution images.
- [AzureML Quickstart](azureml-quickstart.md) 🚀 NEW: Get up and running with Ultralytics YOLO models on Microsoft's Azure [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml) platform. Learn how to train, deploy, and scale your object detection projects in the cloud.
- [Conda Quickstart](conda-quickstart.md) 🚀 NEW: Step-by-step guide to setting up a [Conda](https://anaconda.org/conda-forge/ultralytics) environment for Ultralytics. Learn how to install and start using the Ultralytics package efficiently with Conda.
- [Docker Quickstart](docker-quickstart.md) 🚀 NEW: Complete guide to setting up and using Ultralytics YOLO models with [Docker](https://hub.docker.com/r/ultralytics/ultralytics). Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers for consistent development and deployment.
- [Raspberry Pi](raspberry-pi.md) 🚀 NEW: Quickstart tutorial to run YOLO models on the latest Raspberry Pi hardware.
- [NVIDIA Jetson](nvidia-jetson.md) 🚀 NEW: Quickstart guide for deploying YOLO models on NVIDIA Jetson devices.
- [DeepStream on NVIDIA Jetson](deepstream-nvidia-jetson.md) 🚀 NEW: Quickstart guide for deploying YOLO models on NVIDIA Jetson devices using DeepStream and TensorRT.
-- [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLOv8 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments.
-- [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions.
+- [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLO11 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments.
- [Isolating Segmentation Objects](isolating-segmentation-objects.md) 🚀 NEW: Step-by-step recipe and explanation on how to extract and/or isolate objects from images using Ultralytics Segmentation.
- [Edge TPU on Raspberry Pi](coral-edge-tpu-on-raspberry-pi.md): [Google Edge TPU](https://coral.ai/products/accelerator) accelerates YOLO inference on [Raspberry Pi](https://www.raspberrypi.com/).
- [View Inference Images in a Terminal](view-results-in-terminal.md): Use VSCode's integrated terminal to view inference results when using Remote Tunnel or SSH sessions.
- [OpenVINO Latency vs Throughput Modes](optimizing-openvino-latency-vs-throughput-modes.md) - Learn latency and throughput optimization techniques for peak YOLO inference performance.
+- [ROS Quickstart](ros-quickstart.md) 🚀 NEW: Learn how to integrate YOLO with the Robot Operating System (ROS) for real-time object detection in robotics applications, including Point Cloud and Depth images.
- [Steps of a Computer Vision Project](steps-of-a-cv-project.md) 🚀 NEW: Learn about the key steps involved in a computer vision project, including defining goals, selecting models, preparing data, and evaluating results.
- [Defining A Computer Vision Project's Goals](defining-project-goals.md) 🚀 NEW: Walk through how to effectively define clear and measurable goals for your computer vision project. Learn the importance of a well-defined problem statement and how it creates a roadmap for your project.
- [Data Collection and Annotation](data-collection-and-annotation.md) 🚀 NEW: Explore the tools, techniques, and best practices for collecting and annotating data to create high-quality inputs for your computer vision models.
-- [Preprocessing Annotated Data](preprocessing_annotated_data.md) 🚀 NEW: Learn about preprocessing and augmenting image data in computer vision projects using YOLOv8, including normalization, dataset augmentation, splitting, and exploratory data analysis (EDA).
+- [Preprocessing Annotated Data](preprocessing_annotated_data.md) 🚀 NEW: Learn about preprocessing and augmenting image data in computer vision projects using YOLO11, including normalization, dataset augmentation, splitting, and exploratory data analysis (EDA).
- [Tips for Model Training](model-training-tips.md) 🚀 NEW: Explore tips on optimizing [batch sizes](https://www.ultralytics.com/glossary/batch-size), using [mixed precision](https://www.ultralytics.com/glossary/mixed-precision), applying pre-trained weights, and more to make training your computer vision model a breeze.
- [Insights on Model Evaluation and Fine-Tuning](model-evaluation-insights.md) 🚀 NEW: Gain insights into the strategies and best practices for evaluating and fine-tuning your computer vision models. Learn about the iterative process of refining models to achieve optimal results.
- [A Guide on Model Testing](model-testing.md) 🚀 NEW: A thorough guide on testing your computer vision models in realistic settings. Learn how to verify accuracy, reliability, and performance in line with project goals.
- [Best Practices for Model Deployment](model-deployment-practices.md) 🚀 NEW: Walk through tips and best practices for efficiently deploying models in computer vision projects, with a focus on optimization, troubleshooting, and security.
- [Maintaining Your Computer Vision Model](model-monitoring-and-maintenance.md) 🚀 NEW: Understand the key practices for monitoring, maintaining, and documenting computer vision models to guarantee accuracy, spot anomalies, and mitigate data drift.
-- [ROS Quickstart](ros-quickstart.md) 🚀 NEW: Learn how to integrate YOLO with the Robot Operating System (ROS) for real-time object detection in robotics applications, including Point Cloud and Depth images.
## Contribute to Our Guides
@@ -75,14 +75,14 @@ Training a custom object detection model with Ultralytics YOLO is straightforwar
```python
from ultralytics import YOLO
- model = YOLO("yolov8s.pt") # Load a pre-trained YOLO model
+ model = YOLO("yolo11n.pt") # Load a pre-trained YOLO model
model.train(data="path/to/dataset.yaml", epochs=50) # Train on custom dataset
```
=== "CLI"
```bash
- yolo task=detect mode=train model=yolov8s.pt data=path/to/dataset.yaml epochs=50
+ yolo task=detect mode=train model=yolo11n.pt data=path/to/dataset.yaml epochs=50
```
For detailed dataset formatting and additional options, refer to our [Tips for Model Training](model-training-tips.md) guide.
diff --git a/docs/en/guides/instance-segmentation-and-tracking.md b/docs/en/guides/instance-segmentation-and-tracking.md
index 95e91a8cafc..12cd7477a67 100644
--- a/docs/en/guides/instance-segmentation-and-tracking.md
+++ b/docs/en/guides/instance-segmentation-and-tracking.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Master instance segmentation and tracking with Ultralytics YOLOv8. Learn techniques for precise object identification and tracking.
-keywords: instance segmentation, tracking, YOLOv8, Ultralytics, object detection, machine learning, computer vision, python
+description: Master instance segmentation and tracking with Ultralytics YOLO11. Learn techniques for precise object identification and tracking.
+keywords: instance segmentation, tracking, YOLO11, Ultralytics, object detection, machine learning, computer vision, python
---
-# Instance Segmentation and Tracking using Ultralytics YOLOv8 🚀
+# Instance Segmentation and Tracking using Ultralytics YOLO11 🚀
## What is [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation)?
-[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation), it uniquely labels and precisely delineates each object, crucial for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and medical imaging.
+[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation), it uniquely labels and precisely delineates each object, crucial for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and medical imaging.
There are two types of instance segmentation tracking available in the Ultralytics package:
@@ -24,7 +24,7 @@ There are two types of instance segmentation tracking available in the Ultralyti
allowfullscreen>
- Watch: Instance Segmentation with Object Tracking using Ultralytics YOLOv8
+ Watch: Instance Segmentation with Object Tracking using Ultralytics YOLO11
## Samples
@@ -44,7 +44,7 @@ There are two types of instance segmentation tracking available in the Ultralyti
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n-seg.pt") # segmentation model
+ model = YOLO("yolo11n-seg.pt") # segmentation model
names = model.model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -82,16 +82,12 @@ There are two types of instance segmentation tracking available in the Ultralyti
=== "Instance Segmentation with Object Tracking"
```python
- from collections import defaultdict
-
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- track_history = defaultdict(lambda: [])
-
- model = YOLO("yolov8n-seg.pt") # segmentation model
+ model = YOLO("yolo11n-seg.pt") # segmentation model
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -142,9 +138,9 @@ For any inquiries, feel free to post your questions in the [Ultralytics Issue Se
## FAQ
-### How do I perform instance segmentation using Ultralytics YOLOv8?
+### How do I perform instance segmentation using Ultralytics YOLO11?
-To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO model with a segmentation version of YOLOv8 and process video frames through it. Here's a simplified code example:
+To perform instance segmentation using Ultralytics YOLO11, initialize the YOLO model with a segmentation version of YOLO11 and process video frames through it. Here's a simplified code example:
!!! example
@@ -156,7 +152,7 @@ To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO m
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n-seg.pt") # segmentation model
+ model = YOLO("yolo11n-seg.pt") # segmentation model
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -186,17 +182,17 @@ To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO m
cv2.destroyAllWindows()
```
-Learn more about instance segmentation in the [Ultralytics YOLOv8 guide](#what-is-instance-segmentation).
+Learn more about instance segmentation in the [Ultralytics YOLO11 guide](#what-is-instance-segmentation).
-### What is the difference between instance segmentation and object tracking in Ultralytics YOLOv8?
+### What is the difference between instance segmentation and object tracking in Ultralytics YOLO11?
-Instance segmentation identifies and outlines individual objects within an image, giving each object a unique label and mask. Object tracking extends this by assigning consistent labels to objects across video frames, facilitating continuous tracking of the same objects over time. Learn more about the distinctions in the [Ultralytics YOLOv8 documentation](#samples).
+Instance segmentation identifies and outlines individual objects within an image, giving each object a unique label and mask. Object tracking extends this by assigning consistent labels to objects across video frames, facilitating continuous tracking of the same objects over time. Learn more about the distinctions in the [Ultralytics YOLO11 documentation](#samples).
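
A quick way to see the distinction in code (model name and paths are placeholders): a plain prediction returns masks with no identities, while `model.track()` additionally carries a persistent ID for each object across frames.

```python
from ultralytics import YOLO

model = YOLO("yolo11n-seg.pt")

# Instance segmentation on a single image: masks, but no identities
results = model("path/to/image.jpg")
print(results[0].masks)  # per-object segmentation masks

# Tracking on a video: the same masks plus persistent IDs across frames
for result in model.track("path/to/video/file.mp4", persist=True, stream=True):
    if result.boxes.id is not None:
        print(result.boxes.id.int().tolist())  # track IDs for the current frame
```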
-### Why should I use Ultralytics YOLOv8 for instance segmentation and tracking over other models like Mask R-CNN or Faster R-CNN?
+### Why should I use Ultralytics YOLO11 for instance segmentation and tracking over other models like Mask R-CNN or Faster R-CNN?
-Ultralytics YOLOv8 offers real-time performance, superior [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLOv8 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLOv8 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
+Ultralytics YOLO11 offers real-time performance, superior [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLO11 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLO11 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8).
-### How can I implement object tracking using Ultralytics YOLOv8?
+### How can I implement object tracking using Ultralytics YOLO11?
To implement object tracking, use the `model.track` method and ensure that each object's ID is consistently assigned across frames. Below is a simple example:
@@ -205,16 +201,12 @@ To implement object tracking, use the `model.track` method and ensure that each
=== "Python"
```python
- from collections import defaultdict
-
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- track_history = defaultdict(lambda: [])
-
- model = YOLO("yolov8n-seg.pt") # segmentation model
+ model = YOLO("yolo11n-seg.pt") # segmentation model
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -247,6 +239,6 @@ To implement object tracking, use the `model.track` method and ensure that each
Find more in the [Instance Segmentation and Tracking section](#samples).
-### Are there any datasets provided by Ultralytics suitable for training YOLOv8 models for instance segmentation and tracking?
+### Are there any datasets provided by Ultralytics suitable for training YOLO11 models for instance segmentation and tracking?
-Yes, Ultralytics offers several datasets suitable for training YOLOv8 models, including segmentation and tracking datasets. Dataset examples, structures, and instructions for use can be found in the [Ultralytics Datasets documentation](https://docs.ultralytics.com/datasets/).
+Yes, Ultralytics offers several datasets suitable for training YOLO11 models, including segmentation and tracking datasets. Dataset examples, structures, and instructions for use can be found in the [Ultralytics Datasets documentation](https://docs.ultralytics.com/datasets/).
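
As a small illustration, a segmentation model can be sanity-checked on the bundled `coco8-seg.yaml` sample dataset (a sketch, not a full training recipe):

```python
from ultralytics import YOLO

# Fine-tune a segmentation model on the tiny COCO8-seg sample dataset
model = YOLO("yolo11n-seg.pt")
results = model.train(data="coco8-seg.yaml", epochs=3, imgsz=640)
```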
diff --git a/docs/en/guides/isolating-segmentation-objects.md b/docs/en/guides/isolating-segmentation-objects.md
index 57b4b7ab8d7..e761bd8265c 100644
--- a/docs/en/guides/isolating-segmentation-objects.md
+++ b/docs/en/guides/isolating-segmentation-objects.md
@@ -1,7 +1,7 @@
---
comments: true
description: Learn to extract isolated objects from inference results using Ultralytics Predict Mode. Step-by-step guide for segmentation object isolation.
-keywords: Ultralytics, segmentation, object isolation, Predict Mode, YOLOv8, machine learning, object detection, binary mask, image processing
+keywords: Ultralytics, segmentation, object isolation, Predict Mode, YOLO11, machine learning, object detection, binary mask, image processing
---
# Isolating Segmentation Objects
@@ -24,7 +24,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n-seg.pt")
+ model = YOLO("yolo11n-seg.pt")
# Run inference
results = model.predict()
@@ -135,7 +135,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
=== "Black Background Pixels"
- ```py
+ ```python
# Create 3-channel mask
mask3ch = cv2.cvtColor(b_mask, cv2.COLOR_GRAY2BGR)
@@ -187,7 +187,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
=== "Transparent Background Pixels"
- ```py
+ ```python
# Isolate object with transparent background (when saved as PNG)
isolated = np.dstack([img, b_mask])
```
@@ -244,7 +244,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab
??? example "Example Final Step"
- ```py
+ ```python
# Save isolated object to file
_ = cv2.imwrite(f"{img_name}_{label}-{ci}.png", iso_crop)
```
@@ -263,7 +263,7 @@ import numpy as np
from ultralytics import YOLO
-m = YOLO("yolov8n-seg.pt") # (4)!
+m = YOLO("yolo11n-seg.pt") # (4)!
res = m.predict() # (3)!
# Iterate detection results (5)
@@ -306,16 +306,16 @@ for r in res:
## FAQ
-### How do I isolate objects using Ultralytics YOLOv8 for segmentation tasks?
+### How do I isolate objects using Ultralytics YOLO11 for segmentation tasks?
-To isolate objects using Ultralytics YOLOv8, follow these steps:
+To isolate objects using Ultralytics YOLO11, follow these steps:
1. **Load the model and run inference:**
```python
from ultralytics import YOLO
- model = YOLO("yolov8n-seg.pt")
+ model = YOLO("yolo11n-seg.pt")
results = model.predict(source="path/to/your/image.jpg")
```
@@ -341,7 +341,7 @@ Refer to the guide on [Predict Mode](../modes/predict.md) and the [Segment Task]
### What options are available for saving the isolated objects after segmentation?
-Ultralytics YOLOv8 offers two main options for saving isolated objects:
+Ultralytics YOLO11 offers two main options for saving isolated objects:
1. **With a Black Background:**
@@ -357,7 +357,7 @@ Ultralytics YOLOv8 offers two main options for saving isolated objects:
For further details, visit the [Predict Mode](../modes/predict.md) section.
-### How can I crop isolated objects to their bounding boxes using Ultralytics YOLOv8?
+### How can I crop isolated objects to their bounding boxes using Ultralytics YOLO11?
To crop isolated objects to their bounding boxes:
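
One way to do this, sketched below with placeholder paths, is to slice the original image with each detection's `xyxy` bounding box:

```python
import cv2

from ultralytics import YOLO

model = YOLO("yolo11n-seg.pt")
res = model.predict("path/to/image.jpg")  # placeholder image path

for r in res:
    img = r.orig_img  # original image as a NumPy array (BGR)
    for i, box in enumerate(r.boxes.xyxy.cpu().numpy().astype(int)):
        x1, y1, x2, y2 = box
        crop = img[y1:y2, x1:x2]  # crop the detection to its bounding box
        cv2.imwrite(f"object_{i}.png", crop)
```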
@@ -374,9 +374,9 @@ To crop isolated objects to their bounding boxes:
Learn more about bounding box results in the [Predict Mode](../modes/predict.md#boxes) documentation.
-### Why should I use Ultralytics YOLOv8 for object isolation in segmentation tasks?
+### Why should I use Ultralytics YOLO11 for object isolation in segmentation tasks?
-Ultralytics YOLOv8 provides:
+Ultralytics YOLO11 provides:
- **High-speed** real-time object detection and segmentation.
- **Accurate bounding box and mask generation** for precise object isolation.
@@ -384,9 +384,9 @@ Ultralytics YOLOv8 provides:
Explore the benefits of using YOLO in the [Segment Task documentation](../tasks/segment.md).
-### Can I save isolated objects including the background using Ultralytics YOLOv8?
+### Can I save isolated objects including the background using Ultralytics YOLO11?
-Yes, this is a built-in feature in Ultralytics YOLOv8. Use the `save_crop` argument in the `predict()` method. For example:
+Yes, this is a built-in feature in Ultralytics YOLO11. Use the `save_crop` argument in the `predict()` method. For example:
```python
results = model.predict(source="path/to/your/image.jpg", save_crop=True)
diff --git a/docs/en/guides/kfold-cross-validation.md b/docs/en/guides/kfold-cross-validation.md
index 80009e2eb29..369bfca9077 100644
--- a/docs/en/guides/kfold-cross-validation.md
+++ b/docs/en/guides/kfold-cross-validation.md
@@ -94,8 +94,8 @@ Without further ado, let's dive in!
```python
import pandas as pd
- indx = [label.stem for label in labels] # uses base filename as ID (no extension)
- labels_df = pd.DataFrame([], columns=cls_idx, index=indx)
+ index = [label.stem for label in labels] # uses base filename as ID (no extension)
+ labels_df = pd.DataFrame([], columns=cls_idx, index=index)
```
5. Count the instances of each class-label present in the annotation files.
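
A self-contained sketch of this counting step is shown below; the labels directory and the class index list are placeholders, and each YOLO-format label line is assumed to start with the class index:

```python
from pathlib import Path

import pandas as pd

labels = sorted(Path("path/to/labels").rglob("*.txt"))  # placeholder labels directory
cls_idx = list(range(80))  # placeholder: class indices from your dataset YAML

labels_df = pd.DataFrame(0, index=[lbl.stem for lbl in labels], columns=cls_idx)
for lbl in labels:
    for line in lbl.read_text().splitlines():
        if line.strip():
            labels_df.loc[lbl.stem, int(line.split()[0])] += 1  # first token is the class index
```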
@@ -154,11 +154,11 @@ The rows index the label files, each corresponding to an image in your dataset,
```python
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
- folds_df = pd.DataFrame(index=indx, columns=folds)
+ folds_df = pd.DataFrame(index=index, columns=folds)
- for idx, (train, val) in enumerate(kfolds, start=1):
- folds_df[f"split_{idx}"].loc[labels_df.iloc[train].index] = "train"
- folds_df[f"split_{idx}"].loc[labels_df.iloc[val].index] = "val"
+ for i, (train, val) in enumerate(kfolds, start=1):
+ folds_df[f"split_{i}"].loc[labels_df.iloc[train].index] = "train"
+ folds_df[f"split_{i}"].loc[labels_df.iloc[val].index] = "val"
```
3. Now we will calculate the distribution of class labels for each fold as a ratio of the classes present in `val` to those present in `train`.
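
A sketch of that ratio computation is below; a tiny dummy `labels_df` is used so it runs standalone, whereas in the guide you would reuse the `labels_df`, `ksplit`, and `kfolds` objects from the previous steps:

```python
import pandas as pd
from sklearn.model_selection import KFold

# Dummy stand-in for labels_df (rows = images, columns = class indices) so the sketch runs on its own
labels_df = pd.DataFrame([[2, 0], [1, 1], [0, 3], [1, 2]], columns=[0, 1])

ksplit = 2
kfolds = list(KFold(n_splits=ksplit, shuffle=True, random_state=20).split(labels_df))

fold_lbl_distrb = pd.DataFrame(index=[f"split_{n}" for n in range(1, ksplit + 1)], columns=labels_df.columns)
for n, (train_idx, val_idx) in enumerate(kfolds, start=1):
    train_totals = labels_df.iloc[train_idx].sum()
    val_totals = labels_df.iloc[val_idx].sum()
    # Ratio of class instances in val to those in train (the small epsilon avoids division by zero)
    fold_lbl_distrb.loc[f"split_{n}"] = val_totals / (train_totals + 1e-7)

print(fold_lbl_distrb)
```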
@@ -265,6 +265,7 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
for k in range(ksplit):
dataset_yaml = ds_yamls[k]
+ model = YOLO(weights_path, task="detect")
model.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project) # include any train arguments
results[k] = model.metrics # save output metrics for further analysis
```
diff --git a/docs/en/guides/model-deployment-options.md b/docs/en/guides/model-deployment-options.md
index c2ecf8b6495..2e7e98309b6 100644
--- a/docs/en/guides/model-deployment-options.md
+++ b/docs/en/guides/model-deployment-options.md
@@ -1,26 +1,26 @@
---
comments: true
-description: Learn about YOLOv8's diverse deployment options to maximize your model's performance. Explore PyTorch, TensorRT, OpenVINO, TF Lite, and more!.
-keywords: YOLOv8, deployment options, export formats, PyTorch, TensorRT, OpenVINO, TF Lite, machine learning, model deployment
+description: Learn about YOLO11's diverse deployment options to maximize your model's performance. Explore PyTorch, TensorRT, OpenVINO, TF Lite, and more!
+keywords: YOLO11, deployment options, export formats, PyTorch, TensorRT, OpenVINO, TF Lite, machine learning, model deployment
---
-# Understanding YOLOv8's Deployment Options
+# Understanding YOLO11's Deployment Options
## Introduction
-You've come a long way on your journey with YOLOv8. You've diligently collected data, meticulously annotated it, and put in the hours to train and rigorously evaluate your custom YOLOv8 model. Now, it's time to put your model to work for your specific application, use case, or project. But there's a critical decision that stands before you: how to export and deploy your model effectively.
+You've come a long way on your journey with YOLO11. You've diligently collected data, meticulously annotated it, and put in the hours to train and rigorously evaluate your custom YOLO11 model. Now, it's time to put your model to work for your specific application, use case, or project. But there's a critical decision that stands before you: how to export and deploy your model effectively.
-This guide walks you through YOLOv8's deployment options and the essential factors to consider to choose the right option for your project.
+This guide walks you through YOLO11's deployment options and the essential factors to consider to choose the right option for your project.
-## How to Select the Right Deployment Option for Your YOLOv8 Model
+## How to Select the Right Deployment Option for Your YOLO11 Model
-When it's time to deploy your YOLOv8 model, selecting a suitable export format is very important. As outlined in the [Ultralytics YOLOv8 Modes documentation](../modes/export.md#usage-examples), the model.export() function allows for converting your trained model into a variety of formats tailored to diverse environments and performance requirements.
+When it's time to deploy your YOLO11 model, selecting a suitable export format is very important. As outlined in the [Ultralytics YOLO11 Modes documentation](../modes/export.md#usage-examples), the model.export() function allows for converting your trained model into a variety of formats tailored to diverse environments and performance requirements.
The ideal format depends on your model's intended operational context, balancing speed, hardware constraints, and ease of integration. In the following section, we'll take a closer look at each export option, understanding when to choose each one.
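
As a quick reminder of what that looks like in practice, exporting a trained model is a one-line call per format; ONNX is used below purely as an example, and the same pattern applies to the other formats discussed in this guide:

```python
from ultralytics import YOLO

# Load a trained model and export it to a deployment format
model = YOLO("yolo11n.pt")
model.export(format="onnx")  # other format keys include "engine" (TensorRT), "openvino", "tflite", ...
```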
-### YOLOv8's Deployment Options
+### YOLO11's Deployment Options
-Let's walk through the different YOLOv8 deployment options. For a detailed walkthrough of the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
+Let's walk through the different YOLO11 deployment options. For a detailed walkthrough of the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
#### PyTorch
@@ -258,57 +258,62 @@ NCNN is a high-performance neural network inference framework optimized for the
- **Hardware Acceleration**: Tailored for ARM CPUs and GPUs, with specific optimizations for these architectures.
-## Comparative Analysis of YOLOv8 Deployment Options
-
-The following table provides a snapshot of the various deployment options available for YOLOv8 models, helping you to assess which may best fit your project needs based on several critical criteria. For an in-depth look at each deployment option's format, please see the [Ultralytics documentation page on export formats](../modes/export.md#export-formats).
-
-| Deployment Option | Performance Benchmarks | Compatibility and Integration | Community Support and Ecosystem | Case Studies | Maintenance and Updates | Security Considerations | Hardware Acceleration |
-| ----------------- | ----------------------------------------------- | ---------------------------------------------- | --------------------------------------------- | ------------------------------------------ | ------------------------------------------- | ------------------------------------------------- | ---------------------------------- |
-| PyTorch | Good flexibility; may trade off raw performance | Excellent with Python libraries | Extensive resources and community | Research and prototypes | Regular, active development | Dependent on deployment environment | CUDA support for GPU acceleration |
-| TorchScript | Better for production than PyTorch | Smooth transition from PyTorch to C++ | Specialized but narrower than PyTorch | Industry where Python is a bottleneck | Consistent updates with PyTorch | Improved security without full Python | Inherits CUDA support from PyTorch |
-| ONNX | Variable depending on runtime | High across different frameworks | Broad ecosystem, supported by many orgs | Flexibility across ML frameworks | Regular updates for new operations | Ensure secure conversion and deployment practices | Various hardware optimizations |
-| OpenVINO | Optimized for Intel hardware | Best within Intel ecosystem | Solid in computer vision domain | IoT and edge with Intel hardware | Regular updates for Intel hardware | Robust features for sensitive applications | Tailored for Intel hardware |
-| TensorRT | Top-tier on NVIDIA GPUs | Best for NVIDIA hardware | Strong network through NVIDIA | Real-time video and image inference | Frequent updates for new GPUs | Emphasis on security | Designed for NVIDIA GPUs |
-| CoreML | Optimized for on-device Apple hardware | Exclusive to Apple ecosystem | Strong Apple and developer support | On-device ML on Apple products | Regular Apple updates | Focus on privacy and security | Apple neural engine and GPU |
-| TF SavedModel | Scalable in server environments | Wide compatibility in TensorFlow ecosystem | Large support due to TensorFlow popularity | Serving models at scale | Regular updates by Google and community | Robust features for enterprise | Various hardware accelerations |
-| TF GraphDef | Stable for static computation graphs | Integrates well with TensorFlow infrastructure | Resources for optimizing static graphs | Scenarios requiring static graphs | Updates alongside TensorFlow core | Established TensorFlow security practices | TensorFlow acceleration options |
-| TF Lite | Speed and efficiency on mobile/embedded | Wide range of device support | Robust community, Google backed | Mobile applications with minimal footprint | Latest features for mobile | Secure environment on end-user devices | GPU and DSP among others |
-| TF Edge TPU | Optimized for Google's Edge TPU hardware | Exclusive to Edge TPU devices | Growing with Google and third-party resources | IoT devices requiring real-time processing | Improvements for new Edge TPU hardware | Google's robust IoT security | Custom-designed for Google Coral |
-| TF.js | Reasonable in-browser performance | High with web technologies | Web and Node.js developers support | Interactive web applications | TensorFlow team and community contributions | Web platform security model | Enhanced with WebGL and other APIs |
-| PaddlePaddle | Competitive, easy to use and scalable | Baidu ecosystem, wide application support | Rapidly growing, especially in China | Chinese market and language processing | Focus on Chinese AI applications | Emphasizes data privacy and security | Including Baidu's Kunlun chips |
-| NCNN | Optimized for mobile ARM-based devices | Mobile and embedded ARM systems | Niche but active mobile/embedded ML community | Android and ARM systems efficiency | High performance maintenance on ARM | On-device security advantages | ARM CPUs and GPUs optimizations |
+#### MNN
+
+MNN is a highly efficient and lightweight deep learning framework. It supports on-device inference and training of deep learning models with industry-leading performance, and it is also widely used on embedded and IoT devices.
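
If you want to target MNN from Ultralytics, the export call follows the same pattern as the other formats; the snippet below assumes MNN export support is available in your installed `ultralytics` version:

```python
from ultralytics import YOLO

# Export a YOLO11 model to MNN (assumes your ultralytics version includes MNN export support)
model = YOLO("yolo11n.pt")
model.export(format="mnn")
```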
+
+## Comparative Analysis of YOLO11 Deployment Options
+
+The following table provides a snapshot of the various deployment options available for YOLO11 models, helping you to assess which may best fit your project needs based on several critical criteria. For an in-depth look at each deployment option's format, please see the [Ultralytics documentation page on export formats](../modes/export.md#export-formats).
+
+| Deployment Option | Performance Benchmarks | Compatibility and Integration | Community Support and Ecosystem | Case Studies | Maintenance and Updates | Security Considerations | Hardware Acceleration |
+| ----------------- | ----------------------------------------------- | ---------------------------------------------- | --------------------------------------------- | ------------------------------------------ | ---------------------------------------------- | ------------------------------------------------- | ---------------------------------- |
+| PyTorch | Good flexibility; may trade off raw performance | Excellent with Python libraries | Extensive resources and community | Research and prototypes | Regular, active development | Dependent on deployment environment | CUDA support for GPU acceleration |
+| TorchScript | Better for production than PyTorch | Smooth transition from PyTorch to C++ | Specialized but narrower than PyTorch | Industry where Python is a bottleneck | Consistent updates with PyTorch | Improved security without full Python | Inherits CUDA support from PyTorch |
+| ONNX | Variable depending on runtime | High across different frameworks | Broad ecosystem, supported by many orgs | Flexibility across ML frameworks | Regular updates for new operations | Ensure secure conversion and deployment practices | Various hardware optimizations |
+| OpenVINO | Optimized for Intel hardware | Best within Intel ecosystem | Solid in computer vision domain | IoT and edge with Intel hardware | Regular updates for Intel hardware | Robust features for sensitive applications | Tailored for Intel hardware |
+| TensorRT | Top-tier on NVIDIA GPUs | Best for NVIDIA hardware | Strong network through NVIDIA | Real-time video and image inference | Frequent updates for new GPUs | Emphasis on security | Designed for NVIDIA GPUs |
+| CoreML | Optimized for on-device Apple hardware | Exclusive to Apple ecosystem | Strong Apple and developer support | On-device ML on Apple products | Regular Apple updates | Focus on privacy and security | Apple neural engine and GPU |
+| TF SavedModel | Scalable in server environments | Wide compatibility in TensorFlow ecosystem | Large support due to TensorFlow popularity | Serving models at scale | Regular updates by Google and community | Robust features for enterprise | Various hardware accelerations |
+| TF GraphDef | Stable for static computation graphs | Integrates well with TensorFlow infrastructure | Resources for optimizing static graphs | Scenarios requiring static graphs | Updates alongside TensorFlow core | Established TensorFlow security practices | TensorFlow acceleration options |
+| TF Lite | Speed and efficiency on mobile/embedded | Wide range of device support | Robust community, Google backed | Mobile applications with minimal footprint | Latest features for mobile | Secure environment on end-user devices | GPU and DSP among others |
+| TF Edge TPU | Optimized for Google's Edge TPU hardware | Exclusive to Edge TPU devices | Growing with Google and third-party resources | IoT devices requiring real-time processing | Improvements for new Edge TPU hardware | Google's robust IoT security | Custom-designed for Google Coral |
+| TF.js | Reasonable in-browser performance | High with web technologies | Web and Node.js developers support | Interactive web applications | TensorFlow team and community contributions | Web platform security model | Enhanced with WebGL and other APIs |
+| PaddlePaddle | Competitive, easy to use and scalable | Baidu ecosystem, wide application support | Rapidly growing, especially in China | Chinese market and language processing | Focus on Chinese AI applications | Emphasizes data privacy and security | Including Baidu's Kunlun chips |
+| MNN | High performance on mobile devices | Mobile and embedded ARM systems and x86-64 CPUs | Mobile/embedded ML community | Mobile systems efficiency | High-performance maintenance on mobile devices | On-device security advantages | ARM CPUs and GPUs optimizations |
+| NCNN | Optimized for mobile ARM-based devices | Mobile and embedded ARM systems | Niche but active mobile/embedded ML community | Android and ARM systems efficiency | High performance maintenance on ARM | On-device security advantages | ARM CPUs and GPUs optimizations |
This comparative analysis gives you a high-level overview. When deciding on a deployment option, it's essential to consider the specific requirements and constraints of your project and to consult the detailed documentation and resources available for each option.
## Community and Support
-When you're getting started with YOLOv8, having a helpful community and support can make a significant impact. Here's how to connect with others who share your interests and get the assistance you need.
+When you're getting started with YOLO11, having a helpful community and support can make a significant impact. Here's how to connect with others who share your interests and get the assistance you need.
### Engage with the Broader Community
-- **GitHub Discussions:** The YOLOv8 repository on GitHub has a "Discussions" section where you can ask questions, report issues, and suggest improvements.
+- **GitHub Discussions:** The YOLO11 repository on GitHub has a "Discussions" section where you can ask questions, report issues, and suggest improvements.
- **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and developers.
### Official Documentation and Resources
-- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting.
+- **Ultralytics YOLO11 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting.
-These resources will help you tackle challenges and stay updated on the latest trends and best practices in the YOLOv8 community.
+These resources will help you tackle challenges and stay updated on the latest trends and best practices in the YOLO11 community.
## Conclusion
-In this guide, we've explored the different deployment options for YOLOv8. We've also discussed the important factors to consider when making your choice. These options allow you to customize your model for various environments and performance requirements, making it suitable for real-world applications.
+In this guide, we've explored the different deployment options for YOLO11. We've also discussed the important factors to consider when making your choice. These options allow you to customize your model for various environments and performance requirements, making it suitable for real-world applications.
-Don't forget that the YOLOv8 and Ultralytics community is a valuable source of help. Connect with other developers and experts to learn unique tips and solutions you might not find in regular documentation. Keep seeking knowledge, exploring new ideas, and sharing your experiences.
+Don't forget that the YOLO11 and Ultralytics community is a valuable source of help. Connect with other developers and experts to learn unique tips and solutions you might not find in regular documentation. Keep seeking knowledge, exploring new ideas, and sharing your experiences.
Happy deploying!
## FAQ
-### What are the deployment options available for YOLOv8 on different hardware platforms?
+### What are the deployment options available for YOLO11 on different hardware platforms?
-Ultralytics YOLOv8 supports various deployment formats, each designed for specific environments and hardware platforms. Key formats include:
+Ultralytics YOLO11 supports various deployment formats, each designed for specific environments and hardware platforms. Key formats include:
- **PyTorch** for research and prototyping, with excellent Python integration.
- **TorchScript** for production environments where Python is unavailable.
@@ -318,18 +323,18 @@ Ultralytics YOLOv8 supports various deployment formats, each designed for specif
Each format has unique advantages. For a detailed walkthrough, see our [export process documentation](../modes/export.md#usage-examples).
-### How do I improve the inference speed of my YOLOv8 model on an Intel CPU?
+### How do I improve the inference speed of my YOLO11 model on an Intel CPU?
-To enhance inference speed on Intel CPUs, you can deploy your YOLOv8 model using Intel's OpenVINO toolkit. OpenVINO offers significant performance boosts by optimizing models to leverage Intel hardware efficiently.
+To enhance inference speed on Intel CPUs, you can deploy your YOLO11 model using Intel's OpenVINO toolkit. OpenVINO offers significant performance boosts by optimizing models to leverage Intel hardware efficiently.
-1. Convert your YOLOv8 model to the OpenVINO format using the `model.export()` function.
+1. Convert your YOLO11 model to the OpenVINO format using the `model.export()` function, as shown in the sketch below.
2. Follow the detailed setup guide in the [Intel OpenVINO Export documentation](../integrations/openvino.md).
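+
+As a quick illustration of step 1, a minimal export sketch looks like the following; the output is typically written to a local `*_openvino_model` directory that you can load back for inference (the image path is hypothetical):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")  # load a pretrained YOLO11 model
+model.export(format="openvino")  # creates an OpenVINO model directory, e.g. yolo11n_openvino_model/
+
+# Load the exported model and run inference on Intel hardware
+ov_model = YOLO("yolo11n_openvino_model/")
+results = ov_model.predict("path/to/image.jpg")
+```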
For more insights, check out our [blog post](https://www.ultralytics.com/blog/achieve-faster-inference-speeds-ultralytics-yolov8-openvino).
-### Can I deploy YOLOv8 models on mobile devices?
+### Can I deploy YOLO11 models on mobile devices?
-Yes, YOLOv8 models can be deployed on mobile devices using [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite (TF Lite) for both Android and iOS platforms. TF Lite is designed for mobile and embedded devices, providing efficient on-device inference.
+Yes, YOLO11 models can be deployed on mobile devices using [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite (TF Lite) for both Android and iOS platforms. TF Lite is designed for mobile and embedded devices, providing efficient on-device inference.
!!! example
@@ -349,9 +354,9 @@ Yes, YOLOv8 models can be deployed on mobile devices using [TensorFlow](https://
For more details on deploying models to mobile, refer to our [TF Lite integration guide](../integrations/tflite.md).
-### What factors should I consider when choosing a deployment format for my YOLOv8 model?
+### What factors should I consider when choosing a deployment format for my YOLO11 model?
-When choosing a deployment format for YOLOv8, consider the following factors:
+When choosing a deployment format for YOLO11, consider the following factors:
- **Performance**: Some formats like TensorRT provide exceptional speeds on NVIDIA GPUs, while OpenVINO is optimized for Intel hardware.
- **Compatibility**: ONNX offers broad compatibility across different platforms.
@@ -360,11 +365,11 @@ When choosing a deployment format for YOLOv8, consider the following factors:
For a comparative analysis, refer to our [export formats documentation](../modes/export.md#export-formats).
-### How can I deploy YOLOv8 models in a web application?
+### How can I deploy YOLO11 models in a web application?
-To deploy YOLOv8 models in a web application, you can use TensorFlow.js (TF.js), which allows for running [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser. This approach eliminates the need for backend infrastructure and provides real-time performance.
+To deploy YOLO11 models in a web application, you can use TensorFlow.js (TF.js), which allows for running [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser. This approach eliminates the need for backend infrastructure and provides real-time performance.
-1. Export the YOLOv8 model to the TF.js format.
+1. Export the YOLO11 model to the TF.js format (see the sketch after this list).
2. Integrate the exported model into your web application.
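+
+A minimal sketch of step 1 (step 2 depends on your web stack) could look like this:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")  # load a pretrained YOLO11 model
+model.export(format="tfjs")  # typically produces a *_web_model directory to serve with your app
+```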
For step-by-step instructions, refer to our guide on [TensorFlow.js integration](../integrations/tfjs.md).
diff --git a/docs/en/guides/model-deployment-practices.md b/docs/en/guides/model-deployment-practices.md
index f259779ceb3..5f6d2730aac 100644
--- a/docs/en/guides/model-deployment-practices.md
+++ b/docs/en/guides/model-deployment-practices.md
@@ -27,7 +27,7 @@ It's also important to follow best practices when deploying a model because depl
Oftentimes, once a model is [trained](./model-training-tips.md), [evaluated](./model-evaluation-insights.md), and [tested](./model-testing.md), it needs to be converted into specific formats to be deployed effectively in various environments, such as cloud, edge, or local devices.
-With respect to YOLOv8, you can [export your model](../modes/export.md) to different formats. For example, when you need to transfer your model between different frameworks, ONNX is an excellent tool and [exporting to YOLOv8 to ONNX](../integrations/onnx.md) is easy. You can check out more options about integrating your model into different environments smoothly and effectively [here](../integrations/index.md).
+With respect to YOLO11, you can [export your model](../modes/export.md) to different formats. For example, when you need to transfer your model between different frameworks, ONNX is an excellent tool and [exporting YOLO11 to ONNX](../integrations/onnx.md) is easy. You can check out more options for integrating your model smoothly and effectively into different environments [here](../integrations/index.md).
### Choosing a Deployment Environment
@@ -94,7 +94,7 @@ Experiencing a drop in your model's accuracy after deployment can be frustrating
- **Review Model Export and Conversion:** Re-export the model and make sure that the conversion process maintains the integrity of the model weights and architecture.
- **Test with a Controlled Dataset:** Deploy the model in a test environment with a dataset you control and compare the results with the training phase. You can identify if the issue is with the deployment environment or the data.
-When deploying YOLOv8, several factors can affect model accuracy. Converting models to formats like [TensorRT](../integrations/tensorrt.md) involves optimizations such as weight quantization and layer fusion, which can cause minor precision losses. Using FP16 (half-precision) instead of FP32 (full-precision) can speed up inference but may introduce numerical precision errors. Also, hardware constraints, like those on the [Jetson Nano](./nvidia-jetson.md), with lower CUDA core counts and reduced memory bandwidth, can impact performance.
+When deploying YOLO11, several factors can affect model accuracy. Converting models to formats like [TensorRT](../integrations/tensorrt.md) involves optimizations such as weight quantization and layer fusion, which can cause minor precision losses. Using FP16 (half-precision) instead of FP32 (full-precision) can speed up inference but may introduce numerical precision errors. Also, hardware constraints, like those on the [Jetson Nano](./nvidia-jetson.md), with lower CUDA core counts and reduced memory bandwidth, can impact performance.
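+
+As an illustrative sketch, exporting a half-precision TensorRT engine (run this on the target NVIDIA device) could look like the following; re-validate accuracy after the export:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# FP16 export trades a small amount of numerical precision for faster inference
+model.export(format="engine", half=True)
+```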
### Inferences Are Taking Longer Than You Expected
@@ -106,7 +106,7 @@ When deploying [machine learning](https://www.ultralytics.com/glossary/machine-l
- **Profile the Inference Pipeline:** Identifying bottlenecks in the inference pipeline can help pinpoint the source of delays. Use profiling tools to analyze each step of the inference process, identifying and addressing any stages that cause significant delays, such as inefficient layers or data transfer issues.
- **Use Appropriate Precision:** Using higher precision than necessary can slow down inference times. Experiment with using lower precision, such as FP16 (half-precision), instead of FP32 (full-precision). While FP16 can reduce inference time, also keep in mind that it can impact model accuracy.
-If you are facing this issue while deploying YOLOv8, consider that YOLOv8 offers [various model sizes](../models/yolov8.md), such as YOLOv8n (nano) for devices with lower memory capacity and YOLOv8x (extra-large) for more powerful GPUs. Choosing the right model variant for your hardware can help balance memory usage and processing time.
+If you are facing this issue while deploying YOLO11, consider that YOLO11 offers [various model sizes](../models/yolo11.md), such as YOLO11n (nano) for devices with lower memory capacity and YOLO11x (extra-large) for more powerful GPUs. Choosing the right model variant for your hardware can help balance memory usage and processing time.
Also keep in mind that the size of the input images directly impacts memory usage and processing time. Lower resolutions reduce memory usage and speed up inference, while higher resolutions improve accuracy but require more memory and processing power.
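+
+As an illustrative sketch (the image path is hypothetical), pairing a smaller model variant with a lower input resolution is often the quickest way to cut latency and memory on constrained hardware:
+
+```python
+from ultralytics import YOLO
+
+# Smaller variant and lower resolution for constrained devices
+model = YOLO("yolo11n.pt")
+results = model.predict("path/to/image.jpg", imgsz=320)
+
+# Larger variant and higher resolution when accuracy matters more than speed
+# model = YOLO("yolo11x.pt")
+# results = model.predict("path/to/image.jpg", imgsz=1280)
+```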
@@ -132,12 +132,12 @@ Being part of a community of computer vision enthusiasts can help you solve prob
### Community Resources
-- **GitHub Issues:** Explore the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
+- **GitHub Issues:** Explore the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Visit the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
+- **Ultralytics YOLO11 Documentation:** Visit the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community.
@@ -149,22 +149,22 @@ After deploying your model, the next step would be monitoring, maintaining, and
## FAQ
-### What are the best practices for deploying a machine learning model using Ultralytics YOLOv8?
+### What are the best practices for deploying a machine learning model using Ultralytics YOLO11?
-Deploying a machine learning model, particularly with Ultralytics YOLOv8, involves several best practices to ensure efficiency and reliability. First, choose the deployment environment that suits your needsโcloud, edge, or local. Optimize your model through techniques like [pruning, quantization, and knowledge distillation](#model-optimization-techniques) for efficient deployment in resource-constrained environments. Lastly, ensure data consistency and preprocessing steps align with the training phase to maintain performance. You can also refer to [model deployment options](./model-deployment-options.md) for more detailed guidelines.
+Deploying a machine learning model, particularly with Ultralytics YOLO11, involves several best practices to ensure efficiency and reliability. First, choose the deployment environment that suits your needs: cloud, edge, or local. Optimize your model through techniques like [pruning, quantization, and knowledge distillation](#model-optimization-techniques) for efficient deployment in resource-constrained environments. Lastly, ensure data consistency and preprocessing steps align with the training phase to maintain performance. You can also refer to [model deployment options](./model-deployment-options.md) for more detailed guidelines.
-### How can I troubleshoot common deployment issues with Ultralytics YOLOv8 models?
+### How can I troubleshoot common deployment issues with Ultralytics YOLO11 models?
Troubleshooting deployment issues can be broken down into a few key steps. If your model's accuracy drops after deployment, check for data consistency, validate preprocessing steps, and ensure the hardware/software environment matches what you used during training. For slow inference times, perform warm-up runs, optimize your inference engine, use asynchronous processing, and profile your inference pipeline. Refer to [troubleshooting deployment issues](#troubleshooting-deployment-issues) for a detailed guide on these best practices.
-### How does Ultralytics YOLOv8 optimization enhance model performance on edge devices?
+### How does Ultralytics YOLO11 optimization enhance model performance on edge devices?
-Optimizing Ultralytics YOLOv8 models for edge devices involves using techniques like pruning to reduce the model size, quantization to convert weights to lower precision, and knowledge distillation to train smaller models that mimic larger ones. These techniques ensure the model runs efficiently on devices with limited computational power. Tools like [TensorFlow Lite](../integrations/tflite.md) and [NVIDIA Jetson](./nvidia-jetson.md) are particularly useful for these optimizations. Learn more about these techniques in our section on [model optimization](#model-optimization-techniques).
+Optimizing Ultralytics YOLO11 models for edge devices involves using techniques like pruning to reduce the model size, quantization to convert weights to lower precision, and knowledge distillation to train smaller models that mimic larger ones. These techniques ensure the model runs efficiently on devices with limited computational power. Tools like [TensorFlow Lite](../integrations/tflite.md) and [NVIDIA Jetson](./nvidia-jetson.md) are particularly useful for these optimizations. Learn more about these techniques in our section on [model optimization](#model-optimization-techniques).
-### What are the security considerations for deploying machine learning models with Ultralytics YOLOv8?
+### What are the security considerations for deploying machine learning models with Ultralytics YOLO11?
Security is paramount when deploying machine learning models. Ensure secure data transmission using encryption protocols like TLS. Implement robust access controls, including strong authentication and role-based access control (RBAC). Model obfuscation techniques, such as encrypting model parameters and serving models in a secure environment like a trusted execution environment (TEE), offer additional protection. For detailed practices, refer to [security considerations](#security-considerations-in-model-deployment).
-### How do I choose the right deployment environment for my Ultralytics YOLOv8 model?
+### How do I choose the right deployment environment for my Ultralytics YOLO11 model?
-Selecting the optimal deployment environment for your Ultralytics YOLOv8 model depends on your application's specific needs. Cloud deployment offers scalability and ease of access, making it ideal for applications with high data volumes. Edge deployment is best for low-latency applications requiring real-time responses, using tools like [TensorFlow Lite](../integrations/tflite.md). Local deployment suits scenarios needing stringent data privacy and control. For a comprehensive overview of each environment, check out our section on [choosing a deployment environment](#choosing-a-deployment-environment).
+Selecting the optimal deployment environment for your Ultralytics YOLO11 model depends on your application's specific needs. Cloud deployment offers scalability and ease of access, making it ideal for applications with high data volumes. Edge deployment is best for low-latency applications requiring real-time responses, using tools like [TensorFlow Lite](../integrations/tflite.md). Local deployment suits scenarios needing stringent data privacy and control. For a comprehensive overview of each environment, check out our section on [choosing a deployment environment](#choosing-a-deployment-environment).
diff --git a/docs/en/guides/model-evaluation-insights.md b/docs/en/guides/model-evaluation-insights.md
index ef9389c266b..5b16a99bdb1 100644
--- a/docs/en/guides/model-evaluation-insights.md
+++ b/docs/en/guides/model-evaluation-insights.md
@@ -1,6 +1,6 @@
---
comments: true
-description: Explore the most effective ways to assess and refine YOLOv8 models for better performance. Learn about evaluation metrics, fine-tuning processes, and how to customize your model for specific needs.
+description: Explore the most effective ways to assess and refine YOLO11 models for better performance. Learn about evaluation metrics, fine-tuning processes, and how to customize your model for specific needs.
keywords: Model Evaluation, Machine Learning Model Evaluation, Fine Tuning Machine Learning, Fine Tune Model, Evaluating Models, Model Fine Tuning, How to Fine Tune a Model
---
@@ -10,6 +10,17 @@ keywords: Model Evaluation, Machine Learning Model Evaluation, Fine Tuning Machi
Once you've [trained](./model-training-tips.md) your computer vision model, evaluating and refining it to perform optimally is essential. Just training your model isn't enough. You need to make sure that your model is accurate, efficient, and fulfills the [objective](./defining-project-goals.md) of your computer vision project. By evaluating and fine-tuning your model, you can identify weaknesses, improve its accuracy, and boost overall performance.
+
+ Watch: Insights into Model Evaluation and Fine-Tuning | Tips for Improving Mean Average Precision
+
In this guide, we'll share insights on model evaluation and fine-tuning that'll make this [step of a computer vision project](./steps-of-a-cv-project.md) more approachable. We'll discuss how to understand evaluation metrics and implement fine-tuning techniques, giving you the knowledge to elevate your model's capabilities.
## Evaluating Model Performance Using Metrics
@@ -20,7 +31,7 @@ Evaluating how well a model performs helps us understand how effectively it work
The confidence score represents the model's certainty that a detected object belongs to a particular class. It ranges from 0 to 1, with higher scores indicating greater confidence. The confidence score helps filter predictions; only detections with confidence scores above a specified threshold are considered valid.
-_Quick Tip:_ When running inferences, if you aren't seeing any predictions and you've checked everything else, try lowering the confidence score. Sometimes, the threshold is too high, causing the model to ignore valid predictions. Lowering the score allows the model to consider more possibilities. This might not meet your project goals, but it's a good way to see what the model can do and decide how to fine-tune it.
+_Quick Tip:_ When running inferences, if you aren't seeing any predictions, and you've checked everything else, try lowering the confidence score. Sometimes, the threshold is too high, causing the model to ignore valid predictions. Lowering the score allows the model to consider more possibilities. This might not meet your project goals, but it's a good way to see what the model can do and decide how to fine-tune it.
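+
+For example, a quick way to try this (the image path is hypothetical) is to pass a lower `conf` value at prediction time:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Lower the confidence threshold temporarily to see everything the model considers a detection
+results = model.predict("path/to/image.jpg", conf=0.1)
+```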
### Intersection over Union
@@ -45,23 +56,23 @@ Other mAP metrics include mAP@0.75, which uses a stricter IoU threshold of 0.75,
-## Evaluating YOLOv8 Model Performance
+## Evaluating YOLO11 Model Performance
-With respect to YOLOv8, you can use the [validation mode](../modes/val.md) to evaluate the model. Also, be sure to take a look at our guide that goes in-depth into [YOLOv8 performance metrics](./yolo-performance-metrics.md) and how they can be interpreted.
+With respect to YOLO11, you can use the [validation mode](../modes/val.md) to evaluate the model. Also, be sure to take a look at our guide that goes in-depth into [YOLO11 performance metrics](./yolo-performance-metrics.md) and how they can be interpreted.
### Common Community Questions
-When evaluating your YOLOv8 model, you might run into a few hiccups. Based on common community questions, here are some tips to help you get the most out of your YOLOv8 model:
+When evaluating your YOLO11 model, you might run into a few hiccups. Based on common community questions, here are some tips to help you get the most out of your YOLO11 model:
#### Handling Variable Image Sizes
-Evaluating your YOLOv8 model with images of different sizes can help you understand its performance on diverse datasets. Using the `rect=true` validation parameter, YOLOv8 adjusts the network's stride for each batch based on the image sizes, allowing the model to handle rectangular images without forcing them to a single size.
+Evaluating your YOLO11 model with images of different sizes can help you understand its performance on diverse datasets. Using the `rect=true` validation parameter, YOLO11 adjusts the network's stride for each batch based on the image sizes, allowing the model to handle rectangular images without forcing them to a single size.
The `imgsz` validation parameter sets the maximum dimension for image resizing, which is 640 by default. You can adjust this based on your dataset's maximum dimensions and the GPU memory available. Even with `imgsz` set, `rect=true` lets the model manage varying image sizes effectively by dynamically adjusting the stride.
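+
+A minimal validation sketch combining both parameters could look like this:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Validate with rectangular batching and a maximum image dimension of 640
+metrics = model.val(data="coco8.yaml", imgsz=640, rect=True)
+print(metrics.box.map)  # mAP50-95
+```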
-#### Accessing YOLOv8 Metrics
+#### Accessing YOLO11 Metrics
-If you want to get a deeper understanding of your YOLOv8 model's performance, you can easily access specific evaluation metrics with a few lines of Python code. The code snippet below will let you load your model, run an evaluation, and print out various metrics that show how well your model is doing.
+If you want to get a deeper understanding of your YOLO11 model's performance, you can easily access specific evaluation metrics with a few lines of Python code. The code snippet below will let you load your model, run an evaluation, and print out various metrics that show how well your model is doing.
!!! example "Usage"
@@ -71,7 +82,7 @@ If you want to get a deeper understanding of your YOLOv8 model's performance, yo
from ultralytics import YOLO
# Load the model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Run the evaluation
results = model.val(data="coco8.yaml")
@@ -101,7 +112,7 @@ If you want to get a deeper understanding of your YOLOv8 model's performance, yo
print("Recall curve:", results.box.r_curve)
```
-The results object also includes speed metrics like preprocess time, inference time, loss, and postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLOv8 model for better performance, making it more effective for your specific use case.
+The results object also includes speed metrics like preprocess time, inference time, loss, and postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLO11 model for better performance, making it more effective for your specific use case.
## How Does Fine-Tuning Work?
@@ -115,11 +126,11 @@ Fine-tuning a model means paying close attention to several vital parameters and
Usually, during the initial training [epochs](https://www.ultralytics.com/glossary/epoch), the learning rate starts low and gradually increases to stabilize the training process. However, since your model has already learned some features from the previous dataset, starting with a higher learning rate right away can be more beneficial.
-When evaluating your YOLOv8 model, you can set the `warmup_epochs` validation parameter to `warmup_epochs=0` to prevent the learning rate from starting too high. By following this process, the training will continue from the provided weights, adjusting to the nuances of your new data.
+When fine-tuning your YOLO11 model, you can set the `warmup_epochs` training parameter to `warmup_epochs=0` to prevent the learning rate from starting too high. By following this process, the training will continue from the provided weights, adjusting to the nuances of your new data.
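+
+As an illustrative sketch (the weights and dataset paths are hypothetical), a fine-tuning run that skips the warmup phase could be configured like this:
+
+```python
+from ultralytics import YOLO
+
+# Continue training from previously trained weights
+model = YOLO("path/to/best.pt")
+model.train(data="path/to/your_dataset.yaml", epochs=50, warmup_epochs=0)
+```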
### Image Tiling for Small Objects
-Image tiling can improve detection accuracy for small objects. By dividing larger images into smaller segments, such as splitting 1280x1280 images into multiple 640x640 segments, you maintain the original resolution, and the model can learn from high-resolution fragments. When using YOLOv8, make sure to adjust your labels for these new segments correctly.
+Image tiling can improve detection accuracy for small objects. By dividing larger images into smaller segments, such as splitting 1280x1280 images into multiple 640x640 segments, you maintain the original resolution, and the model can learn from high-resolution fragments. When using YOLO11, make sure to adjust your labels for these new segments correctly.
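+
+The label remapping is dataset-specific, but the image-splitting step itself is simple. A minimal, illustrative sketch using Pillow (paths and tile size are assumptions) could look like this:
+
+```python
+from pathlib import Path
+
+from PIL import Image
+
+
+def tile_image(image_path, out_dir, tile=640):
+    """Split an image into non-overlapping tile x tile crops (labels must be remapped separately)."""
+    img = Image.open(image_path)
+    out_dir = Path(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    width, height = img.size
+    for y in range(0, height, tile):
+        for x in range(0, width, tile):
+            crop = img.crop((x, y, min(x + tile, width), min(y + tile, height)))
+            crop.save(out_dir / f"{Path(image_path).stem}_{x}_{y}.jpg")
+
+
+# Split a 1280x1280 image into four 640x640 tiles
+tile_image("path/to/large_image.jpg", "tiles/")
+```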
## Engage with the Community
@@ -127,12 +138,12 @@ Sharing your ideas and questions with other [computer vision](https://www.ultral
### Finding Help and Support
-- **GitHub Issues:** Explore the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to ask questions, report bugs, and suggest features. The community and maintainers are available to assist with any issues you encounter.
+- **GitHub Issues:** Explore the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to ask questions, report bugs, and suggest features. The community and maintainers are available to assist with any issues you encounter.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for comprehensive guides and valuable insights on various computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for comprehensive guides and valuable insights on various computer vision tasks and projects.
## Final Thoughts
@@ -140,30 +151,30 @@ Evaluating and fine-tuning your computer vision model are important steps for su
## FAQ
-### What are the key metrics for evaluating YOLOv8 model performance?
+### What are the key metrics for evaluating YOLO11 model performance?
-To evaluate YOLOv8 model performance, important metrics include Confidence Score, Intersection over Union (IoU), and Mean Average Precision (mAP). Confidence Score measures the model's certainty for each detected object class. IoU evaluates how well the predicted bounding box overlaps with the ground truth. Mean Average Precision (mAP) aggregates precision scores across classes, with mAP@.5 and mAP@.5:.95 being two common types for varying IoU thresholds. Learn more about these metrics in our [YOLOv8 performance metrics guide](./yolo-performance-metrics.md).
+To evaluate YOLO11 model performance, important metrics include Confidence Score, Intersection over Union (IoU), and Mean Average Precision (mAP). Confidence Score measures the model's certainty for each detected object class. IoU evaluates how well the predicted bounding box overlaps with the ground truth. Mean Average Precision (mAP) aggregates precision scores across classes, with mAP@.5 and mAP@.5:.95 being two common types for varying IoU thresholds. Learn more about these metrics in our [YOLO11 performance metrics guide](./yolo-performance-metrics.md).
-### How can I fine-tune a pre-trained YOLOv8 model for my specific dataset?
+### How can I fine-tune a pre-trained YOLO11 model for my specific dataset?
-Fine-tuning a pre-trained YOLOv8 model involves adjusting its parameters to improve performance on a specific task or dataset. Start by evaluating your model using metrics, then set a higher initial learning rate by adjusting the `warmup_epochs` parameter to 0 for immediate stability. Use parameters like `rect=true` for handling varied image sizes effectively. For more detailed guidance, refer to our section on [fine-tuning YOLOv8 models](#how-does-fine-tuning-work).
+Fine-tuning a pre-trained YOLO11 model involves adjusting its parameters to improve performance on a specific task or dataset. Start by evaluating your model using metrics, then set a higher initial learning rate by adjusting the `warmup_epochs` parameter to 0 for immediate stability. Use parameters like `rect=true` for handling varied image sizes effectively. For more detailed guidance, refer to our section on [fine-tuning YOLO11 models](#how-does-fine-tuning-work).
-### How can I handle variable image sizes when evaluating my YOLOv8 model?
+### How can I handle variable image sizes when evaluating my YOLO11 model?
-To handle variable image sizes during evaluation, use the `rect=true` parameter in YOLOv8, which adjusts the network's stride for each batch based on image sizes. The `imgsz` parameter sets the maximum dimension for image resizing, defaulting to 640. Adjust `imgsz` to suit your dataset and GPU memory. For more details, visit our [section on handling variable image sizes](#handling-variable-image-sizes).
+To handle variable image sizes during evaluation, use the `rect=true` parameter in YOLO11, which adjusts the network's stride for each batch based on image sizes. The `imgsz` parameter sets the maximum dimension for image resizing, defaulting to 640. Adjust `imgsz` to suit your dataset and GPU memory. For more details, visit our [section on handling variable image sizes](#handling-variable-image-sizes).
-### What practical steps can I take to improve mean average precision for my YOLOv8 model?
+### What practical steps can I take to improve mean average precision for my YOLO11 model?
-Improving mean average precision (mAP) for a YOLOv8 model involves several steps:
+Improving mean average precision (mAP) for a YOLO11 model involves several steps:
1. **Tuning Hyperparameters**: Experiment with different learning rates, [batch sizes](https://www.ultralytics.com/glossary/batch-size), and image augmentations.
2. **[Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation)**: Use techniques like Mosaic and MixUp to create diverse training samples.
3. **Image Tiling**: Split larger images into smaller tiles to improve detection accuracy for small objects.
Refer to our detailed guide on [model fine-tuning](#tips-for-fine-tuning-your-model) for specific strategies.
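+
+As one possible starting point (the dataset path is hypothetical), the first two steps can be combined in a single training call and compared against a baseline run:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Experiment with the learning rate and augmentation strength, then compare validation mAP between runs
+model.train(
+    data="path/to/your_dataset.yaml",
+    epochs=100,
+    lr0=0.01,  # initial learning rate
+    mosaic=1.0,  # mosaic augmentation
+    mixup=0.1,  # mixup augmentation
+)
+```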
-### How do I access YOLOv8 model evaluation metrics in Python?
+### How do I access YOLO11 model evaluation metrics in Python?
-You can access YOLOv8 model evaluation metrics using Python with the following steps:
+You can access YOLO11 model evaluation metrics using Python with the following steps:
!!! example "Usage"
@@ -173,7 +184,7 @@ You can access YOLOv8 model evaluation metrics using Python with the following s
from ultralytics import YOLO
# Load the model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Run the evaluation
results = model.val(data="coco8.yaml")
@@ -185,4 +196,4 @@ You can access YOLOv8 model evaluation metrics using Python with the following s
print("Mean recall:", results.box.mr)
```
-Analyzing these metrics helps fine-tune and optimize your YOLOv8 model. For a deeper dive, check out our guide on [YOLOv8 metrics](../modes/val.md).
+Analyzing these metrics helps fine-tune and optimize your YOLO11 model. For a deeper dive, check out our guide on [YOLO11 metrics](../modes/val.md).
diff --git a/docs/en/guides/model-monitoring-and-maintenance.md b/docs/en/guides/model-monitoring-and-maintenance.md
index 2aedc8e3a35..79fa52ea5e4 100644
--- a/docs/en/guides/model-monitoring-and-maintenance.md
+++ b/docs/en/guides/model-monitoring-and-maintenance.md
@@ -10,6 +10,17 @@ keywords: Computer Vision Models, AI Model Monitoring, Data Drift Detection, Ano
If you are here, we can assume you've completed many [steps in your computer vision project](./steps-of-a-cv-project.md): from [gathering requirements](./defining-project-goals.md), [annotating data](./data-collection-and-annotation.md), and [training the model](./model-training-tips.md) to finally [deploying](./model-deployment-practices.md) it. Your application is now running in production, but your project doesn't end here. The most important part of a computer vision project is making sure your model continues to fulfill your [project's objectives](./defining-project-goals.md) over time, and that's where monitoring, maintaining, and documenting your computer vision model enters the picture.
+
+ Watch: How to Maintain Computer Vision Models after Deployment | Data Drift Detection
+
In this guide, we'll take a closer look at how you can maintain your computer vision models after deployment. We'll explore how model monitoring can help you catch problems early on, how to keep your model accurate and up-to-date, and why documentation is important for troubleshooting.
## Model Monitoring is Key
@@ -23,7 +34,7 @@ Regular model monitoring helps developers track the [model's performance](./mode
Here are some best practices to keep in mind while monitoring your computer vision model in production:
- **Track Performance Regularly**: Continuously monitor the model's performance to detect changes over time.
-- **Double Check the Data Quality**: Check for missing values or anomalies in the data.
+- **Double-Check the Data Quality**: Check for missing values or anomalies in the data.
- **Use Diverse Data Sources**: Monitor data from various sources to get a comprehensive view of the model's performance.
- **Combine Monitoring Techniques**: Use a mix of drift detection algorithms and rule-based approaches to identify a wide range of issues.
- **Monitor Inputs and Outputs**: Keep an eye on both the data the model processes and the results it produces to make sure everything is functioning correctly.
@@ -123,12 +134,12 @@ Joining a community of computer vision enthusiasts can help you solve problems a
### Community Resources
-- **GitHub Issues:** Check out the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are highly active and supportive.
+- **GitHub Issues:** Check out the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are highly active and supportive.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Visit the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
+- **Ultralytics YOLO11 Documentation:** Visit the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community.
diff --git a/docs/en/guides/model-testing.md b/docs/en/guides/model-testing.md
index 8d324679554..f6123a845e1 100644
--- a/docs/en/guides/model-testing.md
+++ b/docs/en/guides/model-testing.md
@@ -10,6 +10,17 @@ keywords: Overfitting and Underfitting in Machine Learning, Model Testing, Data
After [training](./model-training-tips.md) and [evaluating](./model-evaluation-insights.md) your model, it's time to test it. Model testing involves assessing how well it performs in real-world scenarios. Testing considers factors like accuracy, reliability, fairness, and how easy it is to understand the model's decisions. The goal is to make sure the model performs as intended, delivers the expected results, and fits into the [overall objective of your application](./defining-project-goals.md) or project.
+
+ Watch: How to Test Machine Learning Models | Avoid Data Leakage in Computer Vision
+
Model testing is quite similar to model evaluation, but they are two distinct [steps in a computer vision project](./steps-of-a-cv-project.md). Model evaluation involves metrics and plots to assess the model's accuracy. On the other hand, model testing checks if the model's learned behavior is the same as expectations. In this guide, we'll explore strategies for testing your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models.
## Model Testing Vs. Model Evaluation
@@ -44,22 +55,22 @@ Next, the testing results can be analyzed:
- **Error Analysis:** Perform a thorough error analysis to understand the types of errors (e.g., false positives vs. false negatives) and their potential causes.
- **Bias and Fairness:** Check for any biases in the model's predictions. Ensure that the model performs equally well across different subsets of the data, especially if it includes sensitive attributes like race, gender, or age.
-## Testing Your YOLOv8 Model
+## Testing Your YOLO11 Model
-To test your YOLOv8 model, you can use the validation mode. It's a straightforward way to understand the model's strengths and areas that need improvement. Also, you'll need to format your test dataset correctly for YOLOv8. For more details on how to use the validation mode, check out the [Model Validation](../modes/val.md) docs page.
+To test your YOLO11 model, you can use the validation mode. It's a straightforward way to understand the model's strengths and areas that need improvement. Also, you'll need to format your test dataset correctly for YOLO11. For more details on how to use the validation mode, check out the [Model Validation](../modes/val.md) docs page.
-## Using YOLOv8 to Predict on Multiple Test Images
+## Using YOLO11 to Predict on Multiple Test Images
-If you want to test your trained YOLOv8 model on multiple images stored in a folder, you can easily do so in one go. Instead of using the validation mode, which is typically used to evaluate model performance on a validation set and provide detailed metrics, you might just want to see predictions on all images in your test set. For this, you can use the [prediction mode](../modes/predict.md).
+If you want to test your trained YOLO11 model on multiple images stored in a folder, you can easily do so in one go. Instead of using the validation mode, which is typically used to evaluate model performance on a validation set and provide detailed metrics, you might just want to see predictions on all images in your test set. For this, you can use the [prediction mode](../modes/predict.md).
### Difference Between Validation and Prediction Modes
- **[Validation Mode](../modes/val.md):** Used to evaluate the model's performance by comparing predictions against known labels (ground truth). It provides detailed metrics such as accuracy, precision, recall, and F1 score.
- **[Prediction Mode](../modes/predict.md):** Used to run the model on new, unseen data to generate predictions. It does not provide detailed performance metrics but allows you to see how the model performs on real-world images.
-## Running YOLOv8 Predictions Without Custom Training
+## Running YOLO11 Predictions Without Custom Training
-If you are interested in testing the basic YOLOv8 model to understand whether it can be used for your application without custom training, you can use the prediction mode. While the model is pre-trained on datasets like COCO, running predictions on your own dataset can give you a quick sense of how well it might perform in your specific context.
+If you are interested in testing the basic YOLO11 model to understand whether it can be used for your application without custom training, you can use the prediction mode. While the model is pre-trained on datasets like COCO, running predictions on your own dataset can give you a quick sense of how well it might perform in your specific context.
## Overfitting and [Underfitting](https://www.ultralytics.com/glossary/underfitting) in [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml)
@@ -81,7 +92,7 @@ Underfitting occurs when your model can't capture the underlying patterns in the
#### Signs of Underfitting
- **Low Training Accuracy:** If your model can't achieve high accuracy on the training set, it might be underfitting.
-- **Visual Misclassification:** Consistent failure to recognize obvious features or objects suggests underfitting.
+- **Visual Mis-classification:** Consistent failure to recognize obvious features or objects suggests underfitting.
### Balancing Overfitting and Underfitting
@@ -128,12 +139,12 @@ Becoming part of a community of computer vision enthusiasts can aid in solving p
### Community Resources
-- **GitHub Issues:** Explore the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
+- **GitHub Issues:** Explore the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
+- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
These resources will help you navigate challenges and remain updated on the latest trends and practices within the computer vision community.
@@ -147,9 +158,9 @@ Building trustworthy computer vision models relies on rigorous model testing. By
Model evaluation and model testing are distinct steps in a computer vision project. Model evaluation involves using a labeled dataset to compute metrics such as [accuracy](https://www.ultralytics.com/glossary/accuracy), precision, recall, and [F1 score](https://www.ultralytics.com/glossary/f1-score), providing insights into the model's performance with a controlled dataset. Model testing, on the other hand, assesses the model's performance in real-world scenarios by applying it to new, unseen data, ensuring the model's learned behavior aligns with expectations outside the evaluation environment. For a detailed guide, refer to the [steps in a computer vision project](./steps-of-a-cv-project.md).
-### How can I test my Ultralytics YOLOv8 model on multiple images?
+### How can I test my Ultralytics YOLO11 model on multiple images?
-To test your Ultralytics YOLOv8 model on multiple images, you can use the [prediction mode](../modes/predict.md). This mode allows you to run the model on new, unseen data to generate predictions without providing detailed metrics. This is ideal for real-world performance testing on larger image sets stored in a folder. For evaluating performance metrics, use the [validation mode](../modes/val.md) instead.
+To test your Ultralytics YOLO11 model on multiple images, you can use the [prediction mode](../modes/predict.md). This mode allows you to run the model on new, unseen data to generate predictions without providing detailed metrics. This is ideal for real-world performance testing on larger image sets stored in a folder. For evaluating performance metrics, use the [validation mode](../modes/val.md) instead.
### What should I do if my computer vision model shows signs of overfitting or underfitting?
@@ -195,6 +206,6 @@ Post-testing, if the model performance meets the project goals, proceed with dep
Gain insights from the [Model Testing Vs. Model Evaluation](#model-testing-vs-model-evaluation) section to refine and enhance model effectiveness in real-world applications.
-### How do I run YOLOv8 predictions without custom training?
+### How do I run YOLO11 predictions without custom training?
-You can run predictions using the pre-trained YOLOv8 model on your dataset to see if it suits your application needs. Utilize the [prediction mode](../modes/predict.md) to get a quick sense of performance results without diving into custom training.
+You can run predictions using the pre-trained YOLO11 model on your dataset to see if it suits your application needs. Utilize the [prediction mode](../modes/predict.md) to get a quick sense of performance results without diving into custom training.
diff --git a/docs/en/guides/model-training-tips.md b/docs/en/guides/model-training-tips.md
index 725081a244b..6ace0bc50ea 100644
--- a/docs/en/guides/model-training-tips.md
+++ b/docs/en/guides/model-training-tips.md
@@ -18,7 +18,7 @@ One of the most important steps when working on a [computer vision project](./st
allowfullscreen>
- Watch: Model Training Tips | How to Handle Large Datasets | Batch Size, GPU Utilization and [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision)
+ Watch: Model Training Tips | How to Handle Large Datasets | Batch Size, GPU Utilization and Mixed Precision
So, what is [model training](../modes/train.md)? Model training is the process of teaching your model to recognize visual patterns and make predictions based on your data. It directly impacts the performance and accuracy of your application. In this guide, we'll cover best practices, optimization techniques, and troubleshooting tips to help you train your computer vision models effectively.
@@ -46,25 +46,25 @@ There are a few different aspects to think about when you are planning on using
When training models on large datasets, efficiently utilizing your GPU is key. Batch size is an important factor. It is the number of data samples that a machine learning model processes in a single training iteration.
Using the maximum batch size supported by your GPU, you can fully take advantage of its capabilities and reduce the time model training takes. However, you want to avoid running out of GPU memory. If you encounter memory errors, reduce the batch size incrementally until the model trains smoothly.
-With respect to YOLOv8, you can set the `batch_size` parameter in the [training configuration](../modes/train.md) to match your GPU capacity. Also, setting `batch=-1` in your training script will automatically determine the [batch size](https://www.ultralytics.com/glossary/batch-size) that can be efficiently processed based on your device's capabilities. By fine-tuning the batch size, you can make the most of your GPU resources and improve the overall training process.
+With respect to YOLO11, you can set the `batch` parameter in the [training configuration](../modes/train.md) to match your GPU capacity. Also, setting `batch=-1` in your training script will automatically determine the [batch size](https://www.ultralytics.com/glossary/batch-size) that can be efficiently processed based on your device's capabilities. By fine-tuning the batch size, you can make the most of your GPU resources and improve the overall training process.
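+
+For example, letting YOLO11 pick the batch size automatically is a one-argument change:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# batch=-1 auto-selects a batch size that fits the available GPU memory
+model.train(data="coco8.yaml", epochs=100, batch=-1)
+```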
### Subset Training
Subset training is a smart strategy that involves training your model on a smaller set of data that represents the larger dataset. It can save time and resources, especially during initial model development and testing. If you are running short on time or experimenting with different model configurations, subset training is a good option.
-When it comes to YOLOv8, you can easily implement subset training by using the `fraction` parameter. This parameter lets you specify what fraction of your dataset to use for training. For example, setting `fraction=0.1` will train your model on 10% of the data. You can use this technique for quick iterations and tuning your model before committing to training a model using a full dataset. Subset training helps you make rapid progress and identify potential issues early on.
+When it comes to YOLO11, you can easily implement subset training by using the `fraction` parameter. This parameter lets you specify what fraction of your dataset to use for training. For example, setting `fraction=0.1` will train your model on 10% of the data. You can use this technique for quick iterations and tuning your model before committing to training a model using a full dataset. Subset training helps you make rapid progress and identify potential issues early on.
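+
+A quick illustrative sketch of a subset run on 10% of the data:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Train on 10% of the dataset for fast iteration before a full training run
+model.train(data="coco8.yaml", epochs=50, fraction=0.1)
+```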
### Multi-scale Training
Multiscale training is a technique that improves your model's ability to generalize by training it on images of varying sizes. Your model can learn to detect objects at different scales and distances and become more robust.
-For example, when you train YOLOv8, you can enable multiscale training by setting the `scale` parameter. This parameter adjusts the size of training images by a specified factor, simulating objects at different distances. For example, setting `scale=0.5` will reduce the image size by half, while `scale=2.0` will double it. Configuring this parameter allows your model to experience a variety of image scales and improve its detection capabilities across different object sizes and scenarios.
+When you train YOLO11, you can enable multiscale training by setting the `scale` parameter. This parameter adjusts the size of training images by a specified factor, simulating objects at different distances. For example, setting `scale=0.5` randomly zooms training images by a factor between 0.5 and 1.5 during training. Configuring this parameter allows your model to experience a variety of image scales and improve its detection capabilities across different object sizes and scenarios.
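+
+For instance, enabling this augmentation is a single training argument:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# scale=0.5 randomly resizes training images within roughly +/-50% of their original size
+model.train(data="coco8.yaml", epochs=100, scale=0.5)
+```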
### Caching
Caching is an important technique to improve the efficiency of training machine learning models. By storing preprocessed images in memory, caching reduces the time the GPU spends waiting for data to be loaded from the disk. The model can continuously receive data without delays caused by disk I/O operations.
-Caching can be controlled when training YOLOv8 using the `cache` parameter:
+Caching can be controlled when training YOLO11 using the `cache` parameter:
- _`cache=True`_: Stores dataset images in RAM, providing the fastest access speed but at the cost of increased memory usage.
- _`cache='disk'`_: Stores the images on disk, slower than RAM but faster than loading fresh data each time.
@@ -80,19 +80,19 @@ Mixed precision training uses both 16-bit (FP16) and 32-bit (FP32) floating-poin
To implement mixed precision training, you'll need to modify your training scripts and ensure your hardware (like GPUs) supports it. Many modern [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) frameworks, such as [Tensorflow](https://www.ultralytics.com/glossary/tensorflow), offer built-in support for mixed precision.
-Mixed precision training is straightforward when working with YOLOv8. You can use the `amp` flag in your training configuration. Setting `amp=True` enables Automatic Mixed Precision (AMP) training. Mixed precision training is a simple yet effective way to optimize your model training process.
+Mixed precision training is straightforward when working with YOLO11. You can use the `amp` flag in your training configuration. Setting `amp=True` enables Automatic Mixed Precision (AMP) training. Mixed precision training is a simple yet effective way to optimize your model training process.
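+
+A minimal sketch of enabling AMP explicitly (it is typically on by default) looks like this:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# amp=True enables Automatic Mixed Precision training
+model.train(data="coco8.yaml", epochs=100, amp=True)
+```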
### Pre-trained Weights
Using pretrained weights is a smart way to speed up your model's training process. Pretrained weights come from models already trained on large datasets, giving your model a head start. [Transfer learning](https://www.ultralytics.com/glossary/transfer-learning) adapts pretrained models to new, related tasks. Fine-tuning a pre-trained model involves starting with these weights and then continuing training on your specific dataset. This method of training results in faster training times and often better performance because the model starts with a solid understanding of basic features.
-The `pretrained` parameter makes transfer learning easy with YOLOv8. Setting `pretrained=True` will use default pre-trained weights, or you can specify a path to a custom pre-trained model. Using pre-trained weights and transfer learning effectively boosts your model's capabilities and reduces training costs.
+The `pretrained` parameter makes transfer learning easy with YOLO11. Setting `pretrained=True` will use default pre-trained weights, or you can specify a path to a custom pre-trained model. Using pre-trained weights and transfer learning effectively boosts your model's capabilities and reduces training costs.
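+
+A minimal sketch, assuming the default `yolo11n.pt` weights and the `coco8.yaml` example dataset (the custom weights path mentioned in the comment is hypothetical):
+
+```python
+from ultralytics import YOLO
+
+# Loading a .pt checkpoint already gives you pretrained weights to start from
+model = YOLO("yolo11n.pt")
+
+# pretrained=True keeps the default behavior; pass a path string instead to load custom weights,
+# e.g. pretrained="path/to/custom_weights.pt"
+model.train(data="coco8.yaml", epochs=100, pretrained=True)
+```
+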
### Other Techniques to Consider When Handling a Large Dataset
There are a couple of other techniques to consider when handling a large dataset:
-- **[Learning Rate](https://www.ultralytics.com/glossary/learning-rate) Schedulers**: Implementing learning rate schedulers dynamically adjusts the learning rate during training. A well-tuned learning rate can prevent the model from overshooting minima and improve stability. When training YOLOv8, the `lrf` parameter helps manage learning rate scheduling by setting the final learning rate as a fraction of the initial rate.
+- **[Learning Rate](https://www.ultralytics.com/glossary/learning-rate) Schedulers**: Implementing learning rate schedulers dynamically adjusts the learning rate during training. A well-tuned learning rate can prevent the model from overshooting minima and improve stability. When training YOLO11, the `lrf` parameter helps manage learning rate scheduling by setting the final learning rate as a fraction of the initial rate, as shown in the sketch after this list.
- **Distributed Training**: For handling large datasets, distributed training can be a game-changer. You can reduce the training time by spreading the training workload across multiple GPUs or machines.
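+
+A minimal sketch combining both ideas, with illustrative values and assuming two GPUs (device indices 0 and 1) are available; `lr0` is the initial learning rate and `lrf` the final-rate fraction:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# lrf scales the final learning rate relative to lr0; device=[0, 1] spreads training across two GPUs (DDP)
+model.train(data="coco8.yaml", epochs=100, lr0=0.01, lrf=0.01, device=[0, 1])
+```
+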
## The Number of Epochs To Train For
@@ -101,7 +101,7 @@ When training a model, an epoch refers to one complete pass through the entire t
A common question that comes up is how to determine the number of epochs to train the model for. A good starting point is 300 epochs. If the model overfits early, you can reduce the number of epochs. If [overfitting](https://www.ultralytics.com/glossary/overfitting) does not occur after 300 epochs, you can extend the training to 600, 1200, or more epochs.
-However, the ideal number of epochs can vary based on your dataset's size and project goals. Larger datasets might require more epochs for the model to learn effectively, while smaller datasets might need fewer epochs to avoid overfitting. With respect to YOLOv8, you can set the `epochs` parameter in your training script.
+However, the ideal number of epochs can vary based on your dataset's size and project goals. Larger datasets might require more epochs for the model to learn effectively, while smaller datasets might need fewer epochs to avoid overfitting. In YOLO11, you can set the `epochs` parameter in your training script.
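+
+A minimal sketch of setting the epoch count (assuming the `coco8.yaml` example dataset; 300 is simply the suggested starting point from above):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Start at 300 epochs and adjust up or down depending on overfitting behavior
+model.train(data="coco8.yaml", epochs=300)
+```
+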
## Early Stopping
@@ -113,7 +113,7 @@ The process involves setting a patience parameter that determines how many [epoc
-For YOLOv8, you can enable early stopping by setting the patience parameter in your training configuration. For example, `patience=5` means training will stop if there's no improvement in validation metrics for 5 consecutive epochs. Using this method ensures the training process remains efficient and achieves optimal performance without excessive computation.
+For YOLO11, you can enable early stopping by setting the patience parameter in your training configuration. For example, `patience=5` means training will stop if there's no improvement in validation metrics for 5 consecutive epochs. Using this method ensures the training process remains efficient and achieves optimal performance without excessive computation.
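+
+A minimal sketch of enabling early stopping via `patience` (illustrative values; the run below would stop early if validation metrics stall for 5 consecutive epochs):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Training stops if there is no validation improvement for 5 consecutive epochs
+model.train(data="coco8.yaml", epochs=600, patience=5)
+```
+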
## Choosing Between Cloud and Local Training
@@ -143,13 +143,13 @@ Different optimizers have various strengths and weaknesses. Let's take a glimpse
- Combines the benefits of both SGD with momentum and RMSProp.
- Adjusts the learning rate for each parameter based on estimates of the first and second moments of the gradients.
- Well-suited for noisy data and sparse gradients.
- - Efficient and generally requires less tuning, making it a recommended optimizer for YOLOv8.
+ - Efficient and generally requires less tuning, making it a recommended optimizer for YOLO11.
- **RMSProp (Root Mean Square Propagation)**:
- Adjusts the learning rate for each parameter by dividing the gradient by a running average of the magnitudes of recent gradients.
- Helps in handling the vanishing gradient problem and is effective for [recurrent neural networks](https://www.ultralytics.com/glossary/recurrent-neural-network-rnn).
-For YOLOv8, the `optimizer` parameter lets you choose from various optimizers, including SGD, Adam, AdamW, NAdam, RAdam, and RMSProp, or you can set it to `auto` for automatic selection based on model configuration.
+For YOLO11, the `optimizer` parameter lets you choose from various optimizers, including SGD, Adam, AdamW, NAdam, RAdam, and RMSProp, or you can set it to `auto` for automatic selection based on model configuration.
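+
+A minimal sketch of selecting an optimizer explicitly (assuming the `coco8.yaml` example dataset; "AdamW" is just one of the supported choices listed above):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Choose an optimizer explicitly, or use optimizer="auto" to let Ultralytics pick one
+model.train(data="coco8.yaml", epochs=100, optimizer="AdamW")
+```
+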
## Connecting with the Community
@@ -157,12 +157,12 @@ Being part of a community of computer vision enthusiasts can help you solve prob
### Community Resources
-- **GitHub Issues:** Visit the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
+- **GitHub Issues:** Visit the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
+- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects.
Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community.
@@ -174,20 +174,20 @@ Training computer vision models involves following good practices, optimizing yo
### How can I improve GPU utilization when training a large dataset with Ultralytics YOLO?
-To improve GPU utilization, set the `batch_size` parameter in your training configuration to the maximum size supported by your GPU. This ensures that you make full use of the GPU's capabilities, reducing training time. If you encounter memory errors, incrementally reduce the batch size until training runs smoothly. For YOLOv8, setting `batch=-1` in your training script will automatically determine the optimal batch size for efficient processing. For further information, refer to the [training configuration](../modes/train.md).
+To improve GPU utilization, set the `batch` parameter in your training configuration to the maximum size supported by your GPU. This ensures that you make full use of the GPU's capabilities, reducing training time. If you encounter memory errors, incrementally reduce the batch size until training runs smoothly. For YOLO11, setting `batch=-1` in your training script will automatically determine the optimal batch size for efficient processing. For further information, refer to the [training configuration](../modes/train.md).
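+
+A minimal sketch of automatic batch-size selection (assuming a CUDA GPU is available and the `coco8.yaml` example dataset):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# batch=-1 lets Ultralytics pick the largest batch size that fits in GPU memory
+model.train(data="coco8.yaml", epochs=100, batch=-1)
+```
+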
-### What is mixed precision training, and how do I enable it in YOLOv8?
+### What is mixed precision training, and how do I enable it in YOLO11?
-Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model [accuracy](https://www.ultralytics.com/glossary/accuracy). To enable mixed precision training in YOLOv8, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md).
+Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model [accuracy](https://www.ultralytics.com/glossary/accuracy). To enable mixed precision training in YOLO11, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md).
-### How does multiscale training enhance YOLOv8 model performance?
+### How does multiscale training enhance YOLO11 model performance?
-Multiscale training enhances model performance by training on images of varying sizes, allowing the model to better generalize across different scales and distances. In YOLOv8, you can enable multiscale training by setting the `scale` parameter in the training configuration. For example, `scale=0.5` reduces the image size by half, while `scale=2.0` doubles it. This technique simulates objects at different distances, making the model more robust across various scenarios. For settings and more details, check out the [training configuration](../modes/train.md).
+Multiscale training enhances model performance by training on images of varying sizes, allowing the model to better generalize across different scales and distances. In YOLO11, you can enable multiscale training by setting the `scale` parameter in the training configuration. For example, `scale=0.5` randomly zooms training images by a factor between 0.5 and 1.5 during training. This technique simulates objects at different distances, making the model more robust across various scenarios. For settings and more details, check out the [training configuration](../modes/train.md).
-### How can I use pre-trained weights to speed up training in YOLOv8?
+### How can I use pre-trained weights to speed up training in YOLO11?
-Using pre-trained weights can significantly reduce training times and improve model performance by starting from a model that already understands basic features. In YOLOv8, you can set the `pretrained` parameter to `True` or specify a path to custom pre-trained weights in your training configuration. This approach, known as transfer learning, leverages knowledge from large datasets to adapt to your specific task. Learn more about pre-trained weights and their advantages [here](../modes/train.md).
+Using pre-trained weights can significantly reduce training times and improve model performance by starting from a model that already understands basic features. In YOLO11, you can set the `pretrained` parameter to `True` or specify a path to custom pre-trained weights in your training configuration. This approach, known as transfer learning, leverages knowledge from large datasets to adapt to your specific task. Learn more about pre-trained weights and their advantages [here](../modes/train.md).
-### What is the recommended number of epochs for training a model, and how do I set this in YOLOv8?
+### What is the recommended number of epochs for training a model, and how do I set this in YOLO11?
-The number of epochs refers to the complete passes through the training dataset during model training. A typical starting point is 300 epochs. If your model overfits early, you can reduce the number. Alternatively, if overfitting isn't observed, you might extend training to 600, 1200, or more epochs. To set this in YOLOv8, use the `epochs` parameter in your training script. For additional advice on determining the ideal number of epochs, refer to this section on [number of epochs](#the-number-of-epochs-to-train-for).
+The number of epochs refers to the complete passes through the training dataset during model training. A typical starting point is 300 epochs. If your model overfits early, you can reduce the number. Alternatively, if overfitting isn't observed, you might extend training to 600, 1200, or more epochs. To set this in YOLO11, use the `epochs` parameter in your training script. For additional advice on determining the ideal number of epochs, refer to this section on [number of epochs](#the-number-of-epochs-to-train-for).
diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md
index f352c76b8cd..38301b3c4d8 100644
--- a/docs/en/guides/nvidia-jetson.md
+++ b/docs/en/guides/nvidia-jetson.md
@@ -1,12 +1,17 @@
---
comments: true
-description: Learn to deploy Ultralytics YOLOv8 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities.
-keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision
+description: Learn to deploy Ultralytics YOLO11 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities.
+keywords: Ultralytics, YOLO11, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision
+benchmark_version: 8.3.51
---
-# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLOv8
+# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO11
-This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLOv8 on these small and powerful devices.
+This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLO11 on these small and powerful devices.
+
+!!! tip "New product support"
+
+ We have updated this guide with the latest [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) which delivers up to 67 TOPS of AI performance, a 1.7X improvement over its predecessor, to seamlessly run the most popular AI models.
@@ -16,14 +21,14 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti
allowfullscreen>
- Watch: How to Setup NVIDIA Jetson with Ultralytics YOLOv8
+ Watch: How to Setup NVIDIA Jetson with Ultralytics YOLO11
!!! note
- This guide has been tested with both [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running the latest stable JetPack release of [JP6.0](https://developer.nvidia.com/embedded/jetpack-sdk-60), JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.1](https://developer.nvidia.com/embedded/jetpack-sdk-461). It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy.
+ This guide has been tested with the [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) running the latest stable JetPack release of [JP6.1](https://developer.nvidia.com/embedded/jetpack-sdk-61), [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running JetPack release of [JP6.0](https://developer.nvidia.com/embedded/jetpack-sdk-60) or [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513), and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.1](https://developer.nvidia.com/embedded/jetpack-sdk-461). It is expected to work across the entire NVIDIA Jetson hardware lineup, including the latest and legacy devices.
## What is NVIDIA Jetson?
@@ -33,14 +38,14 @@ NVIDIA Jetson is a series of embedded computing boards designed to bring acceler
[Jetson Orin](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/) is the latest iteration of the NVIDIA Jetson family based on the NVIDIA Ampere architecture, which brings drastically improved AI performance compared to previous generations. The table below compares a few of the Jetson devices in the ecosystem.
-| | Jetson AGX Orin 64GB | Jetson Orin NX 16GB | Jetson Orin Nano 8GB | Jetson AGX Xavier | Jetson Xavier NX | Jetson Nano |
+| | Jetson AGX Orin 64GB | Jetson Orin NX 16GB | Jetson Orin Nano Super | Jetson AGX Xavier | Jetson Xavier NX | Jetson Nano |
| ----------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------- | ------------------------------------------------------------- | --------------------------------------------- |
-| AI Performance | 275 TOPS | 100 TOPS | 40 TOPs | 32 TOPS | 21 TOPS | 472 GFLOPS |
+| AI Performance | 275 TOPS | 100 TOPS | 67 TOPS | 32 TOPS | 21 TOPS | 472 GFLOPS |
| GPU | 2048-core NVIDIA Ampere architecture GPU with 64 Tensor Cores | 1024-core NVIDIA Ampere architecture GPU with 32 Tensor Cores | 1024-core NVIDIA Ampere architecture GPU with 32 Tensor Cores | 512-core NVIDIA Volta architecture GPU with 64 Tensor Cores | 384-core NVIDIA Volta™ architecture GPU with 48 Tensor Cores | 128-core NVIDIA Maxwell™ architecture GPU |
-| GPU Max Frequency | 1.3 GHz | 918 MHz | 625 MHz | 1377 MHz | 1100 MHz | 921MHz |
+| GPU Max Frequency | 1.3 GHz | 918 MHz | 1020 MHz | 1377 MHz | 1100 MHz | 921MHz |
| CPU | 12-core NVIDIA Arm® Cortex A78AE v8.2 64-bit CPU 3MB L2 + 6MB L3 | 8-core NVIDIA Arm® Cortex A78AE v8.2 64-bit CPU 2MB L2 + 4MB L3 | 6-core Arm® Cortex®-A78AE v8.2 64-bit CPU 1.5MB L2 + 4MB L3 | 8-core NVIDIA Carmel Arm®v8.2 64-bit CPU 8MB L2 + 4MB L3 | 6-core NVIDIA Carmel Arm®v8.2 64-bit CPU 6MB L2 + 4MB L3 | Quad-Core Arm® Cortex®-A57 MPCore processor |
-| CPU Max Frequency | 2.2 GHz | 2.0 GHz | 1.5 GHz | 2.2 GHz | 1.9 GHz | 1.43GHz |
-| Memory | 64GB 256-bit LPDDR5 204.8GB/s | 16GB 128-bit LPDDR5 102.4GB/s | 8GB 128-bit LPDDR5 68 GB/s | 32GB 256-bit LPDDR4x 136.5GB/s | 8GB 128-bit LPDDR4x 59.7GB/s | 4GB 64-bit LPDDR4 25.6GB/s" |
+| CPU Max Frequency | 2.2 GHz | 2.0 GHz | 1.7 GHz | 2.2 GHz | 1.9 GHz | 1.43GHz |
+| Memory | 64GB 256-bit LPDDR5 204.8GB/s | 16GB 128-bit LPDDR5 102.4GB/s | 8GB 128-bit LPDDR5 102GB/s | 32GB 256-bit LPDDR4x 136.5GB/s | 8GB 128-bit LPDDR4x 59.7GB/s | 4GB 64-bit LPDDR4 25.6GB/s |
For a more detailed comparison table, please visit the **Technical Specifications** section of [official NVIDIA Jetson page](https://developer.nvidia.com/embedded/jetson-modules).
@@ -77,7 +82,7 @@ The below table highlights NVIDIA JetPack versions supported by different NVIDIA
## Quick Start with Docker
-The fastest way to get started with Ultralytics YOLOv8 on NVIDIA Jetson is to run with pre-built docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own.
+The fastest way to get started with Ultralytics YOLO11 on NVIDIA Jetson is to run with the pre-built Docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own.
=== "JetPack 4"
@@ -106,7 +111,7 @@ After this is done, skip to [Use TensorRT on NVIDIA Jetson section](#use-tensorr
For a native installation without Docker, please refer to the steps below.
-### Run on JetPack 6.x
+### Run on JetPack 6.1
#### Install Ultralytics Package
@@ -136,25 +141,34 @@ Here we will install Ultralytics package on the Jetson with optional dependencie
The above ultralytics installation will install Torch and Torchvision. However, these two packages installed via pip are not compatible with the Jetson platform, which is based on the ARM64 architecture. Therefore, we need to manually install pre-built PyTorch and Torchvision wheels that are compatible with the Jetson.
-Install `torch 2.3.0` and `torchvision 0.18` according to JP6.0
+Install `torch 2.5.0` and `torchvision 0.20` according to JP6.1
```bash
-sudo apt-get install libopenmpi-dev libopenblas-base libomp-dev -y
-pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.3.0-cp310-cp310-linux_aarch64.whl
-pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl
+pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.5.0a0+872d972e41.nv24.08-cp310-cp310-linux_aarch64.whl
+pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl
```
-Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision).
+!!! note
+
+ Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision).
+
+Install [`cuSPARSELt`](https://developer.nvidia.com/cusparselt-downloads?target_os=Linux&target_arch=aarch64-jetson&Compilation=Native&Distribution=Ubuntu&target_version=22.04&target_type=deb_network) to fix a dependency issue with `torch 2.5.0`
+
+```bash
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+sudo apt-get -y install libcusparselt0 libcusparselt-dev
+```
#### Install `onnxruntime-gpu`
The [onnxruntime-gpu](https://pypi.org/project/onnxruntime-gpu/) package hosted on PyPI does not provide `aarch64` binaries for the Jetson, so we need to install this package manually. It is required for some of the exports.
-All different `onnxruntime-gpu` packages corresponding to different JetPack and Python versions are listed [here](https://elinux.org/Jetson_Zoo#ONNX_Runtime). However, here we will download and install `onnxruntime-gpu 1.18.0` with `Python3.10` support.
+All different `onnxruntime-gpu` packages corresponding to different JetPack and Python versions are listed [here](https://elinux.org/Jetson_Zoo#ONNX_Runtime). However, here we will download and install `onnxruntime-gpu 1.20.0` with `Python3.10` support.
```bash
-wget https://nvidia.box.com/shared/static/48dtuob7meiw6ebgfsfqakc9vse62sg4.whl -O onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl
-pip install onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl
+pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.20.0-cp310-cp310-linux_aarch64.whl
```
!!! note
@@ -163,7 +177,7 @@ pip install onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl
`pip install numpy==1.23.5`
-### Run on JetPack 5.x
+### Run on JetPack 5.1.2
#### Install Ultralytics Package
@@ -199,25 +213,16 @@ The above ultralytics installation will install Torch and Torchvision. However,
pip uninstall torch torchvision
```
-2. Install PyTorch 2.1.0 according to JP5.1.3
+2. Install `torch 2.1.0` and `torchvision 0.16.2` according to JP5.1.2
```bash
- sudo apt-get install -y libopenblas-base libopenmpi-dev
- wget https://developer.download.nvidia.com/compute/redist/jp/v512/pytorch/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl -O torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
- pip install torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
+ pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl
+ pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl
```
-3. Install Torchvision v0.16.2 according to PyTorch v2.1.0
-
- ```bash
- sudo apt install -y libjpeg-dev zlib1g-dev
- git clone https://github.com/pytorch/vision torchvision
- cd torchvision
- git checkout v0.16.2
- python3 setup.py install --user
- ```
+!!! note
-Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision).
+ Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision).
#### Install `onnxruntime-gpu`
@@ -240,9 +245,9 @@ pip install onnxruntime_gpu-1.17.0-cp38-cp38-linux_aarch64.whl
Out of all the model export formats supported by Ultralytics, TensorRT delivers the best inference performance on NVIDIA Jetson devices, and we recommend using TensorRT with Jetson. We also have a detailed document on TensorRT [here](../integrations/tensorrt.md).
-## Convert Model to TensorRT and Run Inference
+### Convert Model to TensorRT and Run Inference
-The YOLOv8n model in PyTorch format is converted to TensorRT to run inference with the exported model.
+The YOLO11n model in PyTorch format is converted to TensorRT to run inference with the exported model.
!!! example
@@ -251,14 +256,14 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
- # Export the model
- model.export(format="engine") # creates 'yolov8n.engine'
+ # Export the model to TensorRT
+ model.export(format="engine") # creates 'yolo11n.engine'
# Load the exported TensorRT model
- trt_model = YOLO("yolov8n.engine")
+ trt_model = YOLO("yolo11n.engine")
# Run inference
results = trt_model("https://ultralytics.com/images/bus.jpg")
@@ -267,119 +272,279 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to TensorRT format
- yolo export model=yolov8n.pt format=engine # creates 'yolov8n.engine'
+ # Export a YOLO11n PyTorch model to TensorRT format
+ yolo export model=yolo11n.pt format=engine # creates 'yolo11n.engine'
# Run inference with the exported model
- yolo predict model=yolov8n.engine source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg'
```
!!! note
Visit the [Export page](../modes/export.md#arguments) to access additional arguments when exporting models to different model formats
-## NVIDIA Jetson Orin YOLOv8 Benchmarks
+### Use NVIDIA Deep Learning Accelerator (DLA)
+
+[NVIDIA Deep Learning Accelerator (DLA)](https://developer.nvidia.com/deep-learning-accelerator) is a specialized hardware component built into NVIDIA Jetson devices that optimizes deep learning inference for energy efficiency and performance. By offloading tasks from the GPU (freeing it up for more intensive processes), DLA enables models to run with lower power consumption while maintaining high throughput, making it ideal for embedded systems and real-time AI applications.
+
+The following Jetson devices are equipped with DLA hardware:
+
+- Jetson Orin NX 16GB
+- Jetson AGX Orin Series
+- Jetson AGX Xavier Series
+- Jetson Xavier NX Series
+
+!!! example
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
+
+ # Export the model to TensorRT with DLA enabled (only works with FP16 or INT8)
+ model.export(format="engine", device="dla:0", half=True) # dla:0 or dla:1 corresponds to the DLA cores
+
+ # Load the exported TensorRT model
+ trt_model = YOLO("yolo11n.engine")
+
+ # Run inference
+ results = trt_model("https://ultralytics.com/images/bus.jpg")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8)
+ yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores
+
+ # Run inference with the exported model on the DLA
+ yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg'
+ ```
+
+!!! note
+
+ When using DLA exports, some layers may not be supported on the DLA and will fall back to the GPU for execution. This fallback can introduce additional latency and impact overall inference performance. Therefore, DLA is not primarily designed to reduce inference latency compared to TensorRT running entirely on the GPU; its main purpose is to increase throughput and improve energy efficiency.
-YOLOv8 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640.
+## NVIDIA Jetson Orin YOLO11 Benchmarks
-### Comparison Chart
+YOLO11 benchmarks were run by the Ultralytics team across a range of model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, MNN, and NCNN. Benchmarks were run on both the NVIDIA Jetson Orin Nano Super Developer Kit and the Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB at FP32 [precision](https://www.ultralytics.com/glossary/precision) with a default input image size of 640.
+
+### Comparison Charts
Even though all model exports work with NVIDIA Jetson, we have only included **PyTorch, TorchScript, TensorRT** in the comparison charts below because they make use of the GPU on the Jetson and are guaranteed to produce the best results. All the other exports only utilize the CPU, and their performance is not as good as the above three. You can find benchmarks for all exports in the section after these charts.
-
-
-
+#### NVIDIA Jetson Orin Nano Super Developer Kit
+
+
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
+
+
+#### NVIDIA Jetson Orin NX 16GB
+
+
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
+
-### Detailed Comparison Table
+### Detailed Comparison Tables
+
+The tables below present the benchmark results for five different models (YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x) across the export formats listed above, giving us the status, size, mAP50-95(B) metric, and inference time for each combination.
+
+#### NVIDIA Jetson Orin Nano Super Developer Kit
+
+!!! performance
-The below table represents the benchmark results for five different models (YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), giving us the status, size, mAP50-95(B) metric, and inference time for each combination.
+ === "YOLO11n"
+
+ | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
+ |-----------------|--------|-------------------|-------------|------------------------|
+ | PyTorch | ✅ | 5.4 | 0.6176 | 21.3 |
+ | TorchScript | ✅ | 10.5 | 0.6100 | 13.40 |
+ | ONNX | ✅ | 10.2 | 0.6100 | 7.94 |
+ | OpenVINO | ✅ | 10.4 | 0.6091 | 57.36 |
+ | TensorRT (FP32) | ✅ | 11.9 | 0.6082 | 7.60 |
+ | TensorRT (FP16) | ✅ | 8.3 | 0.6096 | 4.91 |
+ | TensorRT (INT8) | ✅ | 5.6 | 0.3180 | 3.91 |
+ | TF SavedModel | ✅ | 25.8 | 0.6082 | 223.98 |
+ | TF GraphDef | ✅ | 10.3 | 0.6082 | 289.95 |
+ | TF Lite | ✅ | 10.3 | 0.6082 | 328.29 |
+ | PaddlePaddle | ✅ | 20.4 | 0.6082 | 530.46 |
+ | MNN | ✅ | 10.1 | 0.6120 | 74.75 |
+ | NCNN | ✅ | 10.2 | 0.6106 | 46.12 |
+
+ === "YOLO11s"
+
+ | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
+ |-----------------|--------|-------------------|-------------|------------------------|
+ | PyTorch | ✅ | 18.4 | 0.7526 | 22.00 |
+ | TorchScript | ✅ | 36.5 | 0.7400 | 21.35 |
+ | ONNX | ✅ | 36.3 | 0.7400 | 13.91 |
+ | OpenVINO | ✅ | 36.4 | 0.7391 | 126.95 |
+ | TensorRT (FP32) | ✅ | 38.0 | 0.7400 | 13.29 |
+ | TensorRT (FP16) | ✅ | 21.3 | 0.7431 | 7.30 |
+ | TensorRT (INT8) | ✅ | 12.2 | 0.3243 | 5.25 |
+ | TF SavedModel | ✅ | 91.1 | 0.7400 | 406.73 |
+ | TF GraphDef | ✅ | 36.4 | 0.7400 | 629.80 |
+ | TF Lite | ✅ | 36.4 | 0.7400 | 953.98 |
+ | PaddlePaddle | ✅ | 72.5 | 0.7400 | 1311.67 |
+ | MNN | ✅ | 36.2 | 0.7392 | 187.66 |
+ | NCNN | ✅ | 36.2 | 0.7403 | 122.02 |
+
+ === "YOLO11m"
+
+ | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
+ |-----------------|--------|-------------------|-------------|------------------------|
+ | PyTorch | ✅ | 38.8 | 0.7598 | 33.00 |
+ | TorchScript | ✅ | 77.3 | 0.7643 | 48.17 |
+ | ONNX | ✅ | 76.9 | 0.7641 | 29.31 |
+ | OpenVINO | ✅ | 77.1 | 0.7642 | 313.49 |
+ | TensorRT (FP32) | ✅ | 78.7 | 0.7641 | 28.21 |
+ | TensorRT (FP16) | ✅ | 41.8 | 0.7653 | 13.99 |
+ | TensorRT (INT8) | ✅ | 23.2 | 0.4194 | 9.58 |
+ | TF SavedModel | ✅ | 192.7 | 0.7643 | 802.30 |
+ | TF GraphDef | ✅ | 77.0 | 0.7643 | 1335.42 |
+ | TF Lite | ✅ | 77.0 | 0.7643 | 2842.42 |
+ | PaddlePaddle | ✅ | 153.8 | 0.7643 | 3644.29 |
+ | MNN | ✅ | 76.8 | 0.7648 | 503.90 |
+ | NCNN | ✅ | 76.8 | 0.7674 | 298.78 |
+
+ === "YOLO11l"
+
+ | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
+ |-----------------|--------|-------------------|-------------|------------------------|
+ | PyTorch | ✅ | 49.0 | 0.7475 | 43.00 |
+ | TorchScript | ✅ | 97.6 | 0.7250 | 62.94 |
+ | ONNX | ✅ | 97.0 | 0.7250 | 36.33 |
+ | OpenVINO | ✅ | 97.3 | 0.7226 | 387.72 |
+ | TensorRT (FP32) | ✅ | 99.1 | 0.7250 | 35.59 |
+ | TensorRT (FP16) | ✅ | 52.0 | 0.7265 | 17.57 |
+ | TensorRT (INT8) | ✅ | 31.0 | 0.4033 | 12.37 |
+ | TF SavedModel | ✅ | 243.3 | 0.7250 | 1116.20 |
+ | TF GraphDef | ✅ | 97.2 | 0.7250 | 1603.32 |
+ | TF Lite | ✅ | 97.2 | 0.7250 | 3607.51 |
+ | PaddlePaddle | ✅ | 193.9 | 0.7250 | 4890.90 |
+ | MNN | ✅ | 96.9 | 0.7222 | 619.04 |
+ | NCNN | ✅ | 96.9 | 0.7252 | 352.85 |
+
+ === "YOLO11x"
+
+ | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
+ |-----------------|--------|-------------------|-------------|------------------------|
+ | PyTorch | ✅ | 109.3 | 0.8288 | 81.00 |
+ | TorchScript | ✅ | 218.1 | 0.8308 | 113.49 |
+ | ONNX | ✅ | 217.5 | 0.8308 | 75.20 |
+ | OpenVINO | ✅ | 217.8 | 0.8285 | 508.12 |
+ | TensorRT (FP32) | ✅ | 219.5 | 0.8307 | 67.32 |
+ | TensorRT (FP16) | ✅ | 112.2 | 0.8248 | 32.94 |
+ | TensorRT (INT8) | ✅ | 61.7 | 0.4854 | 20.72 |
+ | TF SavedModel | ✅ | 545.0 | 0.8308 | 1048.8 |
+ | TF GraphDef | ✅ | 217.8 | 0.8308 | 2961.8 |
+ | TF Lite | ✅ | 217.8 | 0.8308 | 7898.8 |
+ | PaddlePaddle | ✅ | 434.8 | 0.8308 | 9903.68 |
+ | MNN | ✅ | 217.3 | 0.8308 | 1242.97 |
+ | NCNN | ✅ | 217.3 | 0.8304 | 850.05 |
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
+
+#### NVIDIA Jetson Orin NX 16GB
!!! performance
- === "YOLOv8n"
+ === "YOLO11n"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 6.2 | 0.6381 | 14.3 |
- | TorchScript | โ | 12.4 | 0.6117 | 13.3 |
- | ONNX | โ | 12.2 | 0.6092 | 70.6 |
- | OpenVINO | โ | 12.3 | 0.6092 | 104.2 |
- | TensorRT (FP32) | โ | 16.1 | 0.6091 | 8.01 |
- | TensorRT (FP16) | โ | 9.2 | 0.6093 | 4.55 |
- | TensorRT (INT8) | โ | 5.9 | 0.2759 | 4.09 |
- | TF SavedModel | โ | 30.6 | 0.6092 | 141.74 |
- | TF GraphDef | โ | 12.3 | 0.6092 | 199.93 |
- | TF Lite | โ | 12.3 | 0.6092 | 349.18 |
- | PaddlePaddle | โ | 24.4 | 0.6030 | 555 |
- | NCNN | โ | 12.2 | 0.6092 | 32 |
-
- === "YOLOv8s"
+ | PyTorch | ✅ | 5.4 | 0.6176 | 19.50 |
+ | TorchScript | ✅ | 10.5 | 0.6100 | 13.03 |
+ | ONNX | ✅ | 10.2 | 0.6100 | 8.44 |
+ | OpenVINO | ✅ | 10.4 | 0.6091 | 40.83 |
+ | TensorRT (FP32) | ✅ | 11.9 | 0.6100 | 8.05 |
+ | TensorRT (FP16) | ✅ | 8.2 | 0.6096 | 4.85 |
+ | TensorRT (INT8) | ✅ | 5.5 | 0.3180 | 4.37 |
+ | TF SavedModel | ✅ | 25.8 | 0.6082 | 185.39 |
+ | TF GraphDef | ✅ | 10.3 | 0.6082 | 244.85 |
+ | TF Lite | ✅ | 10.3 | 0.6082 | 289.77 |
+ | PaddlePaddle | ✅ | 20.4 | 0.6082 | 476.52 |
+ | MNN | ✅ | 10.1 | 0.6120 | 53.37 |
+ | NCNN | ✅ | 10.2 | 0.6106 | 33.55 |
+
+ === "YOLO11s"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 21.5 | 0.6967 | 18 |
- | TorchScript | โ | 43.0 | 0.7136 | 23.81 |
- | ONNX | โ | 42.8 | 0.7136 | 185.55 |
- | OpenVINO | โ | 42.9 | 0.7136 | 243.97 |
- | TensorRT (FP32) | โ | 46.4 | 0.7136 | 14.01 |
- | TensorRT (FP16) | โ | 24.2 | 0.722 | 7.16 |
- | TensorRT (INT8) | โ | 13.7 | 0.4233 | 5.49 |
- | TF SavedModel | โ | 107 | 0.7136 | 260.03 |
- | TF GraphDef | โ | 42.8 | 0.7136 | 423.4 |
- | TF Lite | โ | 42.8 | 0.7136 | 1046.64 |
- | PaddlePaddle | โ | 85.5 | 0.7140 | 1464 |
- | NCNN | โ | 42.7 | 0.7200 | 63 |
-
- === "YOLOv8m"
+ | PyTorch | ✅ | 18.4 | 0.7526 | 19.00 |
+ | TorchScript | ✅ | 36.5 | 0.7400 | 22.90 |
+ | ONNX | ✅ | 36.3 | 0.7400 | 14.44 |
+ | OpenVINO | ✅ | 36.4 | 0.7391 | 88.70 |
+ | TensorRT (FP32) | ✅ | 37.9 | 0.7400 | 14.13 |
+ | TensorRT (FP16) | ✅ | 21.6 | 0.7406 | 7.55 |
+ | TensorRT (INT8) | ✅ | 12.2 | 0.3243 | 5.63 |
+ | TF SavedModel | ✅ | 91.1 | 0.7400 | 317.61 |
+ | TF GraphDef | ✅ | 36.4 | 0.7400 | 515.99 |
+ | TF Lite | ✅ | 36.4 | 0.7400 | 838.85 |
+ | PaddlePaddle | ✅ | 72.5 | 0.7400 | 1170.07 |
+ | MNN | ✅ | 36.2 | 0.7413 | 125.23 |
+ | NCNN | ✅ | 36.2 | 0.7403 | 68.13 |
+
+ === "YOLO11m"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 49.7 | 0.7370 | 36.4 |
- | TorchScript | โ | 99.2 | 0.7285 | 53.58 |
- | ONNX | โ | 99 | 0.7280 | 452.09 |
- | OpenVINO | โ | 99.1 | 0.7280 | 544.36 |
- | TensorRT (FP32) | โ | 102.4 | 0.7285 | 31.51 |
- | TensorRT (FP16) | โ | 52.6 | 0.7324 | 14.88 |
- | TensorRT (INT8) | โ | 28.6 | 0.3283 | 10.89 |
- | TF SavedModel | โ | 247.5 | 0.7280 | 543.65 |
- | TF GraphDef | โ | 99 | 0.7280 | 906.63 |
- | TF Lite | โ | 99 | 0.7280 | 2758.08 |
- | PaddlePaddle | โ | 197.9 | 0.7280 | 3678 |
- | NCNN | โ | 98.9 | 0.7260 | 135 |
-
- === "YOLOv8l"
+ | PyTorch | ✅ | 38.8 | 0.7598 | 36.50 |
+ | TorchScript | ✅ | 77.3 | 0.7643 | 52.55 |
+ | ONNX | ✅ | 76.9 | 0.7640 | 31.16 |
+ | OpenVINO | ✅ | 77.1 | 0.7642 | 208.57 |
+ | TensorRT (FP32) | ✅ | 78.7 | 0.7640 | 30.72 |
+ | TensorRT (FP16) | ✅ | 41.5 | 0.7651 | 14.45 |
+ | TensorRT (INT8) | ✅ | 23.3 | 0.4194 | 10.19 |
+ | TF SavedModel | ✅ | 192.7 | 0.7643 | 590.11 |
+ | TF GraphDef | ✅ | 77.0 | 0.7643 | 998.57 |
+ | TF Lite | ✅ | 77.0 | 0.7643 | 2486.11 |
+ | PaddlePaddle | ✅ | 153.8 | 0.7643 | 3236.09 |
+ | MNN | ✅ | 76.8 | 0.7661 | 335.78 |
+ | NCNN | ✅ | 76.8 | 0.7674 | 188.43 |
+
+ === "YOLO11l"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 83.7 | 0.7768 | 61.3 |
- | TorchScript | โ | 167.2 | 0.7554 | 87.9 |
- | ONNX | โ | 166.8 | 0.7551 | 852.29 |
- | OpenVINO | โ | 167 | 0.7551 | 1012.6 |
- | TensorRT (FP32) | โ | 170.5 | 0.7554 | 49.79 |
- | TensorRT (FP16) | โ | 86.1 | 0.7535 | 22.89 |
- | TensorRT (INT8) | โ | 46.4 | 0.4048 | 14.61 |
- | TF SavedModel | โ | 417.2 | 0.7551 | 990.45 |
- | TF GraphDef | โ | 166.9 | 0.7551 | 1649.86 |
- | TF Lite | โ | 166.9 | 0.7551 | 5652.37 |
- | PaddlePaddle | โ | 333.6 | 0.7551 | 7114.67 |
- | NCNN | โ | 166.8 | 0.7685 | 231.9 |
-
- === "YOLOv8x"
+ | PyTorch | ✅ | 49.0 | 0.7475 | 46.6 |
+ | TorchScript | ✅ | 97.6 | 0.7250 | 66.54 |
+ | ONNX | ✅ | 97.0 | 0.7250 | 39.55 |
+ | OpenVINO | ✅ | 97.3 | 0.7226 | 262.44 |
+ | TensorRT (FP32) | ✅ | 99.2 | 0.7250 | 38.68 |
+ | TensorRT (FP16) | ✅ | 51.9 | 0.7265 | 18.53 |
+ | TensorRT (INT8) | ✅ | 30.9 | 0.4033 | 13.36 |
+ | TF SavedModel | ✅ | 243.3 | 0.7250 | 850.25 |
+ | TF GraphDef | ✅ | 97.2 | 0.7250 | 1324.60 |
+ | TF Lite | ✅ | 97.2 | 0.7250 | 3191.24 |
+ | PaddlePaddle | ✅ | 193.9 | 0.7250 | 4204.97 |
+ | MNN | ✅ | 96.9 | 0.7225 | 414.41 |
+ | NCNN | ✅ | 96.9 | 0.7252 | 237.74 |
+
+ === "YOLO11x"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 130.5 | 0.7759 | 93 |
- | TorchScript | โ | 260.7 | 0.7472 | 135.1 |
- | ONNX | โ | 260.4 | 0.7479 | 1296.13 |
- | OpenVINO | โ | 260.6 | 0.7479 | 1502.15 |
- | TensorRT (FP32) | โ | 264.0 | 0.7469 | 80.01 |
- | TensorRT (FP16) | โ | 133.3 | 0.7513 | 40.76 |
- | TensorRT (INT8) | โ | 70.2 | 0.4277 | 22.08 |
- | TF SavedModel | โ | 651.1 | 0.7479 | 1451.76 |
- | TF GraphDef | โ | 260.5 | 0.7479 | 4029.36 |
- | TF Lite | โ | 260.4 | 0.7479 | 8772.86 |
- | PaddlePaddle | โ | 520.8 | 0.7479 | 10619.53 |
- | NCNN | โ | 260.4 | 0.7646 | 376.38 |
+ | PyTorch | ✅ | 109.3 | 0.8288 | 86.00 |
+ | TorchScript | ✅ | 218.1 | 0.8308 | 122.43 |
+ | ONNX | ✅ | 217.5 | 0.8307 | 77.50 |
+ | OpenVINO | ✅ | 217.8 | 0.8285 | 508.12 |
+ | TensorRT (FP32) | ✅ | 219.5 | 0.8307 | 76.44 |
+ | TensorRT (FP16) | ✅ | 112.0 | 0.8309 | 35.99 |
+ | TensorRT (INT8) | ✅ | 61.6 | 0.4854 | 22.32 |
+ | TF SavedModel | ✅ | 545.0 | 0.8308 | 1470.06 |
+ | TF GraphDef | ✅ | 217.8 | 0.8308 | 2549.78 |
+ | TF Lite | ✅ | 217.8 | 0.8308 | 7025.44 |
+ | PaddlePaddle | ✅ | 434.8 | 0.8308 | 8364.89 |
+ | MNN | ✅ | 217.3 | 0.8289 | 827.13 |
+ | NCNN | ✅ | 217.3 | 0.8304 | 490.29 |
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
[Explore more benchmarking efforts by Seeed Studio](https://www.seeedstudio.com/blog/2023/03/30/yolov8-performance-benchmarks-on-nvidia-jetson-devices) running on different versions of NVIDIA Jetson hardware.
@@ -394,25 +559,25 @@ To reproduce the above Ultralytics benchmarks on all export [formats](../modes/e
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
- # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
- results = model.benchmarks(data="coco8.yaml", imgsz=640)
+ # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
+ results = model.benchmark(data="coco8.yaml", imgsz=640)
```
=== "CLI"
```bash
- # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
- yolo benchmark model=yolov8n.pt data=coco8.yaml imgsz=640
+ # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
+ yolo benchmark model=yolo11n.pt data=coco8.yaml imgsz=640
```
Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results, use a dataset with a large number of images, e.g. `data='coco.yaml'` (5000 val images) rather than `data='coco8.yaml'` (4 val images).
## Best Practices when using NVIDIA Jetson
-When using NVIDIA Jetson, there are a couple of best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLOv8.
+When using NVIDIA Jetson, there are a few best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLO11.
1. Enable MAX Power Mode
@@ -445,29 +610,29 @@ When using NVIDIA Jetson, there are a couple of best practices to follow in orde
## Next Steps
-Congratulations on successfully setting up YOLOv8 on your NVIDIA Jetson! For further learning and support, visit more guide at [Ultralytics YOLOv8 Docs](../index.md)!
+Congratulations on successfully setting up YOLO11 on your NVIDIA Jetson! For further learning and support, explore more guides at [Ultralytics YOLO11 Docs](../index.md)!
## FAQ
-### How do I deploy Ultralytics YOLOv8 on NVIDIA Jetson devices?
+### How do I deploy Ultralytics YOLO11 on NVIDIA Jetson devices?
-Deploying Ultralytics YOLOv8 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation).
+Deploying Ultralytics YOLO11 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation).
-### What performance benchmarks can I expect from YOLOv8 models on NVIDIA Jetson devices?
+### What performance benchmarks can I expect from YOLO11 models on NVIDIA Jetson devices?
-YOLOv8 models have been benchmarked on various NVIDIA Jetson devices showing significant performance improvements. For example, the TensorRT format delivers the best inference performance. The table in the [Detailed Comparison Table](#detailed-comparison-table) section provides a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats.
+YOLO11 models have been benchmarked on various NVIDIA Jetson devices, showing significant performance improvements. For example, the TensorRT format delivers the best inference performance. The tables in the [Detailed Comparison Tables](#detailed-comparison-tables) section provide a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats.
-### Why should I use TensorRT for deploying YOLOv8 on NVIDIA Jetson?
+### Why should I use TensorRT for deploying YOLO11 on NVIDIA Jetson?
-TensorRT is highly recommended for deploying YOLOv8 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section.
+TensorRT is highly recommended for deploying YOLO11 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section.
### How can I install PyTorch and Torchvision on NVIDIA Jetson?
-To install PyTorch and Torchvision on NVIDIA Jetson, first uninstall any existing versions that may have been installed via pip. Then, manually install the compatible PyTorch and Torchvision versions for the Jetson's ARM64 architecture. Detailed instructions for this process are provided in the [Install PyTorch and Torchvision](#install-pytorch-and-torchvision) section.
+To install PyTorch and Torchvision on NVIDIA Jetson, first uninstall any existing versions that may have been installed via pip. Then, manually install the compatible PyTorch and Torchvision versions for the Jetson's ARM64 architecture. Detailed instructions for this process are provided in the [Installation of PyTorch and Torchvision](#install-pytorch-and-torchvision) section.
-### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLOv8?
+### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLO11?
-To maximize performance on NVIDIA Jetson with YOLOv8, follow these best practices:
+To maximize performance on NVIDIA Jetson with YOLO11, follow these best practices:
1. Enable MAX Power Mode to utilize all CPU and GPU cores.
2. Enable Jetson Clocks to run all cores at their maximum frequency.
diff --git a/docs/en/guides/object-blurring.md b/docs/en/guides/object-blurring.md
index 315bcd76ea0..2c6a3bdfc94 100644
--- a/docs/en/guides/object-blurring.md
+++ b/docs/en/guides/object-blurring.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to use Ultralytics YOLOv8 for real-time object blurring to enhance privacy and focus in your images and videos.
-keywords: YOLOv8, object blurring, real-time processing, privacy protection, image manipulation, video editing, Ultralytics
+description: Learn how to use Ultralytics YOLO11 for real-time object blurring to enhance privacy and focus in your images and videos.
+keywords: YOLO11, object blurring, real-time processing, privacy protection, image manipulation, video editing, Ultralytics
---
-# Object Blurring using Ultralytics YOLOv8 ๐
+# Object Blurring using Ultralytics YOLO11 🚀
## What is Object Blurring?
-Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves applying a blurring effect to specific detected objects in an image or video. This can be achieved using the YOLOv8 model capabilities to identify and manipulate objects within a given scene.
+Object blurring with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves applying a blurring effect to specific detected objects in an image or video. This can be achieved using the YOLO11 model's capabilities to identify and manipulate objects within a given scene.
@@ -18,16 +18,16 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
allowfullscreen>
- Watch: Object Blurring using Ultralytics YOLOv8
+ Watch: Object Blurring using Ultralytics YOLO11
## Advantages of Object Blurring?
- **Privacy Protection**: Object blurring is an effective tool for safeguarding privacy by concealing sensitive or personally identifiable information in images or videos.
-- **Selective Focus**: YOLOv8 allows for selective blurring, enabling users to target specific objects, ensuring a balance between privacy and retaining relevant visual information.
-- **Real-time Processing**: YOLOv8's efficiency enables object blurring in real-time, making it suitable for applications requiring on-the-fly privacy enhancements in dynamic environments.
+- **Selective Focus**: YOLO11 allows for selective blurring, enabling users to target specific objects, ensuring a balance between privacy and retaining relevant visual information.
+- **Real-time Processing**: YOLO11's efficiency enables object blurring in real-time, making it suitable for applications requiring on-the-fly privacy enhancements in dynamic environments.
-!!! example "Object Blurring using YOLOv8 Example"
+!!! example "Object Blurring using YOLO11 Example"
=== "Object Blurring"
@@ -37,7 +37,7 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
names = model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
@@ -86,20 +86,20 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
## FAQ
-### What is object blurring with Ultralytics YOLOv8?
+### What is object blurring with Ultralytics YOLO11?
-Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves automatically detecting and applying a blurring effect to specific objects in images or videos. This technique enhances privacy by concealing sensitive information while retaining relevant visual data. YOLOv8's real-time processing capabilities make it suitable for applications requiring immediate privacy protection and selective focus adjustments.
+Object blurring with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves automatically detecting and applying a blurring effect to specific objects in images or videos. This technique enhances privacy by concealing sensitive information while retaining relevant visual data. YOLO11's real-time processing capabilities make it suitable for applications requiring immediate privacy protection and selective focus adjustments.
-### How can I implement real-time object blurring using YOLOv8?
+### How can I implement real-time object blurring using YOLO11?
-To implement real-time object blurring with YOLOv8, follow the provided Python example. This involves using YOLOv8 for [object detection](https://www.ultralytics.com/glossary/object-detection) and OpenCV for applying the blur effect. Here's a simplified version:
+To implement real-time object blurring with YOLO11, follow the provided Python example. This involves using YOLO11 for [object detection](https://www.ultralytics.com/glossary/object-detection) and OpenCV for applying the blur effect. Here's a simplified version:
```python
import cv2
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
while cap.isOpened():
@@ -112,7 +112,7 @@ while cap.isOpened():
obj = im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])]
im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] = cv2.blur(obj, (50, 50))
- cv2.imshow("YOLOv8 Blurring", im0)
+ cv2.imshow("YOLO11 Blurring", im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
@@ -120,9 +120,9 @@ cap.release()
cv2.destroyAllWindows()
```
-### What are the benefits of using Ultralytics YOLOv8 for object blurring?
+### What are the benefits of using Ultralytics YOLO11 for object blurring?
-Ultralytics YOLOv8 offers several advantages for object blurring:
+Ultralytics YOLO11 offers several advantages for object blurring:
- **Privacy Protection**: Effectively obscure sensitive or identifiable information.
- **Selective Focus**: Target specific objects for blurring, maintaining essential visual content.
@@ -130,10 +130,10 @@ Ultralytics YOLOv8 offers several advantages for object blurring:
For more detailed applications, check the [advantages of object blurring section](#advantages-of-object-blurring).
-### Can I use Ultralytics YOLOv8 to blur faces in a video for privacy reasons?
+### Can I use Ultralytics YOLO11 to blur faces in a video for privacy reasons?
-Yes, Ultralytics YOLOv8 can be configured to detect and blur faces in videos to protect privacy. By training or using a pre-trained model to specifically recognize faces, the detection results can be processed with [OpenCV](https://www.ultralytics.com/glossary/opencv) to apply a blur effect. Refer to our guide on [object detection with YOLOv8](https://docs.ultralytics.com/models/yolov8/) and modify the code to target face detection.
+Yes, Ultralytics YOLO11 can be configured to detect and blur faces in videos to protect privacy. By training or using a pre-trained model to specifically recognize faces, the detection results can be processed with [OpenCV](https://www.ultralytics.com/glossary/opencv) to apply a blur effect. Refer to our guide on [object detection with YOLO11](https://docs.ultralytics.com/models/yolo11/) and modify the code to target face detection.
-### How does YOLOv8 compare to other object detection models like Faster R-CNN for object blurring?
+### How does YOLO11 compare to other object detection models like Faster R-CNN for object blurring?
-Ultralytics YOLOv8 typically outperforms models like Faster R-CNN in terms of speed, making it more suitable for real-time applications. While both models offer accurate detection, YOLOv8's architecture is optimized for rapid inference, which is critical for tasks like real-time object blurring. Learn more about the technical differences and performance metrics in our [YOLOv8 documentation](https://docs.ultralytics.com/models/yolov8/).
+Ultralytics YOLO11 typically outperforms models like Faster R-CNN in terms of speed, making it more suitable for real-time applications. While both models offer accurate detection, YOLO11's architecture is optimized for rapid inference, which is critical for tasks like real-time object blurring. Learn more about the technical differences and performance metrics in our [YOLO11 documentation](https://docs.ultralytics.com/models/yolo11/).
diff --git a/docs/en/guides/object-counting.md b/docs/en/guides/object-counting.md
index 7c1367b29e3..73dcd3056d8 100644
--- a/docs/en/guides/object-counting.md
+++ b/docs/en/guides/object-counting.md
@@ -1,37 +1,27 @@
---
comments: true
-description: Learn to accurately identify and count objects in real-time using Ultralytics YOLOv8 for applications like crowd analysis and surveillance.
-keywords: object counting, YOLOv8, Ultralytics, real-time object detection, AI, deep learning, object tracking, crowd analysis, surveillance, resource optimization
+description: Learn to accurately identify and count objects in real-time using Ultralytics YOLO11 for applications like crowd analysis and surveillance.
+keywords: object counting, YOLO11, Ultralytics, real-time object detection, AI, deep learning, object tracking, crowd analysis, surveillance, resource optimization
---
-# Object Counting using Ultralytics YOLOv8
+# Object Counting using Ultralytics YOLO11
## What is Object Counting?
-Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLOv8 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) capabilities.
- Watch: Object Counting using Ultralytics YOLOv8
+
+
+Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLO11 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) capabilities.
+
+
## Advantages of Object Counting?
@@ -43,86 +33,61 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
| Logistics | Aquaculture |
| :-----------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Conveyor Belt Packets Counting Using Ultralytics YOLOv8 | Fish Counting in Sea using Ultralytics YOLOv8 |
-
-!!! example "Object Counting using YOLOv8 Example"
-
- === "Count in Region"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
+|  |  |
+| Conveyor Belt Packets Counting Using Ultralytics YOLO11 | Fish Counting in Sea using Ultralytics YOLO11 |
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Define region points
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-
- # Video writer
- video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+!!! example "Object Counting using YOLO11 Example"
- # Init Object Counter
- counter = solutions.ObjectCounter(
- view_img=True,
- reg_pts=region_points,
- names=model.names,
- draw_tracks=True,
- line_thickness=2,
- )
+ === "CLI"
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- tracks = model.track(im0, persist=True, show=False)
+ ```bash
+ # Run a counting example
+ yolo solutions count show=True
- im0 = counter.start_counting(im0, tracks)
- video_writer.write(im0)
+ # Pass a source video
+ yolo solutions count source="path/to/video/file.mp4"
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
+ # Pass region coordinates
+ yolo solutions count region=[(20, 400), (1080, 400), (1080, 360), (20, 360)]
```
- === "OBB Object Counting"
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n-obb.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+ # region_points = [(20, 400), (1080, 400)] # For line counting
+ region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # For rectangle region counting
+ # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # For polygon region counting
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- # Init Object Counter
+ # Init ObjectCounter
counter = solutions.ObjectCounter(
- view_img=True,
- reg_pts=region_points,
- names=model.names,
- line_thickness=2,
+ show=True, # Display the output
+ region=region_points, # Pass region points
+ model="yolo11n.pt", # model="yolo11n-obb.pt" for object counting using YOLO11 OBB model.
+ # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model.
+ # show_in=True, # Display in counts
+ # show_out=True, # Display out counts
+ # line_width=2, # Adjust the line width for bounding boxes and text display
)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- tracks = model.track(im0, persist=True, show=False)
- im0 = counter.start_counting(im0, tracks)
+ im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
@@ -130,146 +95,18 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
cv2.destroyAllWindows()
```
- === "Count in Polygon"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Define region points as a polygon with 5 points
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
-
- # Video writer
- video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Init Object Counter
- counter = solutions.ObjectCounter(
- view_img=True,
- reg_pts=region_points,
- names=model.names,
- draw_tracks=True,
- line_thickness=2,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- tracks = model.track(im0, persist=True, show=False)
- im0 = counter.start_counting(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
- === "Count in Line"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- # Define line points
- line_points = [(20, 400), (1080, 400)]
-
- # Video writer
- video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Init Object Counter
- counter = solutions.ObjectCounter(
- view_img=True,
- reg_pts=line_points,
- names=model.names,
- draw_tracks=True,
- line_thickness=2,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- tracks = model.track(im0, persist=True, show=False)
- im0 = counter.start_counting(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
- === "Specific Classes"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
-
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- line_points = [(20, 400), (1080, 400)] # line or region points
- classes_to_count = [0, 2] # person and car classes for count
-
- # Video writer
- video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- # Init Object Counter
- counter = solutions.ObjectCounter(
- view_img=True,
- reg_pts=line_points,
- names=model.names,
- draw_tracks=True,
- line_thickness=2,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- tracks = model.track(im0, persist=True, show=False, classes=classes_to_count)
- im0 = counter.start_counting(im0, tracks)
- video_writer.write(im0)
-
- cap.release()
- video_writer.release()
- cv2.destroyAllWindows()
- ```
-
-???+ tip "Region is Movable"
-
- You can move the region anywhere in the frame by clicking on its edges
-
### Arguments `ObjectCounter`
Here's a table with the `ObjectCounter` arguments:
-| Name | Type | Default | Description |
-| ----------------- | ------ | -------------------------- | ---------------------------------------------------------------------- |
-| `names` | `dict` | `None` | Dictionary of classes names. |
-| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. |
-| `line_thickness` | `int` | `2` | Line thickness for bounding boxes. |
-| `view_img` | `bool` | `False` | Flag to control whether to display the video stream. |
-| `view_in_counts` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. |
-| `view_out_counts` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. |
-| `draw_tracks` | `bool` | `False` | Flag to control whether to draw the object tracks. |
+| Name | Type | Default | Description |
+| ------------ | ------ | -------------------------- | ---------------------------------------------------------------------- |
+| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
+| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
+| `show_in` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. |
+| `show_out` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. |
### Arguments `model.track`
@@ -277,43 +114,39 @@ Here's a table with the `ObjectCounter` arguments:
## FAQ
-### How do I count objects in a video using Ultralytics YOLOv8?
+### How do I count objects in a video using Ultralytics YOLO11?
-To count objects in a video using Ultralytics YOLOv8, you can follow these steps:
+To count objects in a video using Ultralytics YOLO11, you can follow these steps:
1. Import the necessary libraries (`cv2`, `ultralytics`).
-2. Load a pretrained YOLOv8 model.
-3. Define the counting region (e.g., a polygon, line, etc.).
-4. Set up the video capture and initialize the object counter.
-5. Process each frame to track objects and count them within the defined region.
+2. Define the counting region (e.g., a polygon, line, etc.).
+3. Set up the video capture and initialize the object counter.
+4. Process each frame to track objects and count them within the defined region.
Here's a simple example for counting in a region:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
def count_objects_in_region(video_path, output_video_path, model_path):
"""Count objects in a specific region within a video."""
- model = YOLO(model_path)
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- counter = solutions.ObjectCounter(
- view_img=True, reg_pts=region_points, names=model.names, draw_tracks=True, line_thickness=2
- )
+
+ region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
+ counter = solutions.ObjectCounter(show=True, region=region_points, model=model_path)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- tracks = model.track(im0, persist=True, show=False)
- im0 = counter.start_counting(im0, tracks)
+ im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
@@ -321,14 +154,14 @@ def count_objects_in_region(video_path, output_video_path, model_path):
cv2.destroyAllWindows()
-count_objects_in_region("path/to/video.mp4", "output_video.avi", "yolov8n.pt")
+count_objects_in_region("path/to/video.mp4", "output_video.avi", "yolo11n.pt")
```
-Explore more configurations and options in the [Object Counting](#object-counting-using-ultralytics-yolov8) section.
+Explore more configurations and options in the [Object Counting](#object-counting-using-ultralytics-yolo11) section.
-### What are the advantages of using Ultralytics YOLOv8 for object counting?
+### What are the advantages of using Ultralytics YOLO11 for object counting?
-Using Ultralytics YOLOv8 for object counting offers several advantages:
+Using Ultralytics YOLO11 for object counting offers several advantages:
1. **Resource Optimization:** It facilitates efficient resource management by providing accurate counts, helping optimize resource allocation in industries like inventory management.
2. **Enhanced Security:** It enhances security and surveillance by accurately tracking and counting entities, aiding in proactive threat detection.
@@ -336,35 +169,32 @@ Using Ultralytics YOLOv8 for object counting offers several advantages:
For real-world applications and code examples, visit the [Advantages of Object Counting](#advantages-of-object-counting) section.
-### How can I count specific classes of objects using Ultralytics YOLOv8?
+### How can I count specific classes of objects using Ultralytics YOLO11?
-To count specific classes of objects using Ultralytics YOLOv8, you need to specify the classes you are interested in during the tracking phase. Below is a Python example:
+To count specific classes of objects using Ultralytics YOLO11, you need to specify the classes you are interested in during the tracking phase. Below is a Python example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
def count_specific_classes(video_path, output_video_path, model_path, classes_to_count):
"""Count specific classes of objects in a video."""
- model = YOLO(model_path)
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
- line_points = [(20, 400), (1080, 400)]
video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- counter = solutions.ObjectCounter(
- view_img=True, reg_pts=line_points, names=model.names, draw_tracks=True, line_thickness=2
- )
+
+ line_points = [(20, 400), (1080, 400)]
+ counter = solutions.ObjectCounter(show=True, region=line_points, model=model_path, classes=classes_to_count)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- tracks = model.track(im0, persist=True, show=False, classes=classes_to_count)
- im0 = counter.start_counting(im0, tracks)
+ im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
@@ -372,27 +202,27 @@ def count_specific_classes(video_path, output_video_path, model_path, classes_to
cv2.destroyAllWindows()
-count_specific_classes("path/to/video.mp4", "output_specific_classes.avi", "yolov8n.pt", [0, 2])
+count_specific_classes("path/to/video.mp4", "output_specific_classes.avi", "yolo11n.pt", [0, 2])
```
In this example, `classes_to_count=[0, 2]`, which means it counts objects of class `0` and `2` (e.g., person and car).
-### Why should I use YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models for real-time applications?
+### Why should I use YOLO11 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models for real-time applications?
-Ultralytics YOLOv8 provides several advantages over other object detection models like Faster R-CNN, SSD, and previous YOLO versions:
+Ultralytics YOLO11 provides several advantages over other object detection models like Faster R-CNN, SSD, and previous YOLO versions:
-1. **Speed and Efficiency:** YOLOv8 offers real-time processing capabilities, making it ideal for applications requiring high-speed inference, such as surveillance and autonomous driving.
+1. **Speed and Efficiency:** YOLO11 offers real-time processing capabilities, making it ideal for applications requiring high-speed inference, such as surveillance and autonomous driving.
2. **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** It provides state-of-the-art accuracy for object detection and tracking tasks, reducing the number of false positives and improving overall system reliability.
-3. **Ease of Integration:** YOLOv8 offers seamless integration with various platforms and devices, including mobile and edge devices, which is crucial for modern AI applications.
+3. **Ease of Integration:** YOLO11 offers seamless integration with various platforms and devices, including mobile and edge devices, which is crucial for modern AI applications.
4. **Flexibility:** Supports various tasks like object detection, segmentation, and tracking with configurable models to meet specific use-case requirements.
-Check out Ultralytics [YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8/) for a deeper dive into its features and performance comparisons.
+Check out Ultralytics [YOLO11 Documentation](https://docs.ultralytics.com/models/yolo11/) for a deeper dive into its features and performance comparisons.
-### Can I use YOLOv8 for advanced applications like crowd analysis and traffic management?
+### Can I use YOLO11 for advanced applications like crowd analysis and traffic management?
-Yes, Ultralytics YOLOv8 is perfectly suited for advanced applications like crowd analysis and traffic management due to its real-time detection capabilities, scalability, and integration flexibility. Its advanced features allow for high-accuracy object tracking, counting, and classification in dynamic environments. Example use cases include:
+Yes, Ultralytics YOLO11 is perfectly suited for advanced applications like crowd analysis and traffic management due to its real-time detection capabilities, scalability, and integration flexibility. Its advanced features allow for high-accuracy object tracking, counting, and classification in dynamic environments. Example use cases include:
- **Crowd Analysis:** Monitor and manage large gatherings, ensuring safety and optimizing crowd flow.
- **Traffic Management:** Track and count vehicles, analyze traffic patterns, and manage congestion in real-time.
-For more information and implementation details, refer to the guide on [Real World Applications](#real-world-applications) of object counting with YOLOv8.
+For more information and implementation details, refer to the guide on [Real World Applications](#real-world-applications) of object counting with YOLO11.
diff --git a/docs/en/guides/object-cropping.md b/docs/en/guides/object-cropping.md
index f4b50ed0276..8bfcac5fe1f 100644
--- a/docs/en/guides/object-cropping.md
+++ b/docs/en/guides/object-cropping.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to crop and extract objects using Ultralytics YOLOv8 for focused analysis, reduced data volume, and enhanced precision.
-keywords: Ultralytics, YOLOv8, object cropping, object detection, image processing, video analysis, AI, machine learning
+description: Learn how to crop and extract objects using Ultralytics YOLO11 for focused analysis, reduced data volume, and enhanced precision.
+keywords: Ultralytics, YOLO11, object cropping, object detection, image processing, video analysis, AI, machine learning
---
-# Object Cropping using Ultralytics YOLOv8
+# Object Cropping using Ultralytics YOLO11
## What is Object Cropping?
-Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves isolating and extracting specific detected objects from an image or video. The YOLOv8 model capabilities are utilized to accurately identify and delineate objects, enabling precise cropping for further analysis or manipulation.
+Object cropping with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves isolating and extracting specific detected objects from an image or video. The YOLO11 model capabilities are utilized to accurately identify and delineate objects, enabling precise cropping for further analysis or manipulation.
@@ -18,23 +18,23 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
- Watch: Object Cropping using Ultralytics YOLOv8
+ Watch: Object Cropping using Ultralytics YOLO
## Advantages of Object Cropping?
-- **Focused Analysis**: YOLOv8 facilitates targeted object cropping, allowing for in-depth examination or processing of individual items within a scene.
+- **Focused Analysis**: YOLO11 facilitates targeted object cropping, allowing for in-depth examination or processing of individual items within a scene.
- **Reduced Data Volume**: By extracting only relevant objects, object cropping helps in minimizing data size, making it efficient for storage, transmission, or subsequent computational tasks.
-- **Enhanced Precision**: YOLOv8's [object detection](https://www.ultralytics.com/glossary/object-detection) [accuracy](https://www.ultralytics.com/glossary/accuracy) ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis.
+- **Enhanced Precision**: YOLO11's [object detection](https://www.ultralytics.com/glossary/object-detection) [accuracy](https://www.ultralytics.com/glossary/accuracy) ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis.
## Visuals
| Airport Luggage |
| :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |
-| Suitcases Cropping at airport conveyor belt using Ultralytics YOLOv8 |
+|  |
+| Suitcases Cropping at airport conveyor belt using Ultralytics YOLO11 |
-!!! example "Object Cropping using YOLOv8 Example"
+!!! example "Object Cropping using YOLO11 Example"
=== "Object Cropping"
@@ -46,7 +46,7 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
names = model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
@@ -98,22 +98,22 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly
## FAQ
-### What is object cropping in Ultralytics YOLOv8 and how does it work?
+### What is object cropping in Ultralytics YOLO11 and how does it work?
-Object cropping using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLOv8's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced [precision](https://www.ultralytics.com/glossary/precision) by leveraging YOLOv8 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolov8).
+Object cropping using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLO11's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced [precision](https://www.ultralytics.com/glossary/precision) by leveraging YOLO11 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolo11).
-### Why should I use Ultralytics YOLOv8 for object cropping over other solutions?
+### Why should I use Ultralytics YOLO11 for object cropping over other solutions?
-Ultralytics YOLOv8 stands out due to its precision, speed, and ease of use. It allows detailed and accurate object detection and cropping, essential for [focused analysis](#advantages-of-object-cropping) and applications needing high data integrity. Moreover, YOLOv8 integrates seamlessly with tools like OpenVINO and TensorRT for deployments requiring real-time capabilities and optimization on diverse hardware. Explore the benefits in the [guide on model export](../modes/export.md).
+Ultralytics YOLO11 stands out due to its precision, speed, and ease of use. It allows detailed and accurate object detection and cropping, essential for [focused analysis](#advantages-of-object-cropping) and applications needing high data integrity. Moreover, YOLO11 integrates seamlessly with tools like OpenVINO and TensorRT for deployments requiring real-time capabilities and optimization on diverse hardware. Explore the benefits in the [guide on model export](../modes/export.md).
### How can I reduce the data volume of my dataset using object cropping?
-By using Ultralytics YOLOv8 to crop only relevant objects from your images or videos, you can significantly reduce the data size, making it more efficient for storage and processing. This process involves training the model to detect specific objects and then using the results to crop and save these portions only. For more information on exploiting Ultralytics YOLOv8's capabilities, visit our [quickstart guide](../quickstart.md).
+By using Ultralytics YOLO11 to crop only relevant objects from your images or videos, you can significantly reduce the data size, making it more efficient for storage and processing. This process involves training the model to detect specific objects and then using the results to crop and save these portions only. For more information on exploiting Ultralytics YOLO11's capabilities, visit our [quickstart guide](../quickstart.md).
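As a rough illustration, the standard `predict` call can save each detected object as its own image via the `save_crop` argument, so only the relevant regions need to be stored (the source path and class filter below are placeholders):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Save only the cropped detections (class 0 = person in COCO) instead of full frames
model.predict(source="path/to/images", classes=[0], save_crop=True)
```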
-### Can I use Ultralytics YOLOv8 for real-time video analysis and object cropping?
+### Can I use Ultralytics YOLO11 for real-time video analysis and object cropping?
-Yes, Ultralytics YOLOv8 can process real-time video feeds to detect and crop objects dynamically. The model's high-speed inference capabilities make it ideal for real-time applications such as surveillance, sports analysis, and automated inspection systems. Check out the [tracking and prediction modes](../modes/predict.md) to understand how to implement real-time processing.
+Yes, Ultralytics YOLO11 can process real-time video feeds to detect and crop objects dynamically. The model's high-speed inference capabilities make it ideal for real-time applications such as surveillance, sports analysis, and automated inspection systems. Check out the [tracking and prediction modes](../modes/predict.md) to understand how to implement real-time processing.
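A minimal streaming sketch (the webcam index and crop handling are illustrative) processes frames as they arrive and keeps only the cropped detections:

```python
import cv2

from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# stream=True returns a generator that yields results frame by frame (webcam index 0)
for frame_idx, result in enumerate(model.predict(source=0, stream=True)):
    for box_idx, (x1, y1, x2, y2) in enumerate(result.boxes.xyxy.tolist()):
        crop = result.orig_img[int(y1) : int(y2), int(x1) : int(x2)]
        cv2.imwrite(f"crop_{frame_idx}_{box_idx}.jpg", crop)
```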
-### What are the hardware requirements for efficiently running YOLOv8 for object cropping?
+### What are the hardware requirements for efficiently running YOLO11 for object cropping?
-Ultralytics YOLOv8 is optimized for both CPU and GPU environments, but to achieve optimal performance, especially for real-time or high-volume inference, a dedicated GPU (e.g., NVIDIA Tesla, RTX series) is recommended. For deployment on lightweight devices, consider using CoreML for iOS or TFLite for Android. More details on supported devices and formats can be found in our [model deployment options](../guides/model-deployment-options.md).
+Ultralytics YOLO11 is optimized for both CPU and GPU environments, but to achieve optimal performance, especially for real-time or high-volume inference, a dedicated GPU (e.g., NVIDIA Tesla, RTX series) is recommended. For deployment on lightweight devices, consider using CoreML for iOS or TFLite for Android. More details on supported devices and formats can be found in our [model deployment options](../guides/model-deployment-options.md).
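For those lightweight targets, a short export sketch using the standard export formats might look like this:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Export for lightweight deployment targets
model.export(format="coreml")  # iOS
model.export(format="tflite")  # Android / edge devices
```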
diff --git a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
index 154ec7a893a..cffeb223503 100644
--- a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
+++ b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md
@@ -61,7 +61,7 @@ OpenVINO's multi-device mode simplifies scaling throughput by automatically bala
Optimizing Ultralytics YOLO models for latency and throughput with OpenVINO can significantly enhance your application's performance. By carefully applying the strategies outlined in this guide, developers can ensure their models run efficiently, meeting the demands of various deployment scenarios. Remember, the choice between optimizing for latency or throughput depends on your specific application needs and the characteristics of the deployment environment.
-For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/latest/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models.
+For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/2024/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models.
---
diff --git a/docs/en/guides/parking-management.md b/docs/en/guides/parking-management.md
index 78686bd0613..b6140181aef 100644
--- a/docs/en/guides/parking-management.md
+++ b/docs/en/guides/parking-management.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Optimize parking spaces and enhance safety with Ultralytics YOLOv8. Explore real-time vehicle detection and smart parking solutions.
-keywords: parking management, YOLOv8, Ultralytics, vehicle detection, real-time tracking, parking lot optimization, smart parking
+description: Optimize parking spaces and enhance safety with Ultralytics YOLO11. Explore real-time vehicle detection and smart parking solutions.
+keywords: parking management, YOLO11, Ultralytics, vehicle detection, real-time tracking, parking lot optimization, smart parking
---
-# Parking Management using Ultralytics YOLOv8 🚀
+# Parking Management using Ultralytics YOLO11 🚀
## What is Parking Management System?
-Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) ensures efficient and safe parking by organizing spaces and monitoring availability. YOLOv8 can improve parking lot management through real-time vehicle detection, and insights into parking occupancy.
+Parking management with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) ensures efficient and safe parking by organizing spaces and monitoring availability. YOLO11 can improve parking lot management through real-time vehicle detection, and insights into parking occupancy.
@@ -18,21 +18,21 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
- Watch: How to Implement Parking Management Using Ultralytics YOLOv8 🚀
+ Watch: How to Implement Parking Management Using Ultralytics YOLO 🚀
## Advantages of Parking Management System?
- **Efficiency**: Parking lot management optimizes the use of parking spaces and reduces congestion.
-- **Safety and Security**: Parking management using YOLOv8 improves the safety of both people and vehicles through surveillance and security measures.
-- **Reduced Emissions**: Parking management using YOLOv8 manages traffic flow to minimize idle time and emissions in parking lots.
+- **Safety and Security**: Parking management using YOLO11 improves the safety of both people and vehicles through surveillance and security measures.
+- **Reduced Emissions**: Parking management using YOLO11 manages traffic flow to minimize idle time and emissions in parking lots.
## Real World Applications
| Parking Management System | Parking Management System |
| :----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Parking management Aerial View using Ultralytics YOLOv8 | Parking management Top View using Ultralytics YOLOv8 |
+|  |  |
+| Parking management Aerial View using Ultralytics YOLO11 | Parking management Top View using Ultralytics YOLO11 |
## Parking Management System Code Workflow
@@ -49,7 +49,7 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
Max Image Size of 1920 * 1080 supported
-!!! example "Parking slots Annotator Ultralytics YOLOv8"
+!!! example "Parking slots Annotator Ultralytics YOLO11"
=== "Parking Annotator"
@@ -61,11 +61,11 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
- After defining the parking areas with polygons, click `save` to store a JSON file with the data in your working directory.
-
+
### Python Code for Parking Management
-!!! example "Parking management using YOLOv8 Example"
+!!! example "Parking management using YOLO11 Example"
=== "Parking Management"
@@ -84,7 +84,7 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
# Initialize parking management object
parking_manager = solutions.ParkingManagement(
- model="yolov8n.pt", # path to model file
+ model="yolo11n.pt", # path to model file
json_file="bounding_boxes.json", # path to parking annotations file
)
@@ -102,12 +102,10 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
### Optional Arguments `ParkingManagement`
-| Name | Type | Default | Description |
-| ------------------------ | ------- | ------------- | -------------------------------------------------------------- |
-| `model` | `str` | `None` | Path to the YOLOv8 model. |
-| `json_file` | `str` | `None` | Path to the JSON file, that have all parking coordinates data. |
-| `occupied_region_color` | `tuple` | `(0, 0, 255)` | RGB color for occupied regions. |
-| `available_region_color` | `tuple` | `(0, 255, 0)` | RGB color for available regions. |
+| Name | Type | Default | Description |
+| ----------- | ----- | ------- | -------------------------------------------------------------- |
+| `model` | `str` | `None` | Path to the YOLO11 model. |
+| `json_file` | `str` | `None`  | Path to the JSON file that contains all parking coordinates data. |
### Arguments `model.track`
@@ -115,33 +113,33 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr
## FAQ
-### How does Ultralytics YOLOv8 enhance parking management systems?
+### How does Ultralytics YOLO11 enhance parking management systems?
-Ultralytics YOLOv8 greatly enhances parking management systems by providing **real-time vehicle detection** and monitoring. This results in optimized usage of parking spaces, reduced congestion, and improved safety through continuous surveillance. The [Parking Management System](https://github.com/ultralytics/ultralytics) enables efficient traffic flow, minimizing idle times and emissions in parking lots, thereby contributing to environmental sustainability. For further details, refer to the [parking management code workflow](#python-code-for-parking-management).
+Ultralytics YOLO11 greatly enhances parking management systems by providing **real-time vehicle detection** and monitoring. This results in optimized usage of parking spaces, reduced congestion, and improved safety through continuous surveillance. The [Parking Management System](https://github.com/ultralytics/ultralytics) enables efficient traffic flow, minimizing idle times and emissions in parking lots, thereby contributing to environmental sustainability. For further details, refer to the [parking management code workflow](#python-code-for-parking-management).
-### What are the benefits of using Ultralytics YOLOv8 for smart parking?
+### What are the benefits of using Ultralytics YOLO11 for smart parking?
-Using Ultralytics YOLOv8 for smart parking yields numerous benefits:
+Using Ultralytics YOLO11 for smart parking yields numerous benefits:
- **Efficiency**: Optimizes the use of parking spaces and decreases congestion.
- **Safety and Security**: Enhances surveillance and ensures the safety of vehicles and pedestrians.
- **Environmental Impact**: Helps in reducing emissions by minimizing vehicle idle times. More details on the advantages can be seen [here](#advantages-of-parking-management-system).
-### How can I define parking spaces using Ultralytics YOLOv8?
+### How can I define parking spaces using Ultralytics YOLO11?
-Defining parking spaces is straightforward with Ultralytics YOLOv8:
+Defining parking spaces is straightforward with Ultralytics YOLO11:
1. Capture a frame from a video or camera stream.
2. Use the provided code to launch a GUI for selecting an image and drawing polygons to define parking spaces.
3. Save the labeled data in JSON format for further processing. For comprehensive instructions, check the [selection of points](#selection-of-points) section.
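For step 2 above, the annotation GUI is typically launched with the `ParkingPtsSelection` helper; a minimal sketch, assuming that utility is available in your installed `ultralytics` version:

```python
from ultralytics import solutions

# Opens a small GUI: upload an image, draw parking-region polygons, then click save
# to write the coordinates to a JSON file in the current working directory.
solutions.ParkingPtsSelection()
```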
-### Can I customize the YOLOv8 model for specific parking management needs?
+### Can I customize the YOLO11 model for specific parking management needs?
-Yes, Ultralytics YOLOv8 allows customization for specific parking management needs. You can adjust parameters such as the **occupied and available region colors**, margins for text display, and much more. Utilizing the `ParkingManagement` class's [optional arguments](#optional-arguments-parkingmanagement), you can tailor the model to suit your particular requirements, ensuring maximum efficiency and effectiveness.
+Yes, Ultralytics YOLO11 allows customization for specific parking management needs. Using the `ParkingManagement` class's [optional arguments](#optional-arguments-parkingmanagement), you can point the solution at your own detection model and your own parking-region annotations file, tailoring it to your particular requirements for maximum efficiency and effectiveness.
-### What are some real-world applications of Ultralytics YOLOv8 in parking lot management?
+### What are some real-world applications of Ultralytics YOLO11 in parking lot management?
-Ultralytics YOLOv8 is utilized in various real-world applications for parking lot management, including:
+Ultralytics YOLO11 is utilized in various real-world applications for parking lot management, including:
- **Parking Space Detection**: Accurately identifying available and occupied spaces.
- **Surveillance**: Enhancing security through real-time monitoring.
diff --git a/docs/en/guides/preprocessing_annotated_data.md b/docs/en/guides/preprocessing_annotated_data.md
index fcd329c7438..62f69694922 100644
--- a/docs/en/guides/preprocessing_annotated_data.md
+++ b/docs/en/guides/preprocessing_annotated_data.md
@@ -1,7 +1,7 @@
---
comments: true
description: Learn essential data preprocessing techniques for annotated computer vision data, including resizing, normalizing, augmenting, and splitting datasets for optimal model training.
-keywords: data preprocessing, computer vision, image resizing, normalization, data augmentation, training dataset, validation dataset, test dataset, YOLOv8
+keywords: data preprocessing, computer vision, image resizing, normalization, data augmentation, training dataset, validation dataset, test dataset, YOLO11
---
# Data Preprocessing Techniques for Annotated [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Data
@@ -36,7 +36,7 @@ To make resizing a simpler task, you can use the following tools:
- **[OpenCV](https://www.ultralytics.com/glossary/opencv)**: A popular computer vision library with extensive functions for image processing.
- **PIL (Pillow)**: A Python Imaging Library for opening, manipulating, and saving image files.
-With respect to YOLOv8, the 'imgsz' parameter during [model training](../modes/train.md) allows for flexible input sizes. When set to a specific size, such as 640, the model will resize input images so their largest dimension is 640 pixels while maintaining the original aspect ratio.
+With respect to YOLO11, the 'imgsz' parameter during [model training](../modes/train.md) allows for flexible input sizes. When set to a specific size, such as 640, the model will resize input images so their largest dimension is 640 pixels while maintaining the original aspect ratio.
By evaluating your model's and dataset's specific needs, you can determine whether resizing is a necessary preprocessing step or if your model can efficiently handle images of varying sizes.
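As an illustration of the `imgsz` behavior described above, a short training call might look like the sketch below (the dataset and epoch values are placeholders):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Inputs are resized so their longest side is 640 px, preserving aspect ratio
model.train(data="coco8.yaml", epochs=3, imgsz=640)
```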
@@ -47,7 +47,7 @@ Another preprocessing technique is normalization. Normalization scales the pixel
- **Min-Max Scaling**: Scales pixel values to a range of 0 to 1.
- **Z-Score Normalization**: Scales pixel values based on their mean and standard deviation.
-With respect to YOLOv8, normalization is seamlessly handled as part of its preprocessing pipeline during model training. YOLOv8 automatically performs several preprocessing steps, including conversion to RGB, scaling pixel values to the range [0, 1], and normalization using predefined mean and standard deviation values.
+With respect to YOLO11, normalization is seamlessly handled as part of its preprocessing pipeline during model training. YOLO11 automatically performs several preprocessing steps, including conversion to RGB, scaling pixel values to the range [0, 1], and normalization using predefined mean and standard deviation values.
### Splitting the Dataset
@@ -76,9 +76,9 @@ Common augmentation techniques include flipping, rotation, scaling, and color ad
-With respect to YOLOv8, you can [augment your custom dataset](../modes/train.md) by modifying the dataset configuration file, a .yaml file. In this file, you can add an augmentation section with parameters that specify how you want to augment your data.
+With respect to YOLO11, you can [augment your custom dataset](../modes/train.md) by modifying the dataset configuration file, a .yaml file. In this file, you can add an augmentation section with parameters that specify how you want to augment your data.
-The [Ultralytics YOLOv8 repository](https://github.com/ultralytics/ultralytics/tree/main) supports a wide range of data augmentations. You can apply various transformations such as:
+The [Ultralytics YOLO11 repository](https://github.com/ultralytics/ultralytics/tree/main) supports a wide range of data augmentations. You can apply various transformations such as:
- Random Crops
- Flipping: Images can be flipped horizontally or vertically.
@@ -89,12 +89,12 @@ Also, you can adjust the intensity of these augmentation techniques through spec
## A Case Study of Preprocessing
-Consider a project aimed at developing a model to detect and classify different types of vehicles in traffic images using YOLOv8. We've collected traffic images and annotated them with bounding boxes and labels.
+Consider a project aimed at developing a model to detect and classify different types of vehicles in traffic images using YOLO11. We've collected traffic images and annotated them with bounding boxes and labels.
Here's what each step of preprocessing would look like for this project:
-- Resizing Images: Since YOLOv8 handles flexible input sizes and performs resizing automatically, manual resizing is not required. The model will adjust the image size according to the specified 'imgsz' parameter during training.
-- Normalizing Pixel Values: YOLOv8 automatically normalizes pixel values to a range of 0 to 1 during preprocessing, so it's not required.
+- Resizing Images: Since YOLO11 handles flexible input sizes and performs resizing automatically, manual resizing is not required. The model will adjust the image size according to the specified 'imgsz' parameter during training.
+- Normalizing Pixel Values: YOLO11 automatically normalizes pixel values to a range of 0 to 1 during preprocessing, so it's not required.
- Splitting the Dataset: Divide the dataset into training (70%), validation (20%), and test (10%) sets using tools like scikit-learn.
- [Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation): Modify the dataset configuration file (.yaml) to include data augmentation techniques such as random crops, horizontal flips, and brightness adjustments.
@@ -120,6 +120,10 @@ Common tools for visualizations include:
### Using Ultralytics Explorer for EDA
+!!! warning "Community Note ⚠️"
+
+    As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
+
For a more advanced approach to EDA, you can use the Ultralytics Explorer tool. It offers robust capabilities for exploring computer vision datasets. By supporting semantic search, SQL queries, and vector similarity search, the tool makes it easy to analyze and understand your data. With Ultralytics Explorer, you can create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset to find similar images, run SQL queries for detailed analysis, and perform semantic searches, all through a user-friendly graphical interface.
@@ -132,12 +136,12 @@ Having discussions about your project with other computer vision enthusiasts can
### Channels to Connect with the Community
-- **GitHub Issues:** Visit the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face.
+- **GitHub Issues:** Visit the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects.
## Your Dataset Is Ready!
@@ -151,7 +155,7 @@ Data preprocessing is essential in computer vision projects because it ensures t
### How can I use Ultralytics YOLO for data augmentation?
-For data augmentation with Ultralytics YOLOv8, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce [overfitting](https://www.ultralytics.com/glossary/overfitting), and improve model generalization.
+For data augmentation with Ultralytics YOLO11, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce [overfitting](https://www.ultralytics.com/glossary/overfitting), and improve model generalization.
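As a rough sketch, augmentation strength can also be passed directly as training hyperparameters (the values below are illustrative, not recommendations):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Illustrative augmentation settings: horizontal flips, small rotations, brightness jitter
model.train(data="coco8.yaml", epochs=3, fliplr=0.5, degrees=10.0, hsv_v=0.4)
```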
### What are the best data normalization techniques for computer vision data?
@@ -160,12 +164,12 @@ Normalization scales pixel values to a standard range for faster convergence and
- **Min-Max Scaling**: Scales pixel values to a range of 0 to 1.
- **Z-Score Normalization**: Scales pixel values based on their mean and standard deviation.
-For YOLOv8, normalization is handled automatically, including conversion to RGB and pixel value scaling. Learn more about it in the [model training section](../modes/train.md).
+For YOLO11, normalization is handled automatically, including conversion to RGB and pixel value scaling. Learn more about it in the [model training section](../modes/train.md).
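For reference outside the YOLO pipeline, a minimal NumPy sketch of the two techniques above (applied to a dummy image) could look like:

```python
import numpy as np

image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)  # dummy image
pixels = image.astype(np.float32)

# Min-Max scaling to the range [0, 1]
min_max = pixels / 255.0

# Z-score normalization per channel
z_score = (pixels - pixels.mean(axis=(0, 1))) / (pixels.std(axis=(0, 1)) + 1e-8)
```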
### How should I split my annotated dataset for training?
To split your dataset, a common practice is to divide it into 70% for training, 20% for validation, and 10% for testing. It is important to maintain the data distribution of classes across these splits and avoid data leakage by performing augmentation only on the training set. Use tools like scikit-learn or [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) for efficient dataset splitting. See the detailed guide on [dataset preparation](../guides/data-collection-and-annotation.md).
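A minimal scikit-learn sketch of that 70/20/10 split (the file list is hypothetical):

```python
from sklearn.model_selection import train_test_split

image_paths = [f"images/img_{i:04d}.jpg" for i in range(1000)]  # hypothetical dataset

# 70% train, then split the remaining 30% into 20% val and 10% test
train_paths, rest = train_test_split(image_paths, test_size=0.3, random_state=42)
val_paths, test_paths = train_test_split(rest, test_size=1 / 3, random_state=42)
# Pass stratify=labels to both calls if per-image class labels are available
```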
-### Can I handle varying image sizes in YOLOv8 without manual resizing?
+### Can I handle varying image sizes in YOLO11 without manual resizing?
-Yes, Ultralytics YOLOv8 can handle varying image sizes through the 'imgsz' parameter during model training. This parameter ensures that images are resized so their largest dimension matches the specified size (e.g., 640 pixels), while maintaining the aspect ratio. For more flexible input handling and automatic adjustments, check the [model training section](../modes/train.md).
+Yes, Ultralytics YOLO11 can handle varying image sizes through the 'imgsz' parameter during model training. This parameter ensures that images are resized so their largest dimension matches the specified size (e.g., 640 pixels), while maintaining the aspect ratio. For more flexible input handling and automatic adjustments, check the [model training section](../modes/train.md).
diff --git a/docs/en/guides/queue-management.md b/docs/en/guides/queue-management.md
index 9fb4897edf3..c97d9eeaa4f 100644
--- a/docs/en/guides/queue-management.md
+++ b/docs/en/guides/queue-management.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to manage and optimize queues using Ultralytics YOLOv8 to reduce wait times and increase efficiency in various real-world applications.
-keywords: queue management, YOLOv8, Ultralytics, reduce wait times, efficiency, customer satisfaction, retail, airports, healthcare, banks
+description: Learn how to manage and optimize queues using Ultralytics YOLO11 to reduce wait times and increase efficiency in various real-world applications.
+keywords: queue management, YOLO11, Ultralytics, reduce wait times, efficiency, customer satisfaction, retail, airports, healthcare, banks
---
-# Queue Management using Ultralytics YOLOv8 🚀
+# Queue Management using Ultralytics YOLO11 🚀
## What is Queue Management?
-Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves organizing and controlling lines of people or vehicles to reduce wait times and enhance efficiency. It's about optimizing queues to improve customer satisfaction and system performance in various settings like retail, banks, airports, and healthcare facilities.
+Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves organizing and controlling lines of people or vehicles to reduce wait times and enhance efficiency. It's about optimizing queues to improve customer satisfaction and system performance in various settings like retail, banks, airports, and healthcare facilities.
@@ -18,7 +18,7 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
- Watch: How to Implement Queue Management with Ultralytics YOLOv8 | Airport and Metro Station
+ Watch: How to Implement Queue Management with Ultralytics YOLO11 | Airport and Metro Station
## Advantages of Queue Management?
@@ -30,104 +30,74 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
| Logistics | Retail |
| :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Queue management at airport ticket counter Using Ultralytics YOLOv8 | Queue monitoring in crowd Ultralytics YOLOv8 |
+|  |  |
+| Queue management at airport ticket counter Using Ultralytics YOLO11 | Queue monitoring in crowd Ultralytics YOLO11 |
-!!! example "Queue Management using YOLOv8 Example"
+!!! example "Queue Management using YOLO11 Example"
- === "Queue Manager"
+ === "CLI"
- ```python
- import cv2
+ ```bash
+ # Run a queue example
+ yolo solutions queue show=True
- from ultralytics import YOLO, solutions
+ # Pass a source video
+ yolo solutions queue source="path/to/video/file.mp4"
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
-
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-
- queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
-
- queue = solutions.QueueManager(
- names=model.names,
- reg_pts=queue_region,
- line_thickness=3,
- )
-
- while cap.isOpened():
- success, im0 = cap.read()
-
- if success:
- tracks = model.track(im0, persist=True)
- out = queue.process_queue(im0, tracks)
-
- video_writer.write(im0)
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- continue
-
- print("Video frame is empty or video processing has been successfully completed.")
- break
-
- cap.release()
- cv2.destroyAllWindows()
+ # Pass queue coordinates
+    yolo solutions queue region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]"
```
- === "Queue Manager Specific Classes"
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
+    cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+ # Video writer
video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+ # Define queue region points
+    queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
+ # queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # Define queue polygon points
+ # Init Queue Manager
queue = solutions.QueueManager(
- names=model.names,
- reg_pts=queue_region,
- line_thickness=3,
+ show=True, # Display the output
+ model="yolo11n.pt", # Path to the YOLO11 model file
+ region=queue_region, # Pass queue region points
+        # classes=[0, 2], # If you want to count specific classes, i.e. person and car with the COCO pretrained model
+ # line_width=2, # Adjust the line width for bounding boxes and text display
)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
-
- if success:
- tracks = model.track(im0, persist=True, classes=0) # Only person class
- out = queue.process_queue(im0, tracks)
-
- video_writer.write(im0)
- if cv2.waitKey(1) & 0xFF == ord("q"):
- break
- continue
-
- print("Video frame is empty or video processing has been successfully completed.")
- break
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ out = queue.process_queue(im0)
+ video_writer.write(im0)
cap.release()
+ video_writer.release()
cv2.destroyAllWindows()
```
### Arguments `QueueManager`
-| Name | Type | Default | Description |
-| ---------------- | ---------------- | -------------------------- | -------------------------------------------------------------------------------- |
-| `names` | `dict` | `model.names` | A dictionary mapping class IDs to class names. |
-| `reg_pts` | `list of tuples` | `[(20, 400), (1260, 400)]` | Points defining the counting region polygon. Defaults to a predefined rectangle. |
-| `line_thickness` | `int` | `2` | Thickness of the annotation lines. |
-| `view_img` | `bool` | `False` | Whether to display the image frames. |
-| `draw_tracks` | `bool` | `False` | Whether to draw tracks of the objects. |
+| Name | Type | Default | Description |
+| ------------ | ------ | -------------------------- | ---------------------------------------------------- |
+| `model`      | `str`  | `None`                     | Path to Ultralytics YOLO model file.                  |
+| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the queue region. |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
### Arguments `model.track`
@@ -135,11 +105,11 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra
## FAQ
-### How can I use Ultralytics YOLOv8 for real-time queue management?
+### How can I use Ultralytics YOLO11 for real-time queue management?
-To use Ultralytics YOLOv8 for real-time queue management, you can follow these steps:
+To use Ultralytics YOLO11 for real-time queue management, you can follow these steps:
-1. Load the YOLOv8 model with `YOLO("yolov8n.pt")`.
+1. Load the YOLO11 model with `YOLO("yolo11n.pt")`.
2. Capture the video feed using `cv2.VideoCapture`.
3. Define the region of interest (ROI) for queue management.
4. Process frames to detect objects and manage queues.
@@ -149,23 +119,21 @@ Here's a minimal example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture("path/to/video.mp4")
-queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
+queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
queue = solutions.QueueManager(
- names=model.names,
- reg_pts=queue_region,
- line_thickness=3,
+ model="yolo11n.pt",
+ region=queue_region,
+ line_width=3,
)
while cap.isOpened():
success, im0 = cap.read()
if success:
- tracks = model.track(im0, show=False, persist=True, verbose=False)
- out = queue.process_queue(im0, tracks)
+ out = queue.process_queue(im0)
cv2.imshow("Queue Management", im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
@@ -176,9 +144,9 @@ cv2.destroyAllWindows()
Leveraging Ultralytics [HUB](https://docs.ultralytics.com/hub/) can streamline this process by providing a user-friendly platform for deploying and managing your queue management solution.
-### What are the key advantages of using Ultralytics YOLOv8 for queue management?
+### What are the key advantages of using Ultralytics YOLO11 for queue management?
-Using Ultralytics YOLOv8 for queue management offers several benefits:
+Using Ultralytics YOLO11 for queue management offers several benefits:
- **Plummeting Waiting Times:** Efficiently organizes queues, reducing customer wait times and boosting satisfaction.
- **Enhancing Efficiency:** Analyzes queue data to optimize staff deployment and operations, thereby reducing costs.
@@ -187,37 +155,37 @@ Using Ultralytics YOLOv8 for queue management offers several benefits:
For more details, explore our [Queue Management](https://docs.ultralytics.com/reference/solutions/queue_management/) solutions.
-### Why should I choose Ultralytics YOLOv8 over competitors like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or Detectron2 for queue management?
+### Why should I choose Ultralytics YOLO11 over competitors like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or Detectron2 for queue management?
-Ultralytics YOLOv8 has several advantages over TensorFlow and Detectron2 for queue management:
+Ultralytics YOLO11 has several advantages over TensorFlow and Detectron2 for queue management:
-- **Real-time Performance:** YOLOv8 is known for its real-time detection capabilities, offering faster processing speeds.
+- **Real-time Performance:** YOLO11 is known for its real-time detection capabilities, offering faster processing speeds.
- **Ease of Use:** Ultralytics provides a user-friendly experience, from training to deployment, via [Ultralytics HUB](https://docs.ultralytics.com/hub/).
- **Pretrained Models:** Access to a range of pretrained models, minimizing the time needed for setup.
- **Community Support:** Extensive documentation and active community support make problem-solving easier.
Learn how to get started with [Ultralytics YOLO](https://docs.ultralytics.com/quickstart/).
-### Can Ultralytics YOLOv8 handle multiple types of queues, such as in airports and retail?
+### Can Ultralytics YOLO11 handle multiple types of queues, such as in airports and retail?
-Yes, Ultralytics YOLOv8 can manage various types of queues, including those in airports and retail environments. By configuring the QueueManager with specific regions and settings, YOLOv8 can adapt to different queue layouts and densities.
+Yes, Ultralytics YOLO11 can manage various types of queues, including those in airports and retail environments. By configuring the QueueManager with specific regions and settings, YOLO11 can adapt to different queue layouts and densities.
Example for airports:
```python
queue_region_airport = [(50, 600), (1200, 600), (1200, 550), (50, 550)]
queue_airport = solutions.QueueManager(
- names=model.names,
- reg_pts=queue_region_airport,
- line_thickness=3,
+ model="yolo11n.pt",
+ region=queue_region_airport,
+ line_width=3,
)
```
For more information on diverse applications, check out our [Real World Applications](#real-world-applications) section.
-### What are some real-world applications of Ultralytics YOLOv8 in queue management?
+### What are some real-world applications of Ultralytics YOLO11 in queue management?
-Ultralytics YOLOv8 is used in various real-world applications for queue management:
+Ultralytics YOLO11 is used in various real-world applications for queue management:
- **Retail:** Monitors checkout lines to reduce wait times and improve customer satisfaction.
- **Airports:** Manages queues at ticket counters and security checkpoints for a smoother passenger experience.
diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md
index c25557e8a3f..00b8d31572a 100644
--- a/docs/en/guides/raspberry-pi.md
+++ b/docs/en/guides/raspberry-pi.md
@@ -1,12 +1,13 @@
---
comments: true
-description: Learn how to deploy Ultralytics YOLOv8 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices.
-keywords: Ultralytics, YOLOv8, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules
+description: Learn how to deploy Ultralytics YOLO11 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices.
+keywords: Ultralytics, YOLO11, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules
+benchmark_version: 8.3.39
---
-# Quick Start Guide: Raspberry Pi with Ultralytics YOLOv8
+# Quick Start Guide: Raspberry Pi with Ultralytics YOLO11
-This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [Raspberry Pi](https://www.raspberrypi.com/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLOv8 on these small and powerful devices.
+This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [Raspberry Pi](https://www.raspberrypi.com/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLO11 on these small and powerful devices.
@@ -41,7 +42,7 @@ Raspberry Pi is a small, affordable, single-board computer. It has become popula
## What is Raspberry Pi OS?
-[Raspberry Pi OS](https://www.raspberrypi.com/software) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi.
+[Raspberry Pi OS](https://www.raspberrypi.com/software/) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi.
## Flash Raspberry Pi OS to Raspberry Pi
@@ -56,7 +57,7 @@ There are two ways of setting up Ultralytics package on Raspberry Pi to build yo
### Start with Docker
-The fastest way to get started with Ultralytics YOLOv8 on Raspberry Pi is to run with pre-built docker image for Raspberry Pi.
+The fastest way to get started with Ultralytics YOLO11 on Raspberry Pi is to run the pre-built Docker image for Raspberry Pi.
Execute the command below to pull the Docker container and run it on the Raspberry Pi. This is based on the [arm64v8/debian](https://hub.docker.com/r/arm64v8/debian) Docker image, which contains Debian 12 (Bookworm) in a Python3 environment.
@@ -94,11 +95,11 @@ Here we will install Ultralytics package on the Raspberry Pi with optional depen
## Use NCNN on Raspberry Pi
-Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn/) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/ embedded platforms (such as ARM architecture). Therefor our recommendation is to use NCNN with Raspberry Pi.
+Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn/) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/embedded platforms (such as ARM architecture).
## Convert Model to NCNN and Run Inference
-The YOLOv8n model in PyTorch format is converted to NCNN to run inference with the exported model.
+The YOLO11n model in PyTorch format is converted to NCNN to run inference with the exported model.
!!! example
@@ -107,14 +108,14 @@ The YOLOv8n model in PyTorch format is converted to NCNN to run inference with t
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
# Export the model to NCNN format
- model.export(format="ncnn") # creates 'yolov8n_ncnn_model'
+ model.export(format="ncnn") # creates 'yolo11n_ncnn_model'
# Load the exported NCNN model
- ncnn_model = YOLO("yolov8n_ncnn_model")
+ ncnn_model = YOLO("yolo11n_ncnn_model")
# Run inference
results = ncnn_model("https://ultralytics.com/images/bus.jpg")
@@ -123,102 +124,67 @@ The YOLOv8n model in PyTorch format is converted to NCNN to run inference with t
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to NCNN format
- yolo export model=yolov8n.pt format=ncnn # creates 'yolov8n_ncnn_model'
+ # Export a YOLO11n PyTorch model to NCNN format
+ yolo export model=yolo11n.pt format=ncnn # creates 'yolo11n_ncnn_model'
# Run inference with the exported model
- yolo predict model='yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model='yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
```
!!! tip
For more details about supported export options, visit the [Ultralytics documentation page on deployment options](https://docs.ultralytics.com/guides/model-deployment-options/).
-## Raspberry Pi 5 vs Raspberry Pi 4 YOLOv8 Benchmarks
+## Raspberry Pi 5 YOLO11 Benchmarks
-YOLOv8 benchmarks were run by the Ultralytics team on nine different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on both Raspberry Pi 5 and Raspberry Pi 4 at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640.
-
-!!! note
-
- We have only included benchmarks for YOLOv8n and YOLOv8s models because other models sizes are too big to run on the Raspberry Pis and does not offer decent performance.
+YOLO11 benchmarks were run by the Ultralytics team on ten different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, MNN, NCNN. Benchmarks were run on a Raspberry Pi 5 at FP32 [precision](https://www.ultralytics.com/glossary/precision) with a default input image size of 640.
### Comparison Chart
-!!! tip "Performance"
-
- === "YOLOv8n"
-
-
-
-
-
- === "YOLOv8s"
+We have only included benchmarks for YOLO11n and YOLO11s models because other model sizes are too big to run on the Raspberry Pis and do not offer decent performance.
-
-
-
+
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
+
### Detailed Comparison Table
-The below table represents the benchmark results for two different models (YOLOv8n, YOLOv8s) across nine different formats (PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), running on both Raspberry Pi 4 and Raspberry Pi 5, giving us the status, size, mAP50-95(B) metric, and inference time for each combination.
+The table below presents the benchmark results for two different models (YOLO11n, YOLO11s) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, MNN, NCNN), running on a Raspberry Pi 5, giving the status, size, mAP50-95(B) metric, and inference time for each combination.
!!! tip "Performance"
- === "YOLOv8n on RPi5"
-
- | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
- |---------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 6.2 | 0.6381 | 508.61 |
- | TorchScript | โ | 12.4 | 0.6092 | 558.38 |
- | ONNX | โ | 12.2 | 0.6092 | 198.69 |
- | OpenVINO | โ | 12.3 | 0.6092 | 704.70 |
- | TF SavedModel | โ | 30.6 | 0.6092 | 367.64 |
- | TF GraphDef | โ | 12.3 | 0.6092 | 473.22 |
- | TF Lite | โ | 12.3 | 0.6092 | 380.67 |
- | PaddlePaddle | โ | 24.4 | 0.6092 | 703.51 |
- | NCNN | โ | 12.2 | 0.6034 | 94.28 |
-
- === "YOLOv8s on RPi5"
-
- | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
- |---------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 21.5 | 0.6967 | 969.49 |
- | TorchScript | โ | 43.0 | 0.7136 | 1110.04 |
- | ONNX | โ | 42.8 | 0.7136 | 451.37 |
- | OpenVINO | โ | 42.9 | 0.7136 | 873.51 |
- | TF SavedModel | โ | 107.0 | 0.7136 | 658.15 |
- | TF GraphDef | โ | 42.8 | 0.7136 | 946.01 |
- | TF Lite | โ | 42.8 | 0.7136 | 1013.27 |
- | PaddlePaddle | โ | 85.5 | 0.7136 | 1560.23 |
- | NCNN | โ | 42.7 | 0.7204 | 211.26 |
-
- === "YOLOv8n on RPi4"
+ === "YOLO11n"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|---------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 6.2 | 0.6381 | 1068.42 |
- | TorchScript | โ | 12.4 | 0.6092 | 1248.01 |
- | ONNX | โ | 12.2 | 0.6092 | 560.04 |
- | OpenVINO | โ | 12.3 | 0.6092 | 534.93 |
- | TF SavedModel | โ | 30.6 | 0.6092 | 816.50 |
- | TF GraphDef | โ | 12.3 | 0.6092 | 1007.57 |
- | TF Lite | โ | 12.3 | 0.6092 | 950.29 |
- | PaddlePaddle | โ | 24.4 | 0.6092 | 1507.75 |
- | NCNN | โ | 12.2 | 0.6092 | 414.73 |
-
- === "YOLOv8s on RPi4"
+    | PyTorch       | ✅     | 5.4               | 0.6100      | 405.238                |
+    | TorchScript   | ✅     | 10.5              | 0.6082      | 526.628                |
+    | ONNX          | ✅     | 10.2              | 0.6082      | 168.082                |
+    | OpenVINO      | ✅     | 10.4              | 0.6082      | 81.192                 |
+    | TF SavedModel | ✅     | 25.8              | 0.6082      | 377.968                |
+    | TF GraphDef   | ✅     | 10.3              | 0.6082      | 487.244                |
+    | TF Lite       | ✅     | 10.3              | 0.6082      | 317.398                |
+    | PaddlePaddle  | ✅     | 20.4              | 0.6082      | 561.892                |
+    | MNN           | ✅     | 10.1              | 0.6106      | 112.554                |
+    | NCNN          | ✅     | 10.2              | 0.6106      | 88.026                 |
+
+ === "YOLO11s"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|---------------|--------|-------------------|-------------|------------------------|
- | PyTorch | โ | 21.5 | 0.6967 | 2589.58 |
- | TorchScript | โ | 43.0 | 0.7136 | 2901.33 |
- | ONNX | โ | 42.8 | 0.7136 | 1436.33 |
- | OpenVINO | โ | 42.9 | 0.7136 | 1225.19 |
- | TF SavedModel | โ | 107.0 | 0.7136 | 1770.95 |
- | TF GraphDef | โ | 42.8 | 0.7136 | 2146.66 |
- | TF Lite | โ | 42.8 | 0.7136 | 2945.03 |
- | PaddlePaddle | โ | 85.5 | 0.7136 | 3962.62 |
- | NCNN | โ | 42.7 | 0.7136 | 1042.39 |
+    | PyTorch       | ✅     | 18.4              | 0.7526      | 1011.60                |
+    | TorchScript   | ✅     | 36.5              | 0.7416      | 1268.502               |
+    | ONNX          | ✅     | 36.3              | 0.7416      | 324.17                 |
+    | OpenVINO      | ✅     | 36.4              | 0.7416      | 179.324                |
+    | TF SavedModel | ✅     | 91.1              | 0.7416      | 714.382                |
+    | TF GraphDef   | ✅     | 36.4              | 0.7416      | 1019.83                |
+    | TF Lite       | ✅     | 36.4              | 0.7416      | 849.86                 |
+    | PaddlePaddle  | ✅     | 72.5              | 0.7416      | 1276.34                |
+    | MNN           | ✅     | 36.2              | 0.7409      | 273.032                |
+    | NCNN          | ✅     | 36.2              | 0.7419      | 194.858                |
+
+ Benchmarked with Ultralytics {{ benchmark_version }}
## Reproduce Our Results
@@ -231,25 +197,25 @@ To reproduce the above Ultralytics benchmarks on all [export formats](../modes/e
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
- # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
- results = model.benchmarks(data="coco8.yaml", imgsz=640)
+    # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
+ results = model.benchmark(data="coco8.yaml", imgsz=640)
```
=== "CLI"
```bash
- # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
- yolo benchmark model=yolov8n.pt data=coco8.yaml imgsz=640
+    # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
+ yolo benchmark model=yolo11n.pt data=coco8.yaml imgsz=640
```
Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results, use a dataset with a large number of images, e.g. `data='coco.yaml'` (5000 val images) rather than `data='coco8.yaml'` (4 val images).
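+As a quick sketch, the CLI command above can simply point at the larger dataset (note that `data=coco.yaml` downloads the full COCO dataset on first use, which requires significant time and disk space on a Raspberry Pi):
+
+```bash
+# Benchmark YOLO11n on the 5000-image COCO val set for more stable results
+yolo benchmark model=yolo11n.pt data=coco.yaml imgsz=640
+```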
## Use Raspberry Pi Camera
-When using Raspberry Pi for Computer Vision projects, it can be essentially to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry PI camera modules. In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3) to grab the video feeds and perform inference using YOLOv8 models.
+When using Raspberry Pi for Computer Vision projects, it can be essential to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry Pi camera modules. In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3/) to grab the video feeds and perform inference using YOLO11 models.
!!! tip
@@ -257,7 +223,7 @@ When using Raspberry Pi for Computer Vision projects, it can be essentially to g
!!! note
- Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (15-pin vs 22-pin), so you will need a [15-pin to 22pin adapter cable](https://www.raspberrypi.com/products/camera-cable) to connect to a Raspberry Pi Camera.
+    Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (15-pin vs 22-pin), so you will need a [15-pin to 22-pin adapter cable](https://www.raspberrypi.com/products/camera-cable/) to connect to a Raspberry Pi Camera.
### Test the Camera
@@ -273,13 +239,13 @@ rpicam-hello
### Inference with Camera
-There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models.
+There are two methods of using the Raspberry Pi Camera to run inference with YOLO11 models.
!!! usage
=== "Method 1"
- We can use `picamera2`which comes pre-installed with Raspberry Pi OS to access the camera and inference YOLOv8 models.
+    We can use `picamera2`, which comes pre-installed with Raspberry Pi OS, to access the camera and run inference with YOLO11 models.
!!! example
@@ -299,14 +265,14 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models.
picam2.configure("preview")
picam2.start()
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
while True:
# Capture frame-by-frame
frame = picam2.capture_array()
- # Run YOLOv8 inference on the frame
+ # Run YOLO11 inference on the frame
results = model(frame)
# Visualize the results on the frame
@@ -340,8 +306,8 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models.
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
# Run inference
results = model("tcp://127.0.0.1:8888")
@@ -350,7 +316,7 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models.
=== "CLI"
```bash
- yolo predict model=yolov8n.pt source="tcp://127.0.0.1:8888"
+ yolo predict model=yolo11n.pt source="tcp://127.0.0.1:8888"
```
!!! tip
@@ -359,7 +325,7 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models.
## Best Practices when using Raspberry Pi
-There are a couple of best practices to follow in order to enable maximum performance on Raspberry Pis running YOLOv8.
+There are a couple of best practices to follow to achieve maximum performance on Raspberry Pis running YOLO11.
1. Use an SSD
@@ -371,7 +337,7 @@ There are a couple of best practices to follow in order to enable maximum perfor
## Next Steps
-Congratulations on successfully setting up YOLO on your Raspberry Pi! For further learning and support, visit [Ultralytics YOLOv8 Docs](../index.md) and [Kashmir World Foundation](https://www.kashmirworldfoundation.org/).
+Congratulations on successfully setting up YOLO on your Raspberry Pi! For further learning and support, visit [Ultralytics YOLO11 Docs](../index.md) and [Kashmir World Foundation](https://www.kashmirworldfoundation.org/).
## Acknowledgements and Citations
@@ -381,9 +347,9 @@ For more information about Kashmir World Foundation's activities, you can visit
## FAQ
-### How do I set up Ultralytics YOLOv8 on a Raspberry Pi without using Docker?
+### How do I set up Ultralytics YOLO11 on a Raspberry Pi without using Docker?
-To set up Ultralytics YOLOv8 on a Raspberry Pi without Docker, follow these steps:
+To set up Ultralytics YOLO11 on a Raspberry Pi without Docker, follow these steps:
1. Update the package list and install `pip`:
```bash
@@ -402,13 +368,13 @@ To set up Ultralytics YOLOv8 on a Raspberry Pi without Docker, follow these step
For detailed instructions, refer to the [Start without Docker](#start-without-docker) section.
-### Why should I use Ultralytics YOLOv8's NCNN format on Raspberry Pi for AI tasks?
+### Why should I use Ultralytics YOLO11's NCNN format on Raspberry Pi for AI tasks?
-Ultralytics YOLOv8's NCNN format is highly optimized for mobile and embedded platforms, making it ideal for running AI tasks on Raspberry Pi devices. NCNN maximizes inference performance by leveraging ARM architecture, providing faster and more efficient processing compared to other formats. For more details on supported export options, visit the [Ultralytics documentation page on deployment options](../modes/export.md).
+Ultralytics YOLO11's NCNN format is highly optimized for mobile and embedded platforms, making it ideal for running AI tasks on Raspberry Pi devices. NCNN maximizes inference performance by leveraging ARM architecture, providing faster and more efficient processing compared to other formats. For more details on supported export options, visit the [Ultralytics documentation page on deployment options](../modes/export.md).
-### How can I convert a YOLOv8 model to NCNN format for use on Raspberry Pi?
+### How can I convert a YOLO11 model to NCNN format for use on Raspberry Pi?
-You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI commands:
+You can convert a PyTorch YOLO11 model to NCNN format using either Python or CLI commands:
!!! example
@@ -417,14 +383,14 @@ You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI
```python
from ultralytics import YOLO
- # Load a YOLOv8n PyTorch model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n PyTorch model
+ model = YOLO("yolo11n.pt")
# Export the model to NCNN format
- model.export(format="ncnn") # creates 'yolov8n_ncnn_model'
+ model.export(format="ncnn") # creates 'yolo11n_ncnn_model'
# Load the exported NCNN model
- ncnn_model = YOLO("yolov8n_ncnn_model")
+ ncnn_model = YOLO("yolo11n_ncnn_model")
# Run inference
results = ncnn_model("https://ultralytics.com/images/bus.jpg")
@@ -433,16 +399,16 @@ You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to NCNN format
- yolo export model=yolov8n.pt format=ncnn # creates 'yolov8n_ncnn_model'
+ # Export a YOLO11n PyTorch model to NCNN format
+ yolo export model=yolo11n.pt format=ncnn # creates 'yolo11n_ncnn_model'
# Run inference with the exported model
- yolo predict model='yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model='yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
```
For more details, see the [Use NCNN on Raspberry Pi](#use-ncnn-on-raspberry-pi) section.
-### What are the hardware differences between Raspberry Pi 4 and Raspberry Pi 5 relevant to running YOLOv8?
+### What are the hardware differences between Raspberry Pi 4 and Raspberry Pi 5 relevant to running YOLO11?
Key differences include:
@@ -450,11 +416,11 @@ Key differences include:
- **Max CPU Frequency**: Raspberry Pi 4 has a max frequency of 1.8GHz, whereas Raspberry Pi 5 reaches 2.4GHz.
- **Memory**: Raspberry Pi 4 offers up to 8GB of LPDDR4-3200 SDRAM, while Raspberry Pi 5 features LPDDR4X-4267 SDRAM, available in 4GB and 8GB variants.
-These enhancements contribute to better performance benchmarks for YOLOv8 models on Raspberry Pi 5 compared to Raspberry Pi 4. Refer to the [Raspberry Pi Series Comparison](#raspberry-pi-series-comparison) table for more details.
+These enhancements contribute to better performance benchmarks for YOLO11 models on Raspberry Pi 5 compared to Raspberry Pi 4. Refer to the [Raspberry Pi Series Comparison](#raspberry-pi-series-comparison) table for more details.
-### How can I set up a Raspberry Pi Camera Module to work with Ultralytics YOLOv8?
+### How can I set up a Raspberry Pi Camera Module to work with Ultralytics YOLO11?
-There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference:
+There are two methods to set up a Raspberry Pi Camera for YOLO11 inference:
1. **Using `picamera2`**:
@@ -471,7 +437,7 @@ There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference:
picam2.configure("preview")
picam2.start()
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
while True:
frame = picam2.capture_array()
@@ -494,7 +460,7 @@ There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
results = model("tcp://127.0.0.1:8888")
```
diff --git a/docs/en/guides/region-counting.md b/docs/en/guides/region-counting.md
index a27c2b4e533..3d2b9fbb7ed 100644
--- a/docs/en/guides/region-counting.md
+++ b/docs/en/guides/region-counting.md
@@ -1,24 +1,24 @@
---
comments: true
-description: Learn how to use Ultralytics YOLOv8 for precise object counting in specified regions, enhancing efficiency across various applications.
-keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, efficiency, accuracy, automation, real-time, applications, surveillance, monitoring
+description: Learn how to use Ultralytics YOLO11 for precise object counting in specified regions, enhancing efficiency across various applications.
+keywords: object counting, regions, YOLO11, computer vision, Ultralytics, efficiency, accuracy, automation, real-time, applications, surveillance, monitoring
---
-# Object Counting in Different Regions using Ultralytics YOLOv8 ๐
+# Object Counting in Different Regions using Ultralytics YOLO 🚀
## What is Object Counting in Regions?
-[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications.
+[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications.
-
- Watch: Ultralytics YOLOv8 Object Counting in Multiple & Movable Regions
+    Watch: Object Counting in Different Regions using Ultralytics YOLO11 | Ultralytics Solutions 🚀
## Advantages of Object Counting in Regions?
@@ -31,69 +31,79 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
| Retail | Market Streets |
| :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| People Counting in Different Region using Ultralytics YOLOv8 | Crowd Counting in Different Region using Ultralytics YOLOv8 |
+|  |  |
+| People Counting in Different Regions using Ultralytics YOLO11 | Crowd Counting in Different Regions using Ultralytics YOLO11 |
-## Steps to Run
+!!! example "Region Counting Example"
-### Step 1: Install Required Libraries
+ === "Python"
-Begin by cloning the Ultralytics repository, installing dependencies, and navigating to the local directory using the provided commands in Step 2.
+ ```python
+ import cv2
-```bash
-# Clone Ultralytics repo
-git clone https://github.com/ultralytics/ultralytics
+ from ultralytics import solutions
-# Navigate to the local directory
-cd ultralytics/examples/YOLOv8-Region-Counter
-```
+    cap = cv2.VideoCapture("path/to/video/file.mp4")
+ assert cap.isOpened(), "Error reading video file"
+ w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-### Step 2: Run Region Counting Using Ultralytics YOLOv8
+ # Define region points
+ # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Pass region as list
-Execute the following basic commands for inference.
+    # Pass region as dictionary
+ region_points = {
+ "region-01": [(50, 50), (250, 50), (250, 250), (50, 250)],
+ "region-02": [(640, 640), (780, 640), (780, 720), (640, 720)],
+ }
-???+ tip "Region is Movable"
+ # Video writer
+ video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- During video playback, you can interactively move the region within the video by clicking and dragging using the left mouse button.
+ # Init RegionCounter
+ region = solutions.RegionCounter(
+ show=True,
+ region=region_points,
+ model="yolo11n.pt",
+ )
-```bash
-# Save results
-python yolov8_region_counter.py --source "path/to/video.mp4" --save-img
+ # Process video
+ while cap.isOpened():
+ success, im0 = cap.read()
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ im0 = region.count(im0)
+ video_writer.write(im0)
-# Run model on CPU
-python yolov8_region_counter.py --source "path/to/video.mp4" --device cpu
+ cap.release()
+ video_writer.release()
+ cv2.destroyAllWindows()
+ ```
-# Change model file
-python yolov8_region_counter.py --source "path/to/video.mp4" --weights "path/to/model.pt"
+!!! tip "Ultralytics Example Code"
-# Detect specific classes (e.g., first and third classes)
-python yolov8_region_counter.py --source "path/to/video.mp4" --classes 0 2
+ The Ultralytics region counting module is available in our [examples section](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py). You can explore this example for code customization and modify it to suit your specific use case.
-# View results without saving
-python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
-```
+### Arguments `RegionCounter`
-### Optional Arguments
+Here's a table with the `RegionCounter` arguments:
-| Name | Type | Default | Description |
-| -------------------- | ------ | ------------ | --------------------------------------------------------------------------- |
-| `--source` | `str` | `None` | Path to video file, for webcam 0 |
-| `--line_thickness` | `int` | `2` | [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) thickness |
-| `--save-img` | `bool` | `False` | Save the predicted video/image |
-| `--weights` | `str` | `yolov8n.pt` | Weights file path |
-| `--classes` | `list` | `None` | Detect specific classes i.e. --classes 0 2 |
-| `--region-thickness` | `int` | `2` | Region Box thickness |
-| `--track-thickness` | `int` | `2` | Tracking line thickness |
+| Name | Type | Default | Description |
+| ------------ | ------ | -------------------------- | ---------------------------------------------------- |
+| `model`      | `str`  | `None`                     | Path to Ultralytics YOLO model file.                  |
+| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
## FAQ
-### What is object counting in specified regions using Ultralytics YOLOv8?
+### What is object counting in specified regions using Ultralytics YOLO11?
-Object counting in specified regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) across various applications like manufacturing, surveillance, and traffic monitoring.
+Object counting in specified regions with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) across various applications like manufacturing, surveillance, and traffic monitoring.
-### How do I run the object counting script with Ultralytics YOLOv8?
+### How do I run the region-based object counting script with Ultralytics YOLO11?
-Follow these steps to run object counting in Ultralytics YOLOv8:
+Follow these steps to run object counting in Ultralytics YOLO11:
1. Clone the Ultralytics repository and navigate to the directory:
@@ -107,11 +117,11 @@ Follow these steps to run object counting in Ultralytics YOLOv8:
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img
```
-For more options, visit the [Run Region Counting](#steps-to-run) section.
+For more options, see the [YOLOv8 Region Counter example readme](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/readme.md).
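+As a quick reference, here are a couple of the script's options as previously documented for this example (verify against the linked readme, since flags may change):
+
+```bash
+# View results live without saving
+python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
+
+# Run on CPU and detect only specific classes (e.g. 0 and 2)
+python yolov8_region_counter.py --source "path/to/video.mp4" --device cpu --classes 0 2
+```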
-### Why should I use Ultralytics YOLOv8 for object counting in regions?
+### Why should I use Ultralytics YOLO11 for object counting in regions?
-Using Ultralytics YOLOv8 for object counting in regions offers several advantages:
+Using Ultralytics YOLO11 for object counting in regions offers several advantages:
- **Precision and Accuracy:** Minimizes errors often seen in manual counting.
- **Efficiency Improvement:** Provides real-time results and streamlines processes.
@@ -119,13 +129,9 @@ Using Ultralytics YOLOv8 for object counting in regions offers several advantage
Explore deeper benefits in the [Advantages](#advantages-of-object-counting-in-regions) section.
-### Can the defined regions be adjusted during video playback?
-
-Yes, with Ultralytics YOLOv8, regions can be interactively moved during video playback. Simply click and drag with the left mouse button to reposition the region. This feature enhances flexibility for dynamic environments. Learn more in the tip section for [movable regions](#step-2-run-region-counting-using-ultralytics-yolov8).
-
### What are some real-world applications of object counting in regions?
-Object counting with Ultralytics YOLOv8 can be applied to numerous real-world scenarios:
+Object counting with Ultralytics YOLO11 can be applied to numerous real-world scenarios:
- **Retail:** Counting people for foot traffic analysis.
- **Market Streets:** Crowd density management.
diff --git a/docs/en/guides/sahi-tiled-inference.md b/docs/en/guides/sahi-tiled-inference.md
index d44bce0253f..1cc3188a7d0 100644
--- a/docs/en/guides/sahi-tiled-inference.md
+++ b/docs/en/guides/sahi-tiled-inference.md
@@ -1,12 +1,14 @@
---
comments: true
-description: Learn how to implement YOLOv8 with SAHI for sliced inference. Optimize memory usage and enhance detection accuracy for large-scale applications.
-keywords: YOLOv8, SAHI, Sliced Inference, Object Detection, Ultralytics, High-resolution Images, Computational Efficiency, Integration Guide
+description: Learn how to implement YOLO11 with SAHI for sliced inference. Optimize memory usage and enhance detection accuracy for large-scale applications.
+keywords: YOLO11, SAHI, Sliced Inference, Object Detection, Ultralytics, High-resolution Images, Computational Efficiency, Integration Guide
---
-# Ultralytics Docs: Using YOLOv8 with SAHI for Sliced Inference
+# Ultralytics Docs: Using YOLO11 with SAHI for Sliced Inference
-Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced [object detection](https://www.ultralytics.com/glossary/object-detection) performance.
+
+
+Welcome to the Ultralytics documentation on how to use YOLO11 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLO11. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLO11 for enhanced [object detection](https://www.ultralytics.com/glossary/object-detection) performance.
@@ -24,7 +26,7 @@ SAHI (Slicing Aided Hyper Inference) is an innovative library designed to optimi
allowfullscreen>
- Watch: Inference with SAHI (Slicing Aided Hyper Inference) using Ultralytics YOLOv8
+ Watch: Inference with SAHI (Slicing Aided Hyper Inference) using Ultralytics YOLO11
### Key Features of SAHI
@@ -47,12 +49,12 @@ Sliced Inference refers to the practice of subdividing a large or high-resolutio
-
YOLOv8 without SAHI
-
YOLOv8 with SAHI
+
YOLO11 without SAHI
+
YOLO11 with SAHI
-
-
+
+
@@ -68,7 +70,7 @@ pip install -U ultralytics sahi
### Import Modules and Download Resources
-Here's how to import the necessary modules and download a YOLOv8 model and some test images:
+Here's how to import the necessary modules and download a YOLO11 model and some test images:
```python
from pathlib import Path
@@ -78,11 +80,11 @@ from sahi import AutoDetectionModel
from sahi.predict import get_prediction, get_sliced_prediction, predict
from sahi.utils.cv import read_image
from sahi.utils.file import download_from_url
-from sahi.utils.yolov8 import download_yolov8s_model
+from sahi.utils.ultralytics import download_yolo11n_model
-# Download YOLOv8 model
-yolov8_model_path = "models/yolov8s.pt"
-download_yolov8s_model(yolov8_model_path)
+# Download YOLO11 model
+model_path = "models/yolo11n.pt"
+download_yolo11n_model(model_path)
# Download test images
download_from_url(
@@ -95,16 +97,16 @@ download_from_url(
)
```
-## Standard Inference with YOLOv8
+## Standard Inference with YOLO11
### Instantiate the Model
-You can instantiate a YOLOv8 model for object detection like this:
+You can instantiate a YOLO11 model for object detection like this:
```python
detection_model = AutoDetectionModel.from_pretrained(
- model_type="yolov8",
- model_path=yolov8_model_path,
+ model_type="ultralytics",
+ model_path=model_path,
confidence_threshold=0.3,
device="cpu", # or 'cuda:0'
)
@@ -119,7 +121,7 @@ Perform standard inference using an image path or a numpy image.
result = get_prediction("demo_data/small-vehicles1.jpeg", detection_model)
# With a numpy image
-result = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model)
+result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model)
```
### Visualize Results
@@ -131,7 +133,7 @@ result.export_visuals(export_dir="demo_data/")
Image("demo_data/prediction_visual.png")
```
-## Sliced Inference with YOLOv8
+## Sliced Inference with YOLO11
Perform sliced inference by specifying the slice dimensions and overlap ratios:
@@ -167,8 +169,8 @@ For batch prediction on a directory of images:
```python
predict(
- model_type="yolov8",
- model_path="path/to/yolov8n.pt",
+ model_type="ultralytics",
+ model_path="path/to/yolo11n.pt",
model_device="cpu", # or 'cuda:0'
model_confidence_threshold=0.4,
source="path/to/dir",
@@ -179,7 +181,7 @@ predict(
)
```
-That's it! Now you're equipped to use YOLOv8 with SAHI for both standard and sliced inference.
+That's it! Now you're equipped to use YOLO11 with SAHI for both standard and sliced inference.
## Citations and Acknowledgments
@@ -204,23 +206,23 @@ We extend our thanks to the SAHI research group for creating and maintaining thi
## FAQ
-### How can I integrate YOLOv8 with SAHI for sliced inference in object detection?
+### How can I integrate YOLO11 with SAHI for sliced inference in object detection?
-Integrating Ultralytics YOLOv8 with SAHI (Slicing Aided Hyper Inference) for sliced inference optimizes your object detection tasks on high-resolution images by partitioning them into manageable slices. This approach improves memory usage and ensures high detection accuracy. To get started, you need to install the ultralytics and sahi libraries:
+Integrating Ultralytics YOLO11 with SAHI (Slicing Aided Hyper Inference) for sliced inference optimizes your object detection tasks on high-resolution images by partitioning them into manageable slices. This approach improves memory usage and ensures high detection accuracy. To get started, you need to install the ultralytics and sahi libraries:
```bash
pip install -U ultralytics sahi
```
-Then, download a YOLOv8 model and test images:
+Then, download a YOLO11 model and test images:
```python
from sahi.utils.file import download_from_url
-from sahi.utils.yolov8 import download_yolov8s_model
+from sahi.utils.ultralytics import download_yolo11n_model
-# Download YOLOv8 model
-yolov8_model_path = "models/yolov8s.pt"
-download_yolov8s_model(yolov8_model_path)
+# Download YOLO11 model
+model_path = "models/yolo11n.pt"
+download_yolo11n_model(model_path)
# Download test images
download_from_url(
@@ -229,11 +231,11 @@ download_from_url(
)
```
-For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolov8).
+For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolo11).
-### Why should I use SAHI with YOLOv8 for object detection on large images?
+### Why should I use SAHI with YOLO11 for object detection on large images?
-Using SAHI with Ultralytics YOLOv8 for object detection on large images offers several benefits:
+Using SAHI with Ultralytics YOLO11 for object detection on large images offers several benefits:
- **Reduced Computational Burden**: Smaller slices are faster to process and consume less memory, making it feasible to run high-quality detections on hardware with limited resources.
- **Maintained Detection Accuracy**: SAHI uses intelligent algorithms to merge overlapping boxes, preserving the detection quality.
@@ -241,9 +243,9 @@ Using SAHI with Ultralytics YOLOv8 for object detection on large images offers s
Learn more about the [benefits of sliced inference](#benefits-of-sliced-inference) in our documentation.
-### Can I visualize prediction results when using YOLOv8 with SAHI?
+### Can I visualize prediction results when using YOLO11 with SAHI?
-Yes, you can visualize prediction results when using YOLOv8 with SAHI. Here's how you can export and visualize the results:
+Yes, you can visualize prediction results when using YOLO11 with SAHI. Here's how you can export and visualize the results:
```python
from IPython.display import Image
@@ -252,11 +254,11 @@ result.export_visuals(export_dir="demo_data/")
Image("demo_data/prediction_visual.png")
```
-This command will save the visualized predictions to the specified directory and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results).
+This command will save the visualized predictions to the specified directory, and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results).
-### What features does SAHI offer for improving YOLOv8 object detection?
+### What features does SAHI offer for improving YOLO11 object detection?
-SAHI (Slicing Aided Hyper Inference) offers several features that complement Ultralytics YOLOv8 for object detection:
+SAHI (Slicing Aided Hyper Inference) offers several features that complement Ultralytics YOLO11 for object detection:
- **Seamless Integration**: SAHI easily integrates with YOLO models, requiring minimal code adjustments.
- **Resource Efficiency**: It partitions large images into smaller slices, which optimizes memory usage and speed.
@@ -264,9 +266,9 @@ SAHI (Slicing Aided Hyper Inference) offers several features that complement Ult
For a deeper understanding, read about SAHI's [key features](#key-features-of-sahi).
-### How do I handle large-scale inference projects using YOLOv8 and SAHI?
+### How do I handle large-scale inference projects using YOLO11 and SAHI?
-To handle large-scale inference projects using YOLOv8 and SAHI, follow these best practices:
+To handle large-scale inference projects using YOLO11 and SAHI, follow these best practices:
1. **Install Required Libraries**: Ensure that you have the latest versions of ultralytics and sahi.
2. **Configure Sliced Inference**: Determine the optimal slice dimensions and overlap ratios for your specific project.
@@ -278,8 +280,8 @@ Example for batch prediction:
from sahi.predict import predict
predict(
- model_type="yolov8",
- model_path="path/to/yolov8n.pt",
+ model_type="ultralytics",
+ model_path="path/to/yolo11n.pt",
model_device="cpu", # or 'cuda:0'
model_confidence_threshold=0.4,
source="path/to/dir",
diff --git a/docs/en/guides/security-alarm-system.md b/docs/en/guides/security-alarm-system.md
index 8ff085bf751..9eb4b07221e 100644
--- a/docs/en/guides/security-alarm-system.md
+++ b/docs/en/guides/security-alarm-system.md
@@ -1,17 +1,17 @@
---
comments: true
-description: Enhance your security with real-time object detection using Ultralytics YOLOv8. Reduce false positives and integrate seamlessly with existing systems.
-keywords: YOLOv8, Security Alarm System, real-time object detection, Ultralytics, computer vision, integration, false positives
+description: Enhance your security with real-time object detection using Ultralytics YOLO11. Reduce false positives and integrate seamlessly with existing systems.
+keywords: YOLO11, Security Alarm System, real-time object detection, Ultralytics, computer vision, integration, false positives
---
-# Security Alarm System Project Using Ultralytics YOLOv8
+# Security Alarm System Project Using Ultralytics YOLO11
-The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLOv8, developed by Ultralytics, provides real-time object detection, allowing the system to identify and respond to potential security threats promptly. This project offers several advantages:
+The Security Alarm System Project utilizing Ultralytics YOLO11 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLO11, developed by Ultralytics, provides real-time [object detection](https://www.ultralytics.com/glossary/object-detection), allowing the system to identify and respond to potential security threats promptly. This project offers several advantages:
-- **Real-time Detection:** YOLOv8's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time.
-- **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLOv8 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system.
+- **Real-time Detection:** YOLO11's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time.
+- **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLO11 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system.
- **Integration Capabilities:** The project can be seamlessly integrated with existing security infrastructure, providing an upgraded layer of intelligent surveillance.
@@ -22,155 +22,59 @@ The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanc
allowfullscreen>
- Watch: Security Alarm System Project with Ultralytics YOLOv8 [Object Detection](https://www.ultralytics.com/glossary/object-detection)
+ Watch: Security Alarm System Project with Ultralytics YOLO11 Object Detection
### Code
-#### Import Libraries
+???+ note
-```python
-import smtplib
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from time import time
+ App Password Generation is necessary
-import cv2
-import numpy as np
-import torch
+- Navigate to [App Password Generator](https://myaccount.google.com/apppasswords), designate an app name such as "security project," and obtain a 16-digit password. Copy this password and paste it into the designated `password` field in the code below.
-from ultralytics import YOLO
-from ultralytics.utils.plotting import Annotator, colors
-```
+!!! example "Security Alarm System using YOLO11 Example"
-#### Set up the parameters of the message
+ === "Python"
-???+ note
+ ```python
+ import cv2
- App Password Generation is necessary
+ from ultralytics import solutions
+
+    cap = cv2.VideoCapture("path/to/video/file.mp4")
+ assert cap.isOpened(), "Error reading video file"
+
+ # Video writer
+ w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+ video_writer = cv2.VideoWriter("security_alarm_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+ from_email = "abc@gmail.com" # The sender email address
+    password = "---- ---- ---- ----" # 16-digit password generated via: https://myaccount.google.com/apppasswords
+ to_email = "xyz@gmail.com" # The receiver email address
+
+ # Init SecurityAlarm
+ security = solutions.SecurityAlarm(
+ show=True, # Display the output
+        model="yolo11n.pt", # Path to the YOLO11 model file, e.g. yolo11s.pt
+        records=1, # Number of detections required before an email alert is sent
+ )
-- Navigate to [App Password Generator](https://myaccount.google.com/apppasswords), designate an app name such as "security project," and obtain a 16-digit password. Copy this password and paste it into the designated password field as instructed.
-
-```python
-password = ""
-from_email = "" # must match the email used to generate the password
-to_email = "" # receiver email
-```
-
-#### Server creation and authentication
-
-```python
-server = smtplib.SMTP("smtp.gmail.com: 587")
-server.starttls()
-server.login(from_email, password)
-```
-
-#### Email Send Function
-
-```python
-def send_email(to_email, from_email, object_detected=1):
- """Sends an email notification indicating the number of objects detected; defaults to 1 object."""
- message = MIMEMultipart()
- message["From"] = from_email
- message["To"] = to_email
- message["Subject"] = "Security Alert"
- # Add in the message body
- message_body = f"ALERT - {object_detected} objects has been detected!!"
-
- message.attach(MIMEText(message_body, "plain"))
- server.sendmail(from_email, to_email, message.as_string())
-```
-
-#### Object Detection and Alert Sender
-
-```python
-class ObjectDetection:
- def __init__(self, capture_index):
- """Initializes an ObjectDetection instance with a given camera index."""
- self.capture_index = capture_index
- self.email_sent = False
-
- # model information
- self.model = YOLO("yolov8n.pt")
-
- # visual information
- self.annotator = None
- self.start_time = 0
- self.end_time = 0
-
- # device information
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
- def predict(self, im0):
- """Run prediction using a YOLO model for the input image `im0`."""
- results = self.model(im0)
- return results
-
- def display_fps(self, im0):
- """Displays the FPS on an image `im0` by calculating and overlaying as white text on a black rectangle."""
- self.end_time = time()
- fps = 1 / np.round(self.end_time - self.start_time, 2)
- text = f"FPS: {int(fps)}"
- text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)[0]
- gap = 10
- cv2.rectangle(
- im0,
- (20 - gap, 70 - text_size[1] - gap),
- (20 + text_size[0] + gap, 70 + gap),
- (255, 255, 255),
- -1,
- )
- cv2.putText(im0, text, (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)
-
- def plot_bboxes(self, results, im0):
- """Plots bounding boxes on an image given detection results; returns annotated image and class IDs."""
- class_ids = []
- self.annotator = Annotator(im0, 3, results[0].names)
- boxes = results[0].boxes.xyxy.cpu()
- clss = results[0].boxes.cls.cpu().tolist()
- names = results[0].names
- for box, cls in zip(boxes, clss):
- class_ids.append(cls)
- self.annotator.box_label(box, label=names[int(cls)], color=colors(int(cls), True))
- return im0, class_ids
-
- def __call__(self):
- """Executes object detection on video frames from a specified camera index, plotting bounding boxes and returning modified frames."""
- cap = cv2.VideoCapture(self.capture_index)
- assert cap.isOpened()
- cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
- cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
- frame_count = 0
- while True:
- self.start_time = time()
- ret, im0 = cap.read()
- assert ret
- results = self.predict(im0)
- im0, class_ids = self.plot_bboxes(results, im0)
-
- if len(class_ids) > 0: # Only send email If not sent before
- if not self.email_sent:
- send_email(to_email, from_email, len(class_ids))
- self.email_sent = True
- else:
- self.email_sent = False
-
- self.display_fps(im0)
- cv2.imshow("YOLOv8 Detection", im0)
- frame_count += 1
- if cv2.waitKey(5) & 0xFF == 27:
- break
- cap.release()
- cv2.destroyAllWindows()
- server.quit()
-```
-
-#### Call the Object Detection class and Run the Inference
-
-```python
-detector = ObjectDetection(capture_index=0)
-detector()
-```
+ security.authenticate(from_email, password, to_email) # Authenticate the email server
+
+ # Process video
+ while cap.isOpened():
+ success, im0 = cap.read()
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ im0 = security.monitor(im0)
+ video_writer.write(im0)
+
+ cap.release()
+ video_writer.release()
+ cv2.destroyAllWindows()
+ ```
That's it! When you execute the code, you'll receive a single email notification if any object is detected. The notification is sent immediately, not repeatedly. However, feel free to customize the code to suit your project requirements.
@@ -178,24 +82,39 @@ That's it! When you execute the code, you'll receive a single notification on yo
+### Arguments `SecurityAlarm`
+
+Here's a table with the `SecurityAlarm` arguments:
+
+| Name | Type | Default | Description |
+| ------------ | ------ | ------- | ------------------------------------------------------- |
+| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
+| `records`    | `int`  | `5`     | Number of detections required to trigger an email alert. |
+
+### Arguments `model.track`
+
+{% include "macros/track-args.md" %}
+
## FAQ
-### How does Ultralytics YOLOv8 improve the accuracy of a security alarm system?
+### How does Ultralytics YOLO11 improve the accuracy of a security alarm system?
-Ultralytics YOLOv8 enhances security alarm systems by delivering high-accuracy, real-time object detection. Its advanced algorithms significantly reduce false positives, ensuring that the system only responds to genuine threats. This increased reliability can be seamlessly integrated with existing security infrastructure, upgrading the overall surveillance quality.
+Ultralytics YOLO11 enhances security alarm systems by delivering high-accuracy, real-time object detection. Its advanced algorithms significantly reduce false positives, ensuring that the system only responds to genuine threats. This increased reliability can be seamlessly integrated with existing security infrastructure, upgrading the overall surveillance quality.
-### Can I integrate Ultralytics YOLOv8 with my existing security infrastructure?
+### Can I integrate Ultralytics YOLO11 with my existing security infrastructure?
-Yes, Ultralytics YOLOv8 can be seamlessly integrated with your existing security infrastructure. The system supports various modes and provides flexibility for customization, allowing you to enhance your existing setup with advanced object detection capabilities. For detailed instructions on integrating YOLOv8 in your projects, visit the [integration section](https://docs.ultralytics.com/integrations/).
+Yes, Ultralytics YOLO11 can be seamlessly integrated with your existing security infrastructure. The system supports various modes and provides flexibility for customization, allowing you to enhance your existing setup with advanced object detection capabilities. For detailed instructions on integrating YOLO11 in your projects, visit the [integration section](https://docs.ultralytics.com/integrations/).
-### What are the storage requirements for running Ultralytics YOLOv8?
+### What are the storage requirements for running Ultralytics YOLO11?
-Running Ultralytics YOLOv8 on a standard setup typically requires around 5GB of free disk space. This includes space for storing the YOLOv8 model and any additional dependencies. For cloud-based solutions, Ultralytics HUB offers efficient project management and dataset handling, which can optimize storage needs. Learn more about the [Pro Plan](../hub/pro.md) for enhanced features including extended storage.
+Running Ultralytics YOLO11 on a standard setup typically requires around 5GB of free disk space. This includes space for storing the YOLO11 model and any additional dependencies. For cloud-based solutions, Ultralytics HUB offers efficient project management and dataset handling, which can optimize storage needs. Learn more about the [Pro Plan](../hub/pro.md) for enhanced features including extended storage.
-### What makes Ultralytics YOLOv8 different from other object detection models like Faster R-CNN or SSD?
+### What makes Ultralytics YOLO11 different from other object detection models like Faster R-CNN or SSD?
-Ultralytics YOLOv8 provides an edge over models like Faster R-CNN or SSD with its real-time detection capabilities and higher accuracy. Its unique architecture allows it to process images much faster without compromising on [precision](https://www.ultralytics.com/glossary/precision), making it ideal for time-sensitive applications like security alarm systems. For a comprehensive comparison of object detection models, you can explore our [guide](https://docs.ultralytics.com/models/).
+Ultralytics YOLO11 provides an edge over models like Faster R-CNN or SSD with its real-time detection capabilities and higher accuracy. Its unique architecture allows it to process images much faster without compromising on [precision](https://www.ultralytics.com/glossary/precision), making it ideal for time-sensitive applications like security alarm systems. For a comprehensive comparison of object detection models, you can explore our [guide](https://docs.ultralytics.com/models/).
-### How can I reduce the frequency of false positives in my security system using Ultralytics YOLOv8?
+### How can I reduce the frequency of false positives in my security system using Ultralytics YOLO11?
-To reduce false positives, ensure your Ultralytics YOLOv8 model is adequately trained with a diverse and well-annotated dataset. Fine-tuning hyperparameters and regularly updating the model with new data can significantly improve detection accuracy. Detailed [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) techniques can be found in our [hyperparameter tuning guide](../guides/hyperparameter-tuning.md).
+To reduce false positives, ensure your Ultralytics YOLO11 model is adequately trained with a diverse and well-annotated dataset. Fine-tuning hyperparameters and regularly updating the model with new data can significantly improve detection accuracy. Detailed [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) techniques can be found in our [hyperparameter tuning guide](../guides/hyperparameter-tuning.md).
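+
+As a minimal fine-tuning sketch (the dataset file `my_security_data.yaml` is a hypothetical placeholder for your own annotated footage), you could do:
+
+```python
+from ultralytics import YOLO
+
+# Start from a pretrained checkpoint and fine-tune on your own annotated footage
+model = YOLO("yolo11n.pt")
+
+# "my_security_data.yaml" is a placeholder for your dataset configuration file
+model.train(data="my_security_data.yaml", epochs=50, imgsz=640)
+
+# Validate to check precision and recall before relying on the alarm system
+metrics = model.val()
+```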
diff --git a/docs/en/guides/speed-estimation.md b/docs/en/guides/speed-estimation.md
index 6f3726c9219..16a0f710b9d 100644
--- a/docs/en/guides/speed-estimation.md
+++ b/docs/en/guides/speed-estimation.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to estimate object speed using Ultralytics YOLOv8 for applications in traffic control, autonomous navigation, and surveillance.
-keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision, traffic control, autonomous navigation, surveillance, security
+description: Learn how to estimate object speed using Ultralytics YOLO11 for applications in traffic control, autonomous navigation, and surveillance.
+keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision, traffic control, autonomous navigation, surveillance, security
---
-# Speed Estimation using Ultralytics YOLOv8 ๐
+# Speed Estimation using Ultralytics YOLO11 🚀
## What is Speed Estimation?
-[Speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) is the process of calculating the rate of movement of an object within a given context, often employed in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. Using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) you can now calculate the speed of object using [object tracking](../modes/track.md) alongside distance and time data, crucial for tasks like traffic and surveillance. The accuracy of speed estimation directly influences the efficiency and reliability of various applications, making it a key component in the advancement of intelligent systems and real-time decision-making processes.
+[Speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) is the process of calculating the rate of movement of an object within a given context, often employed in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. Using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/), you can now calculate the speed of objects using [object tracking](../modes/track.md) alongside distance and time data, which is crucial for tasks like traffic monitoring and surveillance. The accuracy of speed estimation directly influences the efficiency and reliability of various applications, making it a key component in the advancement of intelligent systems and real-time decision-making processes.
!!! tip "Check Out Our Blog"
- For deeper insights into speed estimation, check out our blog post: [Ultralytics YOLOv8 for Speed Estimation in Computer Vision Projects](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects)
+ For deeper insights into speed estimation, check out our blog post: [Ultralytics YOLO11 for Speed Estimation in Computer Vision Projects](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects)
## Advantages of Speed Estimation
@@ -35,46 +35,56 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision
| Transportation | Transportation |
| :------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Speed Estimation on Road using Ultralytics YOLOv8 | Speed Estimation on Bridge using Ultralytics YOLOv8 |
+|  |  |
+| Speed Estimation on Road using Ultralytics YOLO11 | Speed Estimation on Bridge using Ultralytics YOLO11 |
-!!! example "Speed Estimation using YOLOv8 Example"
+!!! example "Speed Estimation using YOLO11 Example"
- === "Speed Estimation"
+ === "CLI"
+
+ ```bash
+ # Run a speed example
+ yolo solutions speed show=True
+
+ # Pass a source video
+ yolo solutions speed source="path/to/video/file.mp4"
+
+ # Pass region coordinates
+ yolo solutions speed region=[(20, 400), (1080, 400), (1080, 360), (20, 360)]
+ ```
+
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n.pt")
- names = model.model.names
-
- cap = cv2.VideoCapture("path/to/video/file.mp4")
+    cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
- video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+    video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- line_pts = [(0, 360), (1280, 360)]
+ # Define speed region points
+ speed_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
- # Init speed-estimation obj
- speed_obj = solutions.SpeedEstimator(
- reg_pts=line_pts,
- names=names,
- view_img=True,
+ speed = solutions.SpeedEstimator(
+ show=True, # Display the output
+ model="yolo11n.pt", # Path to the YOLO11 model file.
+ region=speed_region, # Pass region points
+ # classes=[0, 2], # If you want to estimate speed of specific classes.
+ # line_width=2, # Adjust the line width for bounding boxes and text display
)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
-
- tracks = model.track(im0, persist=True)
-
- im0 = speed_obj.estimate_speed(im0, tracks)
+        im0 = speed.estimate_speed(im0)
video_writer.write(im0)
cap.release()
@@ -88,13 +98,12 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision
### Arguments `SpeedEstimator`
-| Name | Type | Default | Description |
-| ------------------ | ------ | -------------------------- | ---------------------------------------------------- |
-| `names` | `dict` | `None` | Dictionary of class names. |
-| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of region points for speed estimation. |
-| `view_img` | `bool` | `False` | Whether to display the image with annotations. |
-| `line_thickness` | `int` | `2` | Thickness of the lines for drawing boxes and tracks. |
-| `spdl_dist_thresh` | `int` | `10` | Distance threshold for speed calculation. |
+| Name | Type | Default | Description |
+| ------------ | ------ | -------------------------- | ---------------------------------------------------- |
+| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
+| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
### Arguments `model.track`
@@ -102,19 +111,16 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision
## FAQ
-### How do I estimate object speed using Ultralytics YOLOv8?
+### How do I estimate object speed using Ultralytics YOLO11?
-Estimating object speed with Ultralytics YOLOv8 involves combining [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking techniques. First, you need to detect objects in each frame using the YOLOv8 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed.
+Estimating object speed with Ultralytics YOLO11 involves combining [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking techniques. First, you need to detect objects in each frame using the YOLO11 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed.
**Example**:
```python
import cv2
-from ultralytics import YOLO, solutions
-
-model = YOLO("yolov8n.pt")
-names = model.model.names
+from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -122,17 +128,16 @@ video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*"
# Initialize SpeedEstimator
speed_obj = solutions.SpeedEstimator(
- reg_pts=[(0, 360), (1280, 360)],
- names=names,
- view_img=True,
+ region=[(0, 360), (1280, 360)],
+ model="yolo11n.pt",
+ show=True,
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
break
- tracks = model.track(im0, persist=True, show=False)
- im0 = speed_obj.estimate_speed(im0, tracks)
+ im0 = speed_obj.estimate_speed(im0)
video_writer.write(im0)
cap.release()
@@ -142,43 +147,43 @@ cv2.destroyAllWindows()
For more details, refer to our [official blog post](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects).
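+
+For intuition, the calculation described above boils down to distance over time. Here is a rough, self-contained sketch (the meters-per-pixel calibration and the coordinates are made-up values for illustration only):
+
+```python
+# Hypothetical values purely for illustration
+meters_per_pixel = 0.05  # assumed calibration of the camera view
+fps = 30  # video frame rate
+
+# Tracked object center (in pixels) in two consecutive frames
+x1, y1 = 400, 360
+x2, y2 = 412, 360
+
+pixel_distance = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5
+speed_mps = pixel_distance * meters_per_pixel * fps  # meters per second
+print(f"Estimated speed: {speed_mps * 3.6:.1f} km/h")
+```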
-### What are the benefits of using Ultralytics YOLOv8 for speed estimation in traffic management?
+### What are the benefits of using Ultralytics YOLO11 for speed estimation in traffic management?
-Using Ultralytics YOLOv8 for speed estimation offers significant advantages in traffic management:
+Using Ultralytics YOLO11 for speed estimation offers significant advantages in traffic management:
- **Enhanced Safety**: Accurately estimate vehicle speeds to detect over-speeding and improve road safety.
-- **Real-Time Monitoring**: Benefit from YOLOv8's real-time object detection capability to monitor traffic flow and congestion effectively.
+- **Real-Time Monitoring**: Benefit from YOLO11's real-time object detection capability to monitor traffic flow and congestion effectively.
- **Scalability**: Deploy the model on various hardware setups, from edge devices to servers, ensuring flexible and scalable solutions for large-scale implementations.
For more applications, see [advantages of speed estimation](#advantages-of-speed-estimation).
-### Can YOLOv8 be integrated with other AI frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or [PyTorch](https://www.ultralytics.com/glossary/pytorch)?
+### Can YOLO11 be integrated with other AI frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or [PyTorch](https://www.ultralytics.com/glossary/pytorch)?
-Yes, YOLOv8 can be integrated with other AI frameworks like TensorFlow and PyTorch. Ultralytics provides support for exporting YOLOv8 models to various formats like ONNX, TensorRT, and CoreML, ensuring smooth interoperability with other ML frameworks.
+Yes, YOLO11 can be integrated with other AI frameworks like TensorFlow and PyTorch. Ultralytics provides support for exporting YOLO11 models to various formats like ONNX, TensorRT, and CoreML, ensuring smooth interoperability with other ML frameworks.
-To export a YOLOv8 model to ONNX format:
+To export a YOLO11 model to ONNX format:
```bash
-yolo export --weights yolov8n.pt --include onnx
+yolo export model=yolo11n.pt format=onnx
```
Learn more about exporting models in our [guide on export](../modes/export.md).
-### How accurate is the speed estimation using Ultralytics YOLOv8?
+### How accurate is the speed estimation using Ultralytics YOLO11?
-The [accuracy](https://www.ultralytics.com/glossary/accuracy) of speed estimation using Ultralytics YOLOv8 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion.
+The [accuracy](https://www.ultralytics.com/glossary/accuracy) of speed estimation using Ultralytics YOLO11 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion.
**Note**: Always consider margin of error and validate the estimates with ground truth data when possible.
For further accuracy improvement tips, check the [Arguments `SpeedEstimator` section](#arguments-speedestimator).
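+
+As a rough validation sketch (the numbers below are made up for illustration), you can quantify the gap between estimates and ground truth:
+
+```python
+# Hypothetical estimated vs. measured speeds in km/h
+estimated = [62.0, 48.5, 71.2]
+ground_truth = [60.0, 50.0, 70.0]
+
+errors = [abs(e - g) for e, g in zip(estimated, ground_truth)]
+print(f"Mean absolute error: {sum(errors) / len(errors):.1f} km/h")
+```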
-### Why choose Ultralytics YOLOv8 over other object detection models like TensorFlow Object Detection API?
+### Why choose Ultralytics YOLO11 over other object detection models like TensorFlow Object Detection API?
-Ultralytics YOLOv8 offers several advantages over other object detection models, such as the TensorFlow Object Detection API:
+Ultralytics YOLO11 offers several advantages over other object detection models, such as the TensorFlow Object Detection API:
-- **Real-Time Performance**: YOLOv8 is optimized for real-time detection, providing high speed and accuracy.
-- **Ease of Use**: Designed with a user-friendly interface, YOLOv8 simplifies model training and deployment.
+- **Real-Time Performance**: YOLO11 is optimized for real-time detection, providing high speed and accuracy.
+- **Ease of Use**: Designed with a user-friendly interface, YOLO11 simplifies model training and deployment.
- **Versatility**: Supports multiple tasks, including object detection, segmentation, and pose estimation.
-- **Community and Support**: YOLOv8 is backed by an active community and extensive documentation, ensuring developers have the resources they need.
+- **Community and Support**: YOLO11 is backed by an active community and extensive documentation, ensuring developers have the resources they need.
-For more information on the benefits of YOLOv8, explore our detailed [model page](../models/yolov8.md).
+For more information on the benefits of YOLO11, explore our detailed [model page](../models/yolov8.md).
diff --git a/docs/en/guides/steps-of-a-cv-project.md b/docs/en/guides/steps-of-a-cv-project.md
index 7f50440a37c..72676d72a54 100644
--- a/docs/en/guides/steps-of-a-cv-project.md
+++ b/docs/en/guides/steps-of-a-cv-project.md
@@ -18,15 +18,11 @@ Computer vision is a subfield of [artificial intelligence](https://www.ultralyti
allowfullscreen>
- Watch: How to Do [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Projects | A Step-by-Step Guide
+ Watch: How to Do Computer Vision Projects | A Step-by-Step Guide
Computer vision techniques like [object detection](../tasks/detect.md), [image classification](../tasks/classify.md), and [instance segmentation](../tasks/segment.md) can be applied across various industries, from [autonomous driving](https://www.ultralytics.com/solutions/ai-in-self-driving) to [medical imaging](https://www.ultralytics.com/solutions/ai-in-healthcare) to gain valuable insights.
-
-
-
-
Working on your own computer vision projects is a great way to understand and learn more about computer vision. However, a computer vision project can consist of many steps, and it might seem confusing at first. By the end of this guide, you'll be familiar with the steps involved in a computer vision project. We'll walk through everything from the beginning to the end of a project, explaining why each part is important. Let's get started and make your computer vision project a success!
## An Overview of a Computer Vision Project
@@ -147,7 +143,7 @@ It's important to keep in mind that proper dataset management is vital for effic
It's important to assess your model's performance using various metrics and refine it to improve [accuracy](https://www.ultralytics.com/glossary/accuracy). [Evaluating](../modes/val.md) helps identify areas where the model excels and where it may need improvement. Fine-tuning ensures the model is optimized for the best possible performance.
-- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), recall, and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions.
+- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions.
- **[Hyperparameter Tuning](./hyperparameter-tuning.md):** Adjust hyperparameters to optimize model performance. Techniques like grid search or random search can help find the best hyperparameter values.
- Fine-Tuning: Make small adjustments to the model architecture or training process to enhance performance. This might involve tweaking [learning rates](https://www.ultralytics.com/glossary/learning-rate), [batch sizes](https://www.ultralytics.com/glossary/batch-size), or other model parameters.
@@ -166,7 +162,7 @@ Once your model has been thoroughly tested, it's time to deploy it. Deployment i
- Setting Up the Environment: Configure the necessary infrastructure for your chosen deployment option, whether it's cloud-based (AWS, Google Cloud, Azure) or edge-based (local devices, IoT).
-- **[Exporting the Model](../modes/export.md):** Export your model to the appropriate format (e.g., ONNX, TensorRT, CoreML for YOLOv8) to ensure compatibility with your deployment platform.
+- **[Exporting the Model](../modes/export.md):** Export your model to the appropriate format (e.g., ONNX, TensorRT, CoreML for YOLO11) to ensure compatibility with your deployment platform.
- **Deploying the Model:** Deploy the model by setting up APIs or endpoints and integrating it with your application.
- **Ensuring Scalability**: Implement load balancers, auto-scaling groups, and monitoring tools to manage resources and handle increasing data and user requests.
@@ -188,12 +184,12 @@ Connecting with a community of computer vision enthusiasts can help you tackle a
### Community Resources
-- **GitHub Issues:** Check out the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The active community and maintainers are there to help with specific issues.
+- **GitHub Issues:** Check out the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The active community and maintainers are there to help with specific issues.
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to interact with other users and developers, get support, and share insights.
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Explore the [official YOLOv8 documentation](./index.md) for detailed guides with helpful tips on different computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Explore the [official YOLO11 documentation](./index.md) for detailed guides with helpful tips on different computer vision tasks and projects.
Using these resources will help you overcome challenges and stay updated with the latest trends and best practices in the computer vision community.
@@ -229,7 +225,7 @@ After splitting, apply data augmentation techniques like rotation, scaling, and
### How can I export my trained computer vision model for deployment?
-Exporting your model ensures compatibility with different deployment platforms. Ultralytics provides multiple formats, including ONNX, TensorRT, and CoreML. To export your YOLOv8 model, follow this guide:
+Exporting your model ensures compatibility with different deployment platforms. Ultralytics provides multiple formats, including ONNX, TensorRT, and CoreML. To export your YOLO11 model, follow this guide:
- Use the `export` function with the desired format parameter.
- Ensure the exported model fits the specifications of your deployment environment (e.g., edge devices, cloud).
diff --git a/docs/en/guides/streamlit-live-inference.md b/docs/en/guides/streamlit-live-inference.md
index e8fb5c9165f..68fbe925a22 100644
--- a/docs/en/guides/streamlit-live-inference.md
+++ b/docs/en/guides/streamlit-live-inference.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to set up a real-time object detection application using Streamlit and Ultralytics YOLOv8. Follow this step-by-step guide to implement webcam-based object detection.
-keywords: Streamlit, YOLOv8, Real-time Object Detection, Streamlit Application, YOLOv8 Streamlit Tutorial, Webcam Object Detection
+description: Learn how to set up a real-time object detection application using Streamlit and Ultralytics YOLO11. Follow this step-by-step guide to implement webcam-based object detection.
+keywords: Streamlit, YOLO11, Real-time Object Detection, Streamlit Application, YOLO11 Streamlit Tutorial, Webcam Object Detection
---
-# Live Inference with Streamlit Application using Ultralytics YOLOv8
+# Live Inference with Streamlit Application using Ultralytics YOLO11
## Introduction
-Streamlit makes it simple to build and deploy interactive web applications. Combining this with Ultralytics YOLOv8 allows for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and analysis directly in your browser. YOLOv8 high accuracy and speed ensure seamless performance for live video streams, making it ideal for applications in security, retail, and beyond.
+Streamlit makes it simple to build and deploy interactive web applications. Combining this with Ultralytics YOLO11 allows for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and analysis directly in your browser. YOLO11's high accuracy and speed ensure seamless performance for live video streams, making it ideal for applications in security, retail, and beyond.
@@ -18,19 +18,19 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
allowfullscreen>
- Watch: How to Use Streamlit with Ultralytics for Real-Time [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) in Your Browser
+ Watch: How to Use Streamlit with Ultralytics for Real-Time Computer Vision in Your Browser
| Aquaculture | Animal husbandry |
| :----------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |
-| Fish Detection using Ultralytics YOLOv8 | Animals Detection using Ultralytics YOLOv8 |
+|  |  |
+| Fish Detection using Ultralytics YOLO11 | Animal Detection using Ultralytics YOLO11 |
## Advantages of Live Inference
-- **Seamless Real-Time Object Detection**: Streamlit combined with YOLOv8 enables real-time object detection directly from your webcam feed. This allows for immediate analysis and insights, making it ideal for applications requiring instant feedback.
+- **Seamless Real-Time Object Detection**: Streamlit combined with YOLO11 enables real-time object detection directly from your webcam feed. This allows for immediate analysis and insights, making it ideal for applications requiring instant feedback.
- **User-Friendly Deployment**: Streamlit's interactive interface makes it easy to deploy and use the application without extensive technical knowledge. Users can start live inference with a simple click, enhancing accessibility and usability.
-- **Efficient Resource Utilization**: YOLOv8 optimized algorithm ensure high-speed processing with minimal computational resources. This efficiency allows for smooth and reliable webcam inference even on standard hardware, making advanced computer vision accessible to a wider audience.
+- **Efficient Resource Utilization**: YOLO11's optimized algorithms ensure high-speed processing with minimal computational resources. This efficiency allows for smooth and reliable webcam inference even on standard hardware, making advanced computer vision accessible to a wider audience.
## Streamlit Application Code
@@ -40,23 +40,29 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
!!! example "Streamlit Application"
+ === "CLI"
+
+ ```bash
+ yolo solutions inference
+
+ yolo solutions inference model="path/to/model.pt"
+ ```
+
=== "Python"
```python
from ultralytics import solutions
- solutions.inference()
-
- ### Make sure to run the file using command `streamlit run `
- ```
+ inf = solutions.Inference(
+        model="yolo11n.pt",  # You can use any model that Ultralytics supports, e.g. YOLO11, or a custom-trained model
+ )
- === "CLI"
+ inf.inference()
- ```bash
- yolo streamlit-predict
+    ### Make sure to run the file using command `streamlit run <file_name.py>`
```
-This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLOv8 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection.
+This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLO11 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection.
You can optionally supply a specific model in Python:
@@ -67,15 +73,18 @@ You can optionally supply a specific model in Python:
```python
from ultralytics import solutions
- # Pass a model as an argument
- solutions.inference(model="path/to/model.pt")
+ inf = solutions.Inference(
+        model="yolo11n.pt",  # You can use any model that Ultralytics supports, e.g. YOLO11 or YOLOv10
+ )
+
+ inf.inference()
    ### Make sure to run the file using command `streamlit run <file_name.py>`
```
## Conclusion
-By following this guide, you have successfully created a real-time object detection application using Streamlit and Ultralytics YOLOv8. This application allows you to experience the power of YOLOv8 in detecting objects through your webcam, with a user-friendly interface and the ability to stop the video stream at any time.
+By following this guide, you have successfully created a real-time object detection application using Streamlit and Ultralytics YOLO11. This application allows you to experience the power of YOLO11 in detecting objects through your webcam, with a user-friendly interface and the ability to stop the video stream at any time.
For further enhancements, you can explore adding more features such as recording the video stream, saving the annotated frames, or integrating with other computer vision libraries.
@@ -90,13 +99,13 @@ Engage with the community to learn more, troubleshoot issues, and share your pro
### Official Documentation
-- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](https://docs.ultralytics.com/) for comprehensive guides and insights on various computer vision tasks and projects.
+- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](https://docs.ultralytics.com/) for comprehensive guides and insights on various computer vision tasks and projects.
## FAQ
-### How can I set up a real-time object detection application using Streamlit and Ultralytics YOLOv8?
+### How can I set up a real-time object detection application using Streamlit and Ultralytics YOLO11?
-Setting up a real-time object detection application with Streamlit and Ultralytics YOLOv8 is straightforward. First, ensure you have the Ultralytics Python package installed using:
+Setting up a real-time object detection application with Streamlit and Ultralytics YOLO11 is straightforward. First, ensure you have the Ultralytics Python package installed using:
```bash
pip install ultralytics
@@ -111,7 +120,11 @@ Then, you can create a basic Streamlit application to run live inference:
```python
from ultralytics import solutions
- solutions.inference()
+ inf = solutions.Inference(
+        model="yolo11n.pt",  # You can use any model that Ultralytics supports, e.g. YOLO11 or YOLOv10
+ )
+
+ inf.inference()
    ### Make sure to run the file using command `streamlit run <file_name.py>`
```
@@ -119,34 +132,34 @@ Then, you can create a basic Streamlit application to run live inference:
=== "CLI"
```bash
- yolo streamlit-predict
+ yolo solutions inference
```
For more details on the practical setup, refer to the [Streamlit Application Code section](#streamlit-application-code) of the documentation.
-### What are the main advantages of using Ultralytics YOLOv8 with Streamlit for real-time object detection?
+### What are the main advantages of using Ultralytics YOLO11 with Streamlit for real-time object detection?
-Using Ultralytics YOLOv8 with Streamlit for real-time object detection offers several advantages:
+Using Ultralytics YOLO11 with Streamlit for real-time object detection offers several advantages:
- **Seamless Real-Time Detection**: Achieve high-[accuracy](https://www.ultralytics.com/glossary/accuracy), real-time object detection directly from webcam feeds.
- **User-Friendly Interface**: Streamlit's intuitive interface allows easy use and deployment without extensive technical knowledge.
-- **Resource Efficiency**: YOLOv8's optimized algorithms ensure high-speed processing with minimal computational resources.
+- **Resource Efficiency**: YOLO11's optimized algorithms ensure high-speed processing with minimal computational resources.
Discover more about these advantages [here](#advantages-of-live-inference).
### How do I deploy a Streamlit object detection application in my web browser?
-After coding your Streamlit application integrating Ultralytics YOLOv8, you can deploy it by running:
+After coding your Streamlit application integrating Ultralytics YOLO11, you can deploy it by running:
```bash
streamlit run <file_name.py>
```
-This command will launch the application in your default web browser, enabling you to select YOLOv8 models, set confidence, and NMS thresholds, and start real-time object detection with a simple click. For a detailed guide, refer to the [Streamlit Application Code](#streamlit-application-code) section.
+This command will launch the application in your default web browser, enabling you to select YOLO11 models, set confidence, and NMS thresholds, and start real-time object detection with a simple click. For a detailed guide, refer to the [Streamlit Application Code](#streamlit-application-code) section.
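+
+For reference, the file you pass to `streamlit run` can be as small as the following sketch (the file name `app.py` is just a placeholder):
+
+```python
+# app.py - minimal Streamlit live-inference script
+from ultralytics import solutions
+
+inf = solutions.Inference(model="yolo11n.pt")  # any supported or custom-trained model
+inf.inference()
+```
+
+Then launch it with `streamlit run app.py`.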
-### What are some use cases for real-time object detection using Streamlit and Ultralytics YOLOv8?
+### What are some use cases for real-time object detection using Streamlit and Ultralytics YOLO11?
-Real-time object detection using Streamlit and Ultralytics YOLOv8 can be applied in various sectors:
+Real-time object detection using Streamlit and Ultralytics YOLO11 can be applied in various sectors:
- **Security**: Real-time monitoring for unauthorized access.
- **Retail**: Customer counting, shelf management, and more.
@@ -154,12 +167,12 @@ Real-time object detection using Streamlit and Ultralytics YOLOv8 can be applied
For more in-depth use cases and examples, explore [Ultralytics Solutions](https://docs.ultralytics.com/solutions/).
-### How does Ultralytics YOLOv8 compare to other object detection models like YOLOv5 and RCNNs?
+### How does Ultralytics YOLO11 compare to other object detection models like YOLOv5 and RCNNs?
-Ultralytics YOLOv8 provides several enhancements over prior models like YOLOv5 and RCNNs:
+Ultralytics YOLO11 provides several enhancements over prior models like YOLOv5 and RCNNs:
- **Higher Speed and Accuracy**: Improved performance for real-time applications.
- **Ease of Use**: Simplified interfaces and deployment.
- **Resource Efficiency**: Optimized for better speed with minimal computational requirements.
-For a comprehensive comparison, check [Ultralytics YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8/) and related blog posts discussing model performance.
+For a comprehensive comparison, check [Ultralytics YOLO11 Documentation](https://docs.ultralytics.com/models/yolov8/) and related blog posts discussing model performance.
diff --git a/docs/en/guides/trackzone.md b/docs/en/guides/trackzone.md
new file mode 100644
index 00000000000..09bd0c416a9
--- /dev/null
+++ b/docs/en/guides/trackzone.md
@@ -0,0 +1,173 @@
+---
+comments: true
+description: Discover how TrackZone leverages Ultralytics YOLO11 to precisely track objects within specific zones, enabling real-time insights for crowd analysis, surveillance, and targeted monitoring.
+keywords: TrackZone, object tracking, YOLO11, Ultralytics, real-time object detection, AI, deep learning, crowd analysis, surveillance, zone-based tracking, resource optimization
+---
+
+# TrackZone using Ultralytics YOLO11
+
+
+
+## What is TrackZone?
+
+TrackZone specializes in monitoring objects within designated areas of a frame instead of the whole frame. Built on [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/), it integrates object detection and tracking specifically within zones for videos and live camera feeds. YOLO11's advanced algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) technologies make it a perfect choice for real-time use cases, offering precise and efficient object tracking in applications like crowd monitoring and surveillance.
+
+
+
+
+
+    Watch: How to Track Objects in Region using Ultralytics YOLO11 | TrackZone 🚀
+
+
+## Advantages of Object Tracking in Zones (TrackZone)
+
+- **Targeted Analysis:** Tracking objects within specific zones allows for more focused insights, enabling precise monitoring and analysis of areas of interest, such as entry points or restricted zones.
+- **Improved Efficiency:** By narrowing the tracking scope to defined zones, TrackZone reduces computational overhead, ensuring faster processing and optimal performance.
+- **Enhanced Security:** Zonal tracking improves surveillance by monitoring critical areas, aiding in the early detection of unusual activity or security breaches.
+- **Scalable Solutions:** The ability to focus on specific zones makes TrackZone adaptable to various scenarios, from retail spaces to industrial settings, ensuring seamless integration and scalability.
+
+## Real World Applications
+
+| Agriculture | Transportation |
+| :-----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+|  |  |
+| Plants Tracking in Field Using Ultralytics YOLO11 | Vehicles Tracking on Road using Ultralytics YOLO11 |
+
+!!! example "TrackZone using YOLO11 Example"
+
+ === "CLI"
+
+ ```bash
+ # Run a trackzone example
+ yolo solutions trackzone show=True
+
+ # Pass a source video
+ yolo solutions trackzone show=True source="path/to/video/file.mp4"
+
+ # Pass region coordinates
+ yolo solutions trackzone show=True region=[(150, 150), (1130, 150), (1130, 570), (150, 570)]
+ ```
+
+ === "Python"
+
+ ```python
+ import cv2
+
+ from ultralytics import solutions
+
+ cap = cv2.VideoCapture("path/to/video/file.mp4")
+ assert cap.isOpened(), "Error reading video file"
+ w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+
+ # Define region points
+ region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
+
+ # Video writer
+    video_writer = cv2.VideoWriter("trackzone_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+ # Init TrackZone (Object Tracking in Zones, not complete frame)
+ trackzone = solutions.TrackZone(
+ show=True, # Display the output
+ region=region_points, # Pass region points
+        model="yolo11n.pt",  # You can use any model that Ultralytics supports, e.g. YOLOv9 or YOLOv10
+        # line_width=2,  # Adjust the line width for bounding boxes and text display
+        # classes=[0, 2],  # If you want to track specific classes, e.g. person and car with a COCO-pretrained model
+ )
+
+ # Process video
+ while cap.isOpened():
+ success, im0 = cap.read()
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ im0 = trackzone.trackzone(im0)
+ video_writer.write(im0)
+
+ cap.release()
+ video_writer.release()
+ cv2.destroyAllWindows()
+ ```
+
+### Arguments `TrackZone`
+
+Here's a table with the `TrackZone` arguments:
+
+| Name | Type | Default | Description |
+| ------------ | ------ | ---------------------------------------------------- | ---------------------------------------------------- |
+| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
+| `region` | `list` | `[(150, 150), (1130, 150), (1130, 570), (150, 570)]` | List of points defining the object tracking region. |
+| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
+| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
+
+### Arguments `model.track`
+
+{% include "macros/track-args.md" %}
+
+## FAQ
+
+### How do I track objects in a specific area or zone of a video frame using Ultralytics YOLO11?
+
+Tracking objects in a defined area or zone of a video frame is straightforward with Ultralytics YOLO11. Simply use the command provided below to initiate tracking. This approach ensures efficient analysis and accurate results, making it ideal for applications like surveillance, crowd management, or any scenario requiring zonal tracking.
+
+```bash
+yolo solutions trackzone source="path/to/video/file.mp4" show=True
+```
+
+### How can I use TrackZone in Python with Ultralytics YOLO11?
+
+With just a few lines of code, you can set up object tracking in specific zones, making it easy to integrate into your projects.
+
+```python
+import cv2
+
+from ultralytics import solutions
+
+cap = cv2.VideoCapture("path/to/video/file.mp4")
+assert cap.isOpened(), "Error reading video file"
+w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+
+# Define region points
+region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
+
+# Video writer
+video_writer = cv2.VideoWriter("trackzone_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+# Init TrackZone (Object Tracking in Zones, not complete frame)
+trackzone = solutions.TrackZone(
+ show=True, # Display the output
+ region=region_points, # Pass region points
+ model="yolo11n.pt",
+)
+
+# Process video
+while cap.isOpened():
+ success, im0 = cap.read()
+ if not success:
+ print("Video frame is empty or video processing has been successfully completed.")
+ break
+ im0 = trackzone.trackzone(im0)
+ video_writer.write(im0)
+
+cap.release()
+video_writer.release()
+cv2.destroyAllWindows()
+```
+
+### How do I configure the zone points for video processing using Ultralytics TrackZone?
+
+Configuring zone points for video processing with Ultralytics TrackZone is simple and customizable. You can directly define and adjust the zones through a Python script, allowing precise control over the areas you want to monitor.
+
+```python
+from ultralytics import solutions
+
+# Define region points
+region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
+
+# Init TrackZone (Object Tracking in Zones, not complete frame)
+trackzone = solutions.TrackZone(
+    show=True,  # Display the output
+    region=region_points,  # Pass region points
+    model="yolo11n.pt",  # Path to the YOLO11 model file
+)
+```
diff --git a/docs/en/guides/triton-inference-server.md b/docs/en/guides/triton-inference-server.md
index d1c7921f68b..71b1eb1c137 100644
--- a/docs/en/guides/triton-inference-server.md
+++ b/docs/en/guides/triton-inference-server.md
@@ -1,12 +1,12 @@
---
comments: true
-description: Learn how to integrate Ultralytics YOLOv8 with NVIDIA Triton Inference Server for scalable, high-performance AI model deployment.
-keywords: Triton Inference Server, YOLOv8, Ultralytics, NVIDIA, deep learning, AI model deployment, ONNX, scalable inference
+description: Learn how to integrate Ultralytics YOLO11 with NVIDIA Triton Inference Server for scalable, high-performance AI model deployment.
+keywords: Triton Inference Server, YOLO11, Ultralytics, NVIDIA, deep learning, AI model deployment, ONNX, scalable inference
---
-# Triton Inference Server with Ultralytics YOLOv8
+# Triton Inference Server with Ultralytics YOLO11
-The [Triton Inference Server](https://developer.nvidia.com/triton-inference-server) (formerly known as TensorRT Inference Server) is an open-source software solution developed by NVIDIA. It provides a cloud inference solution optimized for NVIDIA GPUs. Triton simplifies the deployment of AI models at scale in production. Integrating Ultralytics YOLOv8 with Triton Inference Server allows you to deploy scalable, high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference workloads. This guide provides steps to set up and test the integration.
+The [Triton Inference Server](https://developer.nvidia.com/triton-inference-server) (formerly known as TensorRT Inference Server) is an open-source software solution developed by NVIDIA. It provides a cloud inference solution optimized for NVIDIA GPUs. Triton simplifies the deployment of AI models at scale in production. Integrating Ultralytics YOLO11 with Triton Inference Server allows you to deploy scalable, high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference workloads. This guide provides steps to set up and test the integration.
@@ -38,7 +38,7 @@ Ensure you have the following prerequisites before proceeding:
pip install tritonclient[all]
```
-## Exporting YOLOv8 to ONNX Format
+## Exporting YOLO11 to ONNX Format
Before deploying the model on Triton, it must be exported to the ONNX format. ONNX (Open Neural Network Exchange) is a format that allows models to be transferred between different deep learning frameworks. Use the `export` function from the `YOLO` class:
@@ -46,7 +46,17 @@ Before deploying the model on Triton, it must be exported to the ONNX format. ON
from ultralytics import YOLO
# Load a model
-model = YOLO("yolov8n.pt") # load an official model
+model = YOLO("yolo11n.pt") # load an official model
+
+# Retrieve metadata during export
+metadata = []
+
+
+def export_cb(exporter):
+ metadata.append(exporter.metadata)
+
+
+model.add_callback("on_export_end", export_cb)
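+
+# Note: the metadata captured by the callback above is embedded into the Triton
+# config.pbtxt "parameters" block during the model repository setup below, so the
+# exported model's task and class names stay attached to the deployment.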
# Export the model
onnx_file = model.export(format="onnx", dynamic=True)
@@ -80,6 +90,43 @@ The Triton Model Repository is a storage location where Triton can access and lo
# Create config file
(triton_model_path / "config.pbtxt").touch()
+
+ # (Optional) Enable TensorRT for GPU inference
+ # First run will be slow due to TensorRT engine conversion
+ data = """
+ optimization {
+ execution_accelerators {
+ gpu_execution_accelerator {
+ name: "tensorrt"
+ parameters {
+ key: "precision_mode"
+ value: "FP16"
+ }
+ parameters {
+ key: "max_workspace_size_bytes"
+ value: "3221225472"
+ }
+ parameters {
+ key: "trt_engine_cache_enable"
+ value: "1"
+ }
+ parameters {
+ key: "trt_engine_cache_path"
+ value: "/models/yolo/1"
+ }
+ }
+ }
+ }
+ parameters {
+ key: "metadata"
+ value: {
+ string_value: "%s"
+ }
+ }
+ """ % metadata[0]
+
+ with open(triton_model_path / "config.pbtxt", "w") as f:
+ f.write(data)
```
## Running Triton Inference Server
@@ -94,7 +141,7 @@ import time
from tritonclient.http import InferenceServerClient
# Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
-tag = "nvcr.io/nvidia/tritonserver:23.09-py3" # 6.4 GB
+tag = "nvcr.io/nvidia/tritonserver:24.09-py3" # 8.57 GB
# Pull the image
subprocess.call(f"docker pull {tag}", shell=True)
@@ -102,7 +149,7 @@ subprocess.call(f"docker pull {tag}", shell=True)
# Run the Triton server and capture the container ID
container_id = (
subprocess.check_output(
- f"docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
+ f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
shell=True,
)
.decode("utf-8")
@@ -141,21 +188,21 @@ subprocess.call(f"docker kill {container_id}", shell=True)
---
-By following the above steps, you can deploy and run Ultralytics YOLOv8 models efficiently on Triton Inference Server, providing a scalable and high-performance solution for deep learning inference tasks. If you face any issues or have further queries, refer to the [official Triton documentation](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html) or reach out to the Ultralytics community for support.
+By following the above steps, you can deploy and run Ultralytics YOLO11 models efficiently on Triton Inference Server, providing a scalable and high-performance solution for deep learning inference tasks. If you face any issues or have further queries, refer to the [official Triton documentation](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html) or reach out to the Ultralytics community for support.
## FAQ
-### How do I set up Ultralytics YOLOv8 with NVIDIA Triton Inference Server?
+### How do I set up Ultralytics YOLO11 with NVIDIA Triton Inference Server?
-Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps:
+Setting up [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolov8/) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps:
-1. **Export YOLOv8 to ONNX format**:
+1. **Export YOLO11 to ONNX format**:
```python
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt") # load an official model
+ model = YOLO("yolo11n.pt") # load an official model
# Export the model to ONNX format
onnx_file = model.export(format="onnx", dynamic=True)
@@ -187,13 +234,13 @@ Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) wit
from tritonclient.http import InferenceServerClient
# Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver
- tag = "nvcr.io/nvidia/tritonserver:23.09-py3"
+ tag = "nvcr.io/nvidia/tritonserver:24.09-py3"
subprocess.call(f"docker pull {tag}", shell=True)
container_id = (
subprocess.check_output(
- f"docker run -d --rm -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
+            f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models",
shell=True,
)
.decode("utf-8")
@@ -209,21 +256,21 @@ Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) wit
time.sleep(1)
```
-This setup can help you efficiently deploy YOLOv8 models at scale on Triton Inference Server for high-performance AI model inference.
+This setup can help you efficiently deploy YOLO11 models at scale on Triton Inference Server for high-performance AI model inference.
-### What benefits does using Ultralytics YOLOv8 with NVIDIA Triton Inference Server offer?
+### What benefits does using Ultralytics YOLO11 with NVIDIA Triton Inference Server offer?
-Integrating [Ultralytics YOLOv8](../models/yolov8.md) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) provides several advantages:
+Integrating [Ultralytics YOLO11](../models/yolov8.md) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) provides several advantages:
- **Scalable AI Inference**: Triton allows serving multiple models from a single server instance, supporting dynamic model loading and unloading, making it highly scalable for diverse AI workloads.
- **High Performance**: Optimized for NVIDIA GPUs, Triton Inference Server ensures high-speed inference operations, perfect for real-time applications such as [object detection](https://www.ultralytics.com/glossary/object-detection).
- **Ensemble and Model Versioning**: Triton's ensemble mode enables combining multiple models to improve results, and its model versioning supports A/B testing and rolling updates.
-For detailed instructions on setting up and running YOLOv8 with Triton, you can refer to the [setup guide](#setting-up-triton-model-repository).
+For detailed instructions on setting up and running YOLO11 with Triton, you can refer to the [setup guide](#setting-up-triton-model-repository).
-### Why should I export my YOLOv8 model to ONNX format before using Triton Inference Server?
+### Why should I export my YOLO11 model to ONNX format before using Triton Inference Server?
-Using ONNX (Open Neural Network Exchange) format for your [Ultralytics YOLOv8](../models/yolov8.md) model before deploying it on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) offers several key benefits:
+Using ONNX (Open Neural Network Exchange) format for your [Ultralytics YOLO11](../models/yolo11.md) model before deploying it on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) offers several key benefits:
- **Interoperability**: ONNX format supports transfer between different deep learning frameworks (such as PyTorch, TensorFlow), ensuring broader compatibility.
- **Optimization**: Many deployment environments, including Triton, optimize for ONNX, enabling faster inference and better performance.
@@ -234,15 +281,15 @@ To export your model, use:
```python
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
onnx_file = model.export(format="onnx", dynamic=True)
```
You can follow the steps in the [exporting guide](../modes/export.md) to complete the process.
-### Can I run inference using the Ultralytics YOLOv8 model on Triton Inference Server?
+### Can I run inference using the Ultralytics YOLO11 model on Triton Inference Server?
-Yes, you can run inference using the [Ultralytics YOLOv8](../models/yolov8.md) model on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server). Once your model is set up in the Triton Model Repository and the server is running, you can load and run inference on your model as follows:
+Yes, you can run inference using the [Ultralytics YOLO11](../models/yolo11.md) model on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server). Once your model is set up in the Triton Model Repository and the server is running, you can load and run inference on your model as follows:
```python
from ultralytics import YOLO
@@ -254,14 +301,14 @@ model = YOLO("http://localhost:8000/yolo", task="detect")
results = model("path/to/image.jpg")
```
-For an in-depth guide on setting up and running Triton Server with YOLOv8, refer to the [running triton inference server](#running-triton-inference-server) section.
+For an in-depth guide on setting up and running Triton Server with YOLO11, refer to the [running triton inference server](#running-triton-inference-server) section.
-### How does Ultralytics YOLOv8 compare to [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch models for deployment?
+### How does Ultralytics YOLO11 compare to [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch models for deployment?
-[Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) offers several unique advantages compared to TensorFlow and PyTorch models for deployment:
+[Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/) offers several unique advantages compared to TensorFlow and PyTorch models for deployment:
-- **Real-time Performance**: Optimized for real-time object detection tasks, YOLOv8 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, making it ideal for applications requiring live video analytics.
-- **Ease of Use**: YOLOv8 integrates seamlessly with Triton Inference Server and supports diverse export formats (ONNX, TensorRT, CoreML), making it flexible for various deployment scenarios.
-- **Advanced Features**: YOLOv8 includes features like dynamic model loading, model versioning, and ensemble inference, which are crucial for scalable and reliable AI deployments.
+- **Real-time Performance**: Optimized for real-time object detection tasks, YOLO11 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, making it ideal for applications requiring live video analytics.
+- **Ease of Use**: YOLO11 integrates seamlessly with Triton Inference Server and supports diverse export formats (ONNX, TensorRT, CoreML), making it flexible for various deployment scenarios.
+- **Advanced Features**: YOLO11 includes features like dynamic model loading, model versioning, and ensemble inference, which are crucial for scalable and reliable AI deployments.
For more details, compare the deployment options in the [model deployment guide](../modes/export.md).
diff --git a/docs/en/guides/view-results-in-terminal.md b/docs/en/guides/view-results-in-terminal.md
index b159e1a7f60..95d9850d8d4 100644
--- a/docs/en/guides/view-results-in-terminal.md
+++ b/docs/en/guides/view-results-in-terminal.md
@@ -58,7 +58,7 @@ The VSCode compatible protocols for viewing images using the integrated terminal
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Run inference on an image
results = model.predict(source="ultralytics/assets/bus.jpg")
@@ -116,7 +116,7 @@ from sixel import SixelWriter
from ultralytics import YOLO
# Load a model
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
# Run inference on an image
results = model.predict(source="ultralytics/assets/bus.jpg")
@@ -169,7 +169,7 @@ To view YOLO inference results in a VSCode terminal on macOS or Linux, follow th
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
results = model.predict(source="path_to_image")
plot = results[0].plot()
```
diff --git a/docs/en/guides/vision-eye.md b/docs/en/guides/vision-eye.md
index 9dfc036cd35..db449c64807 100644
--- a/docs/en/guides/vision-eye.md
+++ b/docs/en/guides/vision-eye.md
@@ -1,23 +1,23 @@
---
comments: true
-description: Discover VisionEye's object mapping and tracking powered by Ultralytics YOLOv8. Simulate human eye precision, track objects, and calculate distances effortlessly.
-keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, distance calculation, computer vision, AI, machine learning, Python, tutorial
+description: Discover VisionEye's object mapping and tracking powered by Ultralytics YOLO11. Simulate human eye precision, track objects, and calculate distances effortlessly.
+keywords: VisionEye, YOLO11, Ultralytics, object mapping, object tracking, distance calculation, computer vision, AI, machine learning, Python, tutorial
---
-# VisionEye View Object Mapping using Ultralytics YOLOv8 🚀
+# VisionEye View Object Mapping using Ultralytics YOLO11 🚀
## What is VisionEye Object Mapping?
-[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational [precision](https://www.ultralytics.com/glossary/precision) of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint.
+[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational [precision](https://www.ultralytics.com/glossary/precision) of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint.
## Samples
| VisionEye View | VisionEye View With Object Tracking | VisionEye View With Distance Calculation |
| :----------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-|  |  |  |
-| VisionEye View Object Mapping using Ultralytics YOLOv8 | VisionEye View Object Mapping with Object Tracking using Ultralytics YOLOv8 | VisionEye View with Distance Calculation using Ultralytics YOLOv8 |
+|  |  |  |
+| VisionEye View Object Mapping using Ultralytics YOLO11 | VisionEye View Object Mapping with Object Tracking using Ultralytics YOLO11 | VisionEye View with Distance Calculation using Ultralytics YOLO11 |
-!!! example "VisionEye Object Mapping using YOLOv8"
+!!! example "VisionEye Object Mapping using YOLO11"
=== "VisionEye Object Mapping"
@@ -27,7 +27,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
names = model.model.names
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -71,7 +71,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -118,7 +118,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolov8s.pt")
+ model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("Path/to/video/file.mp4")
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
@@ -180,16 +180,16 @@ For any inquiries, feel free to post your questions in the [Ultralytics Issue Se
## FAQ
-### How do I start using VisionEye Object Mapping with Ultralytics YOLOv8?
+### How do I start using VisionEye Object Mapping with Ultralytics YOLO11?
-To start using VisionEye Object Mapping with Ultralytics YOLOv8, first, you'll need to install the Ultralytics YOLO package via pip. Then, you can use the sample code provided in the documentation to set up [object detection](https://www.ultralytics.com/glossary/object-detection) with VisionEye. Here's a simple example to get you started:
+To start using VisionEye Object Mapping with Ultralytics YOLO11, you'll first need to install the Ultralytics YOLO package via pip. Then, you can use the sample code provided in the documentation to set up [object detection](https://www.ultralytics.com/glossary/object-detection) with VisionEye. Here's a simple example to get you started:
```python
import cv2
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
while True:
@@ -210,12 +210,12 @@ cap.release()
cv2.destroyAllWindows()
```
-### What are the key features of VisionEye's object tracking capability using Ultralytics YOLOv8?
+### What are the key features of VisionEye's object tracking capability using Ultralytics YOLO11?
-VisionEye's object tracking with Ultralytics YOLOv8 allows users to follow the movement of objects within a video frame. Key features include:
+VisionEye's object tracking with Ultralytics YOLO11 allows users to follow the movement of objects within a video frame. Key features include:
1. **Real-Time Object Tracking**: Keeps up with objects as they move.
-2. **Object Identification**: Utilizes YOLOv8's powerful detection algorithms.
+2. **Object Identification**: Utilizes YOLO11's powerful detection algorithms.
3. **Distance Calculation**: Calculates distances between objects and specified points.
4. **Annotation and Visualization**: Provides visual markers for tracked objects.
@@ -226,7 +226,7 @@ import cv2
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
while True:
@@ -249,9 +249,9 @@ cv2.destroyAllWindows()
For a comprehensive guide, visit the [VisionEye Object Mapping with Object Tracking](#samples).
-### How can I calculate distances with VisionEye's YOLOv8 model?
+### How can I calculate distances with VisionEye's YOLO11 model?
-Distance calculation with VisionEye and Ultralytics YOLOv8 involves determining the distance of detected objects from a specified point in the frame. It enhances spatial analysis capabilities, useful in applications such as autonomous driving and surveillance.
+Distance calculation with VisionEye and Ultralytics YOLO11 involves determining the distance of detected objects from a specified point in the frame. It enhances spatial analysis capabilities, useful in applications such as autonomous driving and surveillance.
Here's a simplified example:
@@ -262,7 +262,7 @@ import cv2
from ultralytics import YOLO
-model = YOLO("yolov8s.pt")
+model = YOLO("yolo11n.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
center_point = (0, 480) # Example center point
pixel_per_meter = 10
@@ -290,19 +290,19 @@ cv2.destroyAllWindows()
For detailed instructions, refer to the [VisionEye with Distance Calculation](#samples).
-### Why should I use Ultralytics YOLOv8 for object mapping and tracking?
+### Why should I use Ultralytics YOLO11 for object mapping and tracking?
-Ultralytics YOLOv8 is renowned for its speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of integration, making it a top choice for object mapping and tracking. Key advantages include:
+Ultralytics YOLO11 is renowned for its speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of integration, making it a top choice for object mapping and tracking. Key advantages include:
1. **State-of-the-art Performance**: Delivers high accuracy in real-time object detection.
2. **Flexibility**: Supports various tasks such as detection, tracking, and distance calculation.
3. **Community and Support**: Extensive documentation and active GitHub community for troubleshooting and enhancements.
4. **Ease of Use**: Intuitive API simplifies complex tasks, allowing for rapid deployment and iteration.
-For more information on applications and benefits, check out the [Ultralytics YOLOv8 documentation](https://docs.ultralytics.com/models/yolov8/).
+For more information on applications and benefits, check out the [Ultralytics YOLO11 documentation](https://docs.ultralytics.com/models/yolo11/).
### How can I integrate VisionEye with other [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) tools like Comet or ClearML?
-Ultralytics YOLOv8 can integrate seamlessly with various machine learning tools like Comet and ClearML, enhancing experiment tracking, collaboration, and reproducibility. Follow the detailed guides on [how to use YOLOv5 with Comet](https://www.ultralytics.com/blog/how-to-use-yolov5-with-comet) and [integrate YOLOv8 with ClearML](https://docs.ultralytics.com/integrations/clearml/) to get started.
+Ultralytics YOLO11 can integrate seamlessly with various machine learning tools like Comet and ClearML, enhancing experiment tracking, collaboration, and reproducibility. Follow the detailed guides on [how to use YOLOv5 with Comet](https://www.ultralytics.com/blog/how-to-use-yolov5-with-comet) and [integrate YOLO11 with ClearML](https://docs.ultralytics.com/integrations/clearml/) to get started.
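A minimal sketch of the Comet route, assuming you have a Comet account and `comet_ml` installed (the project name below is only a placeholder):

```python
import comet_ml  # pip install comet_ml

from ultralytics import YOLO

# Initialize Comet logging before training; the project name is an example
comet_ml.init(project_name="visioneye-experiments")

# Train a YOLO11 model; metrics, hyperparameters and checkpoints are logged automatically
model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=3, imgsz=640)
```

ClearML works in a similar spirit: installing the `clearml` package and running `clearml-init` once is typically all that is needed for Ultralytics training runs to be tracked.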
For further exploration and integration examples, check our [Ultralytics Integrations Guide](https://docs.ultralytics.com/integrations/).
diff --git a/docs/en/guides/workouts-monitoring.md b/docs/en/guides/workouts-monitoring.md
index 45856316833..3919a708e70 100644
--- a/docs/en/guides/workouts-monitoring.md
+++ b/docs/en/guides/workouts-monitoring.md
@@ -1,12 +1,14 @@
---
comments: true
-description: Optimize your fitness routine with real-time workouts monitoring using Ultralytics YOLOv8. Track and improve your exercise form and performance.
-keywords: workouts monitoring, Ultralytics YOLOv8, pose estimation, fitness tracking, exercise assessment, real-time feedback, exercise form, performance metrics
+description: Optimize your fitness routine with real-time workouts monitoring using Ultralytics YOLO11. Track and improve your exercise form and performance.
+keywords: workouts monitoring, Ultralytics YOLO11, pose estimation, fitness tracking, exercise assessment, real-time feedback, exercise form, performance metrics
---
-# Workouts Monitoring using Ultralytics YOLOv8
+# Workouts Monitoring using Ultralytics YOLO11
-Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) enhances exercise assessment by accurately tracking key body landmarks and joints in real-time. This technology provides instant feedback on exercise form, tracks workout routines, and measures performance metrics, optimizing training sessions for users and trainers alike.
+
+
+Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) enhances exercise assessment by accurately tracking key body landmarks and joints in real-time. This technology provides instant feedback on exercise form, tracks workout routines, and measures performance metrics, optimizing training sessions for users and trainers alike.
@@ -16,7 +18,7 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi
allowfullscreen>
- Watch: Workouts Monitoring using Ultralytics YOLOv8 | Pushups, Pullups, Ab Workouts
+ Watch: Workouts Monitoring using Ultralytics YOLO11 | Push-ups, Pull-ups, Ab Workouts
## Advantages of Workouts Monitoring?
@@ -36,90 +38,68 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi
!!! example "Workouts Monitoring Example"
- === "Workouts Monitoring"
-
- ```python
- import cv2
-
- from ultralytics import YOLO, solutions
+ === "CLI"
- model = YOLO("yolov8n-pose.pt")
- cap = cv2.VideoCapture("path/to/video/file.mp4")
- assert cap.isOpened(), "Error reading video file"
- w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-
- gym_object = solutions.AIGym(
- line_thickness=2,
- view_img=True,
- pose_type="pushup",
- kpts_to_check=[6, 8, 10],
- )
+ ```bash
+ # Run a workout example
+ yolo solutions workout show=True
- while cap.isOpened():
- success, im0 = cap.read()
- if not success:
- print("Video frame is empty or video processing has been successfully completed.")
- break
- results = model.track(im0, verbose=False) # Tracking recommended
- # results = model.predict(im0) # Prediction also supported
- im0 = gym_object.start_counting(im0, results)
+ # Pass a source video
+ yolo solutions workout source="path/to/video/file.mp4"
- cv2.destroyAllWindows()
+ # Use keypoints for pushups
+ yolo solutions workout kpts=[6, 8, 10]
```
- === "Workouts Monitoring with Save Output"
+ === "Python"
```python
import cv2
- from ultralytics import YOLO, solutions
+ from ultralytics import solutions
- model = YOLO("yolov8n-pose.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+ # Video writer
video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
- gym_object = solutions.AIGym(
- line_thickness=2,
- view_img=True,
- pose_type="pushup",
- kpts_to_check=[6, 8, 10],
+ # Init AIGym
+ gym = solutions.AIGym(
+ show=True, # Display the frame
+ kpts=[6, 8, 10], # keypoints index of person for monitoring specific exercise, by default it's for pushup
+ model="yolo11n-pose.pt", # Path to the YOLO11 pose estimation model file
+ # line_width=2, # Adjust the line width for bounding boxes and text display
)
+ # Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- results = model.track(im0, verbose=False) # Tracking recommended
- # results = model.predict(im0) # Prediction also supported
- im0 = gym_object.start_counting(im0, results)
+ im0 = gym.monitor(im0)
video_writer.write(im0)
cv2.destroyAllWindows()
video_writer.release()
```
-???+ tip "Support"
-
- "pushup", "pullup" and "abworkout" supported
-
### KeyPoints Map
-
+
### Arguments `AIGym`
-| Name | Type | Default | Description |
-| ----------------- | ------- | -------- | -------------------------------------------------------------------------------------- |
-| `kpts_to_check` | `list` | `None` | List of three keypoints index, for counting specific workout, followed by keypoint Map |
-| `line_thickness` | `int` | `2` | Thickness of the lines drawn. |
-| `view_img` | `bool` | `False` | Flag to display the image. |
-| `pose_up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. |
-| `pose_down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. |
-| `pose_type` | `str` | `pullup` | Type of pose to detect (`'pullup`', `pushup`, `abworkout`, `squat`). |
+| Name | Type | Default | Description |
+| ------------ | ------- | ------- | -------------------------------------------------------------------------------------- |
+| `kpts`       | `list`  | `None`  | List of three keypoint indices used to count a specific workout (see the keypoints map above). |
+| `line_width` | `int` | `2` | Thickness of the lines drawn. |
+| `show` | `bool` | `False` | Flag to display the image. |
+| `up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. |
+| `down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. |
+| `model`      | `str`   | `None`  | Path to the Ultralytics YOLO pose model file.                                           |
### Arguments `model.predict`
@@ -131,25 +111,23 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi
## FAQ
-### How do I monitor my workouts using Ultralytics YOLOv8?
+### How do I monitor my workouts using Ultralytics YOLO11?
-To monitor your workouts using Ultralytics YOLOv8, you can utilize the pose estimation capabilities to track and analyze key body landmarks and joints in real-time. This allows you to receive instant feedback on your exercise form, count repetitions, and measure performance metrics. You can start by using the provided example code for pushups, pullups, or ab workouts as shown:
+To monitor your workouts using Ultralytics YOLO11, you can utilize the pose estimation capabilities to track and analyze key body landmarks and joints in real-time. This allows you to receive instant feedback on your exercise form, count repetitions, and measure performance metrics. You can start by using the provided example code for push-ups, pull-ups, or ab workouts as shown:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8n-pose.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
-gym_object = solutions.AIGym(
- line_thickness=2,
- view_img=True,
- pose_type="pushup",
- kpts_to_check=[6, 8, 10],
+gym = solutions.AIGym(
+ line_width=2,
+ show=True,
+ kpts=[6, 8, 10],
)
while cap.isOpened():
@@ -157,17 +135,16 @@ while cap.isOpened():
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- results = model.track(im0, verbose=False)
- im0 = gym_object.start_counting(im0, results)
+ im0 = gym.monitor(im0)
cv2.destroyAllWindows()
```
For further customization and settings, you can refer to the [AIGym](#arguments-aigym) section in the documentation.
-### What are the benefits of using Ultralytics YOLOv8 for workout monitoring?
+### What are the benefits of using Ultralytics YOLO11 for workout monitoring?
-Using Ultralytics YOLOv8 for workout monitoring provides several key benefits:
+Using Ultralytics YOLO11 for workout monitoring provides several key benefits:
- **Optimized Performance:** By tailoring workouts based on monitoring data, you can achieve better results.
- **Goal Achievement:** Easily track and adjust fitness goals for measurable progress.
@@ -177,48 +154,45 @@ Using Ultralytics YOLOv8 for workout monitoring provides several key benefits:
You can watch a [YouTube video demonstration](https://www.youtube.com/watch?v=LGGxqLZtvuw) to see these benefits in action.
-### How accurate is Ultralytics YOLOv8 in detecting and tracking exercises?
+### How accurate is Ultralytics YOLO11 in detecting and tracking exercises?
-Ultralytics YOLOv8 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high [precision](https://www.ultralytics.com/glossary/precision) and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases pushups and pullups counting.
+Ultralytics YOLO11 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high [precision](https://www.ultralytics.com/glossary/precision) and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases push-ups and pull-ups counting.
-### Can I use Ultralytics YOLOv8 for custom workout routines?
+### Can I use Ultralytics YOLO11 for custom workout routines?
-Yes, Ultralytics YOLOv8 can be adapted for custom workout routines. The `AIGym` class supports different pose types such as "pushup", "pullup", and "abworkout." You can specify keypoints and angles to detect specific exercises. Here is an example setup:
+Yes, Ultralytics YOLO11 can be adapted for custom workout routines. The `AIGym` class supports exercises such as push-ups, pull-ups, and ab workouts; you specify the keypoint indices and angle thresholds that define the movement you want to count. Here is an example setup:
```python
from ultralytics import solutions
-gym_object = solutions.AIGym(
- line_thickness=2,
- view_img=True,
- pose_type="squat",
- kpts_to_check=[6, 8, 10],
+gym = solutions.AIGym(
+ line_width=2,
+ show=True,
+ kpts=[6, 8, 10],
)
```
For more details on setting arguments, refer to the [Arguments `AIGym`](#arguments-aigym) section. This flexibility allows you to monitor various exercises and customize routines based on your needs.
-### How can I save the workout monitoring output using Ultralytics YOLOv8?
+### How can I save the workout monitoring output using Ultralytics YOLO11?
To save the workout monitoring output, you can modify the code to include a video writer that saves the processed frames. Here's an example:
```python
import cv2
-from ultralytics import YOLO, solutions
+from ultralytics import solutions
-model = YOLO("yolov8n-pose.pt")
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
-gym_object = solutions.AIGym(
- line_thickness=2,
- view_img=True,
- pose_type="pushup",
- kpts_to_check=[6, 8, 10],
+gym = solutions.AIGym(
+ line_width=2,
+ show=True,
+ kpts=[6, 8, 10],
)
while cap.isOpened():
@@ -226,12 +200,11 @@ while cap.isOpened():
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
- results = model.track(im0, verbose=False)
- im0 = gym_object.start_counting(im0, results)
+ im0 = gym.monitor(im0)
video_writer.write(im0)
cv2.destroyAllWindows()
video_writer.release()
```
-This setup writes the monitored video to an output file. For more details, refer to the [Workouts Monitoring with Save Output](#workouts-monitoring-using-ultralytics-yolov8) section.
+This setup writes the monitored video to an output file. For more details, refer to the [Workouts Monitoring with Save Output](#workouts-monitoring-using-ultralytics-yolo11) section.
diff --git a/docs/en/guides/yolo-common-issues.md b/docs/en/guides/yolo-common-issues.md
index 6da5d164e9e..19a5eb421b1 100644
--- a/docs/en/guides/yolo-common-issues.md
+++ b/docs/en/guides/yolo-common-issues.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Comprehensive guide to troubleshoot common YOLOv8 issues, from installation errors to model training challenges. Enhance your Ultralytics projects with our expert tips.
-keywords: YOLO, YOLOv8, troubleshooting, installation errors, model training, GPU issues, Ultralytics, AI, computer vision, deep learning, Python, CUDA, PyTorch, debugging
+description: Comprehensive guide to troubleshoot common YOLO11 issues, from installation errors to model training challenges. Enhance your Ultralytics projects with our expert tips.
+keywords: YOLO, YOLO11, troubleshooting, installation errors, model training, GPU issues, Ultralytics, AI, computer vision, deep learning, Python, CUDA, PyTorch, debugging
---
# Troubleshooting Common YOLO Issues
@@ -12,7 +12,7 @@ keywords: YOLO, YOLOv8, troubleshooting, installation errors, model training, GP
## Introduction
-This guide serves as a comprehensive aid for troubleshooting common issues encountered while working with YOLOv8 on your Ultralytics projects. Navigating through these issues can be a breeze with the right guidance, ensuring your projects remain on track without unnecessary delays.
+This guide serves as a comprehensive aid for troubleshooting common issues encountered while working with YOLO11 on your Ultralytics projects. Navigating through these issues can be a breeze with the right guidance, ensuring your projects remain on track without unnecessary delays.
@@ -22,7 +22,7 @@ This guide serves as a comprehensive aid for troubleshooting common issues encou
allowfullscreen>
- Watch: Ultralytics YOLOv8 Common Issues | Installation Errors, Model Training Issues
+ Watch: Ultralytics YOLO11 Common Issues | Installation Errors, Model Training Issues
## Common Issues
@@ -41,7 +41,7 @@ Installation errors can arise due to various reasons, such as incompatible versi
Additionally, here are some common installation issues users have encountered, along with their respective solutions:
-- Import Errors or Dependency Issues - If you're getting errors during the import of YOLOv8, or you're having issues related to dependencies, consider the following troubleshooting steps:
+- Import Errors or Dependency Issues - If you're getting errors during the import of YOLO11, or you're having issues related to dependencies, consider the following troubleshooting steps:
- **Fresh Installation**: Sometimes, starting with a fresh installation can resolve unexpected issues. Especially with libraries like Ultralytics, where updates might introduce changes to the file tree structure or functionalities.
@@ -53,7 +53,7 @@ Additionally, here are some common installation issues users have encountered, a
- Remember, keeping your libraries and dependencies up-to-date is crucial for a smooth and error-free experience.
-- Running YOLOv8 on GPU - If you're having trouble running YOLOv8 on GPU, consider the following troubleshooting steps:
+- Running YOLO11 on GPU - If you're having trouble running YOLO11 on GPU, consider the following troubleshooting steps (a short verification sketch follows this list):
- **Verify CUDA Compatibility and Installation**: Ensure your GPU is CUDA compatible and that CUDA is correctly installed. Use the `nvidia-smi` command to check the status of your NVIDIA GPU and CUDA version.
@@ -63,7 +63,7 @@ Additionally, here are some common installation issues users have encountered, a
- **Update Your Packages**: Outdated packages might not be compatible with your GPU. Keep them updated.
- - **Program Configuration**: Check if the program or code specifies GPU usage. In YOLOv8, this might be in the settings or configuration.
+ - **Program Configuration**: Check if the program or code specifies GPU usage. In YOLO11, this might be in the settings or configuration.
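A rough verification sketch (not part of the original checklist) that confirms PyTorch can see the GPU and then requests it explicitly through the `device` argument:

```python
import torch

from ultralytics import YOLO

# Confirm CUDA is visible to PyTorch before debugging YOLO11 itself
print(torch.cuda.is_available())  # True if CUDA is set up correctly
print(torch.cuda.device_count())  # Number of visible GPUs

# Explicitly request GPU 0 for training and inference
model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=1, device=0)
results = model.predict("https://ultralytics.com/images/bus.jpg", device=0)
```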
### Model Training Issues
@@ -119,7 +119,7 @@ You can access these metrics from the training logs or by using tools like Tenso
**Solution**: To track and visualize training progress, you can consider using the following tools:
-- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, [accuracy](https://www.ultralytics.com/glossary/accuracy), and more. You can integrate it with your YOLOv8 training process.
+- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, [accuracy](https://www.ultralytics.com/glossary/accuracy), and more. You can integrate it with your YOLO11 training process, as sketched after this list.
- [Comet](https://bit.ly/yolov8-readme-comet): Comet provides an extensive toolkit for experiment tracking and comparison. It allows you to track metrics, hyperparameters, and even model weights. Integration with YOLO models is also straightforward, providing you with a complete overview of your experiment cycle.
- [Ultralytics HUB](https://hub.ultralytics.com/): Ultralytics HUB offers a specialized environment for tracking YOLO models, giving you a one-stop platform to manage metrics, datasets, and even collaborate with your team. Given its tailored focus on YOLO, it offers more customized tracking options.
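As a rough sketch of the TensorBoard option (assuming a recent Ultralytics release where TensorBoard logging is toggled through the `settings` helper), logging can be switched on and the resulting run directory opened with TensorBoard:

```python
from ultralytics import YOLO, settings

# Enable TensorBoard logging in the persisted Ultralytics settings
settings.update({"tensorboard": True})

# Train; metrics are written under the run directory, e.g. runs/detect/train
model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=3)

# View the curves from a terminal with:
#   tensorboard --logdir runs/detect/train
```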
@@ -177,13 +177,13 @@ Here are some things to keep in mind, if you are facing issues related to model
This section will address common issues faced during model prediction.
-#### Getting Bounding Box Predictions With Your YOLOv8 Custom Model
+#### Getting Bounding Box Predictions With Your YOLO11 Custom Model
-**Issue**: When running predictions with a custom YOLOv8 model, there are challenges with the format and visualization of the bounding box coordinates.
+**Issue**: When running predictions with a custom YOLO11 model, there are challenges with the format and visualization of the bounding box coordinates.
**Solution**:
-- Coordinate Format: YOLOv8 provides bounding box coordinates in absolute pixel values. To convert these to relative coordinates (ranging from 0 to 1), you need to divide by the image dimensions. For example, let's say your image size is 640x640. Then you would do the following:
+- Coordinate Format: YOLO11 provides bounding box coordinates in absolute pixel values. To convert these to relative coordinates (ranging from 0 to 1), you need to divide by the image dimensions. For example, let's say your image size is 640x640. Then you would do the following:
```python
# Convert absolute coordinates to relative coordinates
@@ -195,33 +195,33 @@ y2 = y2 / 640
- File Name: To obtain the file name of the image you're predicting on, access the image file path directly from the result object within your prediction loop.
-#### Filtering Objects in YOLOv8 Predictions
+#### Filtering Objects in YOLO11 Predictions
-**Issue**: Facing issues with how to filter and display only specific objects in the prediction results when running YOLOv8 using the Ultralytics library.
+**Issue**: Facing issues with how to filter and display only specific objects in the prediction results when running YOLO11 using the Ultralytics library.
**Solution**: To detect specific classes, use the `classes` argument to specify the classes you want to include in the output. For instance, to detect only cars (assuming 'cars' have class index 2):
```shell
-yolo task=detect mode=segment model=yolov8n-seg.pt source='path/to/car.mp4' show=True classes=2
+yolo task=detect mode=predict model=yolo11n.pt source='path/to/car.mp4' show=True classes=2
```
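If you prefer the Python API, the same filtering can be done with the `classes` argument of `predict` (a short sketch; the source path is only an example):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Keep only class index 2 (car in the COCO class map) in the predictions
results = model.predict(source="path/to/car.mp4", classes=[2], show=True)
```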
-#### Understanding Precision Metrics in YOLOv8
+#### Understanding Precision Metrics in YOLO11
-**Issue**: Confusion regarding the difference between box precision, mask precision, and [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) precision in YOLOv8.
+**Issue**: Confusion regarding the difference between box precision, mask precision, and [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) precision in YOLO11.
**Solution**: Box precision measures the accuracy of predicted bounding boxes compared to the actual ground truth boxes using IoU (Intersection over Union) as the metric. Mask precision assesses the agreement between predicted segmentation masks and ground truth masks in pixel-wise object classification. Confusion matrix precision, on the other hand, focuses on overall classification accuracy across all classes and does not consider the geometric accuracy of predictions. It's important to note that a [bounding box](https://www.ultralytics.com/glossary/bounding-box) can be geometrically accurate (true positive) even if the class prediction is wrong, leading to differences between box precision and confusion matrix precision. These metrics evaluate distinct aspects of a model's performance, reflecting the need for different evaluation metrics in various tasks.
-#### Extracting Object Dimensions in YOLOv8
+#### Extracting Object Dimensions in YOLO11
-**Issue**: Difficulty in retrieving the length and height of detected objects in YOLOv8, especially when multiple objects are detected in an image.
+**Issue**: Difficulty in retrieving the length and height of detected objects in YOLO11, especially when multiple objects are detected in an image.
-**Solution**: To retrieve the bounding box dimensions, first use the Ultralytics YOLOv8 model to predict objects in an image. Then, extract the width and height information of bounding boxes from the prediction results.
+**Solution**: To retrieve the bounding box dimensions, first use the Ultralytics YOLO11 model to predict objects in an image. Then, extract the width and height information of bounding boxes from the prediction results.
```python
from ultralytics import YOLO
-# Load a pre-trained YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load a pre-trained YOLO11 model
+model = YOLO("yolo11n.pt")
# Specify the source image
source = "https://ultralytics.com/images/bus.jpg"
@@ -264,23 +264,23 @@ for box in boxes:
## Community and Support
-Engaging with a community of like-minded individuals can significantly enhance your experience and success in working with YOLOv8. Below are some channels and resources you may find helpful.
+Engaging with a community of like-minded individuals can significantly enhance your experience and success in working with YOLO11. Below are some channels and resources you may find helpful.
### Forums and Channels for Getting Help
-**GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems.
+**GitHub Issues:** The YOLO11 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems.
**Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and the developers.
### Official Documentation and Resources
-**Ultralytics YOLOv8 Docs**: The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting.
+**Ultralytics YOLO11 Docs**: The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting.
-These resources should provide a solid foundation for troubleshooting and improving your YOLOv8 projects, as well as connecting with others in the YOLOv8 community.
+These resources should provide a solid foundation for troubleshooting and improving your YOLO11 projects, as well as connecting with others in the YOLO11 community.
## Conclusion
-Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLOv8 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
+Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLO11 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
Remember, the Ultralytics community is a valuable resource. Engaging with fellow developers and experts can provide additional insights and solutions that might not be covered in standard documentation. Always keep learning, experimenting, and sharing your experiences to contribute to the collective knowledge of the community.
@@ -288,11 +288,11 @@ Happy troubleshooting!
## FAQ
-### How do I resolve installation errors with YOLOv8?
+### How do I resolve installation errors with YOLO11?
Installation errors can often be due to compatibility issues or missing dependencies. Ensure you use Python 3.8 or later and have PyTorch 1.8 or later installed. It's beneficial to use virtual environments to avoid conflicts. For a step-by-step installation guide, follow our [official installation guide](../quickstart.md). If you encounter import errors, try a fresh installation or update the library to the latest version.
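A quick way to confirm your environment after installing (a sketch rather than an official troubleshooting step) is to run the built-in checks, which print the Python, PyTorch, CUDA, and Ultralytics versions and make most version mismatches obvious:

```python
import ultralytics

# Prints software and hardware information for the current environment
ultralytics.checks()
```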
-### Why is my YOLOv8 model training slow on a single GPU?
+### Why is my YOLO11 model training slow on a single GPU?
Training on a single GPU might be slow due to large batch sizes or insufficient memory. To speed up training, use multiple GPUs. Ensure your system has multiple GPUs available and select them with the `device` argument, e.g., `device=[0, 1, 2, 3]`. Increase the batch size accordingly to fully utilize the GPUs without exceeding memory limits. Example command:
@@ -300,7 +300,7 @@ Training on a single GPU might be slow due to large batch sizes or insufficient
model.train(data="/path/to/your/data.yaml", batch=32, device=[0, 1, 2, 3], multi_scale=True)
```
-### How can I ensure my YOLOv8 model is training on the GPU?
+### How can I ensure my YOLO11 model is training on the GPU?
If the 'device' value shows 'null' in the training logs, it generally means the training process is set to automatically use an available GPU. To explicitly assign a specific GPU, set the 'device' value in your `.yaml` configuration file. For instance:
@@ -310,10 +310,10 @@ device: 0
This sets the training process to the first GPU. Consult the `nvidia-smi` command to confirm your CUDA setup.
-### How can I monitor and track my YOLOv8 model training progress?
+### How can I monitor and track my YOLO11 model training progress?
Tracking and visualizing training progress can be efficiently managed through tools like [TensorBoard](https://www.tensorflow.org/tensorboard), [Comet](https://bit.ly/yolov8-readme-comet), and [Ultralytics HUB](https://hub.ultralytics.com/). These tools allow you to log and visualize metrics such as loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP. Implementing [early stopping](#continuous-monitoring-parameters) based on these metrics can also help achieve better training outcomes.
-### What should I do if YOLOv8 is not recognizing my dataset format?
+### What should I do if YOLO11 is not recognizing my dataset format?
Ensure your dataset and labels conform to the expected format. Verify that annotations are accurate and of high quality. If you face any issues, refer to the [Data Collection and Annotation](https://docs.ultralytics.com/guides/data-collection-and-annotation/) guide for best practices. For more dataset-specific guidance, check the [Datasets](https://docs.ultralytics.com/datasets/) section in the documentation.
diff --git a/docs/en/guides/yolo-performance-metrics.md b/docs/en/guides/yolo-performance-metrics.md
index aeed82355d1..27ad142dfa9 100644
--- a/docs/en/guides/yolo-performance-metrics.md
+++ b/docs/en/guides/yolo-performance-metrics.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Explore essential YOLOv8 performance metrics like mAP, IoU, F1 Score, Precision, and Recall. Learn how to calculate and interpret them for model evaluation.
-keywords: YOLOv8 performance metrics, mAP, IoU, F1 Score, Precision, Recall, object detection, Ultralytics
+description: Explore essential YOLO11 performance metrics like mAP, IoU, F1 Score, Precision, and Recall. Learn how to calculate and interpret them for model evaluation.
+keywords: YOLO11 performance metrics, mAP, IoU, F1 Score, Precision, Recall, object detection, Ultralytics
---
# Performance Metrics Deep Dive
## Introduction
-Performance metrics are key tools to evaluate the [accuracy](https://www.ultralytics.com/glossary/accuracy) and efficiency of [object detection](https://www.ultralytics.com/glossary/object-detection) models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLOv8, their significance, and how to interpret them.
+Performance metrics are key tools to evaluate the [accuracy](https://www.ultralytics.com/glossary/accuracy) and efficiency of [object detection](https://www.ultralytics.com/glossary/object-detection) models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLO11, their significance, and how to interpret them.
@@ -18,12 +18,12 @@ Performance metrics are key tools to evaluate the [accuracy](https://www.ultraly
allowfullscreen>
- Watch: Ultralytics YOLOv8 Performance Metrics | MAP, F1 Score, [Precision](https://www.ultralytics.com/glossary/precision), IoU & Accuracy
+ Watch: Ultralytics YOLO11 Performance Metrics | MAP, F1 Score, Precision, IoU & Accuracy
## Object Detection Metrics
-Let's start by discussing some metrics that are not only important to YOLOv8 but are broadly applicable across different object detection models.
+Let's start by discussing some metrics that are not only important to YOLO11 but are broadly applicable across different object detection models.
- **[Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU):** IoU is a measure that quantifies the overlap between a predicted [bounding box](https://www.ultralytics.com/glossary/bounding-box) and a ground truth bounding box. It plays a fundamental role in evaluating the accuracy of object localization.
@@ -35,9 +35,9 @@ Let's start by discussing some metrics that are not only important to YOLOv8 but
- **F1 Score:** The F1 Score is the harmonic mean of precision and recall, providing a balanced assessment of a model's performance while considering both false positives and false negatives.
-## How to Calculate Metrics for YOLOv8 Model
+## How to Calculate Metrics for YOLO11 Model
-Now, we can explore [YOLOv8's Validation mode](../modes/val.md) that can be used to compute the above discussed evaluation metrics.
+Now, we can explore [YOLO11's Validation mode](../modes/val.md) that can be used to compute the above discussed evaluation metrics.
Using the validation mode is simple. Once you have a trained model, you can invoke the model.val() function. This function will then process the validation dataset and return a variety of performance metrics. But what do these metrics mean? And how should you interpret them?
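As an illustration (assuming a detection model and the small bundled `coco8.yaml` dataset), the object returned by `model.val()` exposes the metrics discussed in this guide:

```python
from ultralytics import YOLO

# Load a trained or pretrained detection model and validate it
model = YOLO("yolo11n.pt")
metrics = model.val(data="coco8.yaml")

print(metrics.box.map)  # mAP50-95
print(metrics.box.map50)  # mAP50
print(metrics.box.mp)  # mean precision
print(metrics.box.mr)  # mean recall
```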
@@ -91,7 +91,7 @@ The model.val() function, apart from producing numeric metrics, also yields visu
- **Validation Batch Labels (`val_batchX_labels.jpg`)**: These images depict the ground truth labels for distinct batches from the validation dataset. They provide a clear picture of what the objects are and their respective locations as per the dataset.
-- **Validation Batch Predictions (`val_batchX_pred.jpg`)**: Contrasting the label images, these visuals display the predictions made by the YOLOv8 model for the respective batches. By comparing these to the label images, you can easily assess how well the model detects and classifies objects visually.
+- **Validation Batch Predictions (`val_batchX_pred.jpg`)**: Contrasting the label images, these visuals display the predictions made by the YOLO11 model for the respective batches. By comparing these to the label images, you can easily assess how well the model detects and classifies objects visually.
#### Results Storage
@@ -153,56 +153,56 @@ Real-world examples can help clarify how these metrics work in practice.
## Connect and Collaborate
-Tapping into a community of enthusiasts and experts can amplify your journey with YOLOv8. Here are some avenues that can facilitate learning, troubleshooting, and networking.
+Tapping into a community of enthusiasts and experts can amplify your journey with YOLO11. Here are some avenues that can facilitate learning, troubleshooting, and networking.
### Engage with the Broader Community
-- **GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems.
+- **GitHub Issues:** The YOLO11 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems.
- **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and the developers.
### Official Documentation and Resources:
-- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting.
+- **Ultralytics YOLO11 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting.
-Using these resources will not only guide you through any challenges but also keep you updated with the latest trends and best practices in the YOLOv8 community.
+Using these resources will not only guide you through any challenges but also keep you updated with the latest trends and best practices in the YOLO11 community.
## Conclusion
-In this guide, we've taken a close look at the essential performance metrics for YOLOv8. These metrics are key to understanding how well a model is performing and are vital for anyone aiming to fine-tune their models. They offer the necessary insights for improvements and to make sure the model works effectively in real-life situations.
+In this guide, we've taken a close look at the essential performance metrics for YOLO11. These metrics are key to understanding how well a model is performing and are vital for anyone aiming to fine-tune their models. They offer the necessary insights for improvements and to make sure the model works effectively in real-life situations.
-Remember, the YOLOv8 and Ultralytics community is an invaluable asset. Engaging with fellow developers and experts can open doors to insights and solutions not found in standard documentation. As you journey through object detection, keep the spirit of learning alive, experiment with new strategies, and share your findings. By doing so, you contribute to the community's collective wisdom and ensure its growth.
+Remember, the YOLO11 and Ultralytics community is an invaluable asset. Engaging with fellow developers and experts can open doors to insights and solutions not found in standard documentation. As you journey through object detection, keep the spirit of learning alive, experiment with new strategies, and share your findings. By doing so, you contribute to the community's collective wisdom and ensure its growth.
Happy object detecting!
## FAQ
-### What is the significance of [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) in evaluating YOLOv8 model performance?
+### What is the significance of [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) in evaluating YOLO11 model performance?
-Mean Average Precision (mAP) is crucial for evaluating YOLOv8 models as it provides a single metric encapsulating precision and recall across multiple classes. mAP@0.50 measures precision at an IoU threshold of 0.50, focusing on the model's ability to detect objects correctly. mAP@0.50:0.95 averages precision across a range of IoU thresholds, offering a comprehensive assessment of detection performance. High mAP scores indicate that the model effectively balances precision and recall, essential for applications like autonomous driving and surveillance.
+Mean Average Precision (mAP) is crucial for evaluating YOLO11 models as it provides a single metric encapsulating precision and recall across multiple classes. mAP@0.50 measures precision at an IoU threshold of 0.50, focusing on the model's ability to detect objects correctly. mAP@0.50:0.95 averages precision across a range of IoU thresholds, offering a comprehensive assessment of detection performance. High mAP scores indicate that the model effectively balances precision and recall, essential for applications like autonomous driving and surveillance.
-### How do I interpret the Intersection over Union (IoU) value for YOLOv8 object detection?
+### How do I interpret the Intersection over Union (IoU) value for YOLO11 object detection?
Intersection over Union (IoU) measures the overlap between the predicted and ground truth bounding boxes. IoU values range from 0 to 1, where higher values indicate better localization accuracy. An IoU of 1.0 means perfect alignment. Typically, an IoU threshold of 0.50 is used to define true positives in metrics like mAP. Lower IoU values suggest that the model struggles with precise object localization, which can be improved by refining bounding box regression or increasing annotation accuracy.
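To make the definition concrete, here is a small illustrative helper (not part of the Ultralytics API) that computes IoU for two boxes given in `(x1, y1, x2, y2)` pixel format:

```python
def box_iou(box_a, box_b):
    """Compute IoU of two boxes given as (x1, y1, x2, y2) in pixels."""
    # Intersection rectangle
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)

    # Union = area A + area B - intersection
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union else 0.0


print(box_iou((0, 0, 100, 100), (50, 50, 150, 150)))  # ~0.14
```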
-### Why is the F1 Score important for evaluating YOLOv8 models in object detection?
+### Why is the F1 Score important for evaluating YOLO11 models in object detection?
-The F1 Score is important for evaluating YOLOv8 models because it provides a harmonic mean of precision and recall, balancing both false positives and false negatives. It is particularly valuable when dealing with imbalanced datasets or applications where either precision or recall alone is insufficient. A high F1 Score indicates that the model effectively detects objects while minimizing both missed detections and false alarms, making it suitable for critical applications like security systems and medical imaging.
+The F1 Score is important for evaluating YOLO11 models because it provides a harmonic mean of precision and recall, balancing both false positives and false negatives. It is particularly valuable when dealing with imbalanced datasets or applications where either precision or recall alone is insufficient. A high F1 Score indicates that the model effectively detects objects while minimizing both missed detections and false alarms, making it suitable for critical applications like security systems and medical imaging.
-### What are the key advantages of using Ultralytics YOLOv8 for real-time object detection?
+### What are the key advantages of using Ultralytics YOLO11 for real-time object detection?
-Ultralytics YOLOv8 offers multiple advantages for real-time object detection:
+Ultralytics YOLO11 offers multiple advantages for real-time object detection:
- **Speed and Efficiency**: Optimized for high-speed inference, suitable for applications requiring low latency.
- **High Accuracy**: Advanced algorithm ensures high mAP and IoU scores, balancing precision and recall.
- **Flexibility**: Supports various tasks including object detection, segmentation, and classification.
- **Ease of Use**: User-friendly interfaces, extensive documentation, and seamless integration with platforms like Ultralytics HUB ([HUB Quickstart](../hub/quickstart.md)).
-This makes YOLOv8 ideal for diverse applications from autonomous vehicles to smart city solutions.
+This makes YOLO11 ideal for diverse applications from autonomous vehicles to smart city solutions.
-### How can validation metrics from YOLOv8 help improve model performance?
+### How can validation metrics from YOLO11 help improve model performance?
-Validation metrics from YOLOv8 like precision, recall, mAP, and IoU help diagnose and improve model performance by providing insights into different aspects of detection:
+Validation metrics from YOLO11 like precision, recall, mAP, and IoU help diagnose and improve model performance by providing insights into different aspects of detection:
- **Precision**: Helps identify and minimize false positives.
- **Recall**: Ensures all relevant objects are detected.
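As a quick companion to the metrics discussed above, here is a minimal sketch of reading them back from the Python API after validation; the `yolo11n.pt` checkpoint and the small `coco8.yaml` sample dataset are assumptions for illustration only.

```python
from ultralytics import YOLO

# Validate a detection checkpoint and inspect the headline metrics
model = YOLO("yolo11n.pt")  # assumed pretrained checkpoint
metrics = model.val(data="coco8.yaml")  # assumed small sample dataset

print(metrics.box.map)  # mAP@0.50:0.95
print(metrics.box.map50)  # mAP@0.50
print(metrics.box.mp)  # mean precision
print(metrics.box.mr)  # mean recall
```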
diff --git a/docs/en/guides/yolo-thread-safe-inference.md b/docs/en/guides/yolo-thread-safe-inference.md
index c086685152b..b66af30ab07 100644
--- a/docs/en/guides/yolo-thread-safe-inference.md
+++ b/docs/en/guides/yolo-thread-safe-inference.md
@@ -33,7 +33,7 @@ from threading import Thread
from ultralytics import YOLO
# Instantiate the model outside the thread
-shared_model = YOLO("yolov8n.pt")
+shared_model = YOLO("yolo11n.pt")
def predict(image_path):
@@ -60,8 +60,8 @@ from threading import Thread
from ultralytics import YOLO
# Instantiate multiple models outside the thread
-shared_model_1 = YOLO("yolov8n_1.pt")
-shared_model_2 = YOLO("yolov8n_2.pt")
+shared_model_1 = YOLO("yolo11n_1.pt")
+shared_model_2 = YOLO("yolo11n_2.pt")
def predict(model, image_path):
@@ -94,7 +94,7 @@ from ultralytics import YOLO
def thread_safe_predict(image_path):
"""Predict on an image using a new YOLO model instance in a thread-safe manner; takes image path as input."""
- local_model = YOLO("yolov8n.pt")
+ local_model = YOLO("yolo11n.pt")
results = local_model.predict(image_path)
# Process results
@@ -128,7 +128,7 @@ from ultralytics import YOLO
def thread_safe_predict(image_path):
"""Predict on an image in a thread-safe manner."""
- local_model = YOLO("yolov8n.pt")
+ local_model = YOLO("yolo11n.pt")
results = local_model.predict(image_path)
# Process results
@@ -157,7 +157,7 @@ from ultralytics import YOLO
def thread_safe_predict(image_path):
"""Runs inference in a thread-safe manner with a new YOLO model instance."""
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
results = model.predict(image_path)
# Process results
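Taken together, the updated snippets above boil down to the pattern below: a self-contained sketch of the recommended thread-safe approach, where each thread creates its own model instance. The image paths are placeholders.

```python
from threading import Thread

from ultralytics import YOLO


def thread_safe_predict(image_path):
    """Run inference with a model instance created inside the thread, so no state is shared across threads."""
    local_model = YOLO("yolo11n.pt")
    results = local_model.predict(image_path)
    return results


# Each thread gets its own model; image paths are placeholders
threads = [Thread(target=thread_safe_predict, args=(p,)) for p in ("image1.jpg", "image2.jpg")]
for t in threads:
    t.start()
for t in threads:
    t.join()
```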
diff --git a/docs/en/help/CI.md b/docs/en/help/CI.md
index 93b1ad32228..140303859cd 100644
--- a/docs/en/help/CI.md
+++ b/docs/en/help/CI.md
@@ -12,8 +12,8 @@ Continuous Integration (CI) is an essential aspect of software development which
Here's a brief description of our CI actions:
-- **[CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml):** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository.
-- **[Docker Deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml):** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly.
+- **[CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml):** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository.
+- **[Docker Deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml):** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly.
- **[Broken Links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml):** This test scans the codebase for any broken or dead links in our markdown or HTML files.
- **[CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml):** CodeQL is a tool from GitHub that performs semantic analysis on our code, helping to find potential security vulnerabilities and maintain high-quality code.
- **[PyPI Publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml):** This test checks if the project can be packaged and published to PyPi without any errors.
@@ -22,13 +22,18 @@ Here's a brief description of our CI actions:
Below is the table showing the status of these CI tests for our main repositories:
-| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPI and Docs Publishing |
-| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [yolov3](https://github.com/ultralytics/yolov3) | [](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | |
-| [yolov5](https://github.com/ultralytics/yolov5) | [](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | |
-| [ultralytics](https://github.com/ultralytics/ultralytics) | [](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) |
-| [hub](https://github.com/ultralytics/hub) | [](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | |
-| [docs](https://github.com/ultralytics/docs) | | | [](https://github.com/ultralytics/docs/actions/workflows/links.yml)[](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) | | [](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) |
+| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPI and Docs Publishing |
+| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [yolov3](https://github.com/ultralytics/yolov3) | [](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [](https://github.com/ultralytics/yolov3/actions/workflows/github-code-scanning/codeql) | |
+| [yolov5](https://github.com/ultralytics/yolov5) | [](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [](https://github.com/ultralytics/yolov5/actions/workflows/github-code-scanning/codeql) | |
+| [ultralytics](https://github.com/ultralytics/ultralytics) | [](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql) | [](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) |
+| [hub-sdk](https://github.com/ultralytics/hub-sdk) | [](https://github.com/ultralytics/hub-sdk/actions/workflows/ci.yml) | | [](https://github.com/ultralytics/hub-sdk/actions/workflows/links.yml) | [](https://github.com/ultralytics/hub-sdk/actions/workflows/github-code-scanning/codeql) | [](https://github.com/ultralytics/hub-sdk/actions/workflows/publish.yml) |
+| [hub](https://github.com/ultralytics/hub) | [](https://github.com/ultralytics/hub/actions/workflows/ci.yml) | | [](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | |
+| [mkdocs](https://github.com/ultralytics/mkdocs) | [](https://github.com/ultralytics/mkdocs/actions/workflows/format.yml) | | | [](https://github.com/ultralytics/mkdocs/actions/workflows/github-code-scanning/codeql) | [](https://github.com/ultralytics/mkdocs/actions/workflows/publish.yml) |
+| [thop](https://github.com/ultralytics/thop) | [![Ultralytics Actions](https://github.com/ultralytics/thop/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/format.yml) | | | [![CodeQL](https://github.com/ultralytics/thop/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/thop/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/publish.yml) |
+| [actions](https://github.com/ultralytics/actions) | [![Ultralytics Actions](https://github.com/ultralytics/actions/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/format.yml) | | | [![CodeQL](https://github.com/ultralytics/actions/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/actions/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/publish.yml) |
+| [docs](https://github.com/ultralytics/docs) | [](https://github.com/ultralytics/docs/actions/workflows/format.yml) | | [](https://github.com/ultralytics/docs/actions/workflows/links.yml)[](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) | | [](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) |
+| [handbook](https://github.com/ultralytics/handbook) | [](https://github.com/ultralytics/handbook/actions/workflows/format.yml) | | [](https://github.com/ultralytics/handbook/actions/workflows/links.yml) | | [](https://github.com/ultralytics/handbook/actions/workflows/pages/pages-build-deployment) |
Each badge shows the status of the last run of the corresponding CI test on the `main` branch of the respective repository. If a test fails, the badge will display a "failing" status, and if it passes, it will display a "passing" status.
@@ -56,7 +61,7 @@ To quickly get a glimpse of the code coverage status of the `ultralytics` python
In the sunburst graphic below, the innermost circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
-
+
@@ -64,7 +69,7 @@ In the sunburst graphic below, the innermost circle is the entire project, movin
### What is Continuous Integration (CI) in Ultralytics?
-Continuous Integration (CI) in Ultralytics involves automatically integrating and testing code changes to ensure high-quality standards. Our CI setup includes running [unit tests, linting checks, and comprehensive tests](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml). Additionally, we perform [Docker deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml), [broken link checks](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml), [CodeQL analysis](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) for security vulnerabilities, and [PyPI publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) to package and distribute our software.
+Continuous Integration (CI) in Ultralytics involves automatically integrating and testing code changes to ensure high-quality standards. Our CI setup includes running [unit tests, linting checks, and comprehensive tests](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml). Additionally, we perform [Docker deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml), [broken link checks](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml), [CodeQL analysis](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql) for security vulnerabilities, and [PyPI publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) to package and distribute our software.
### How does Ultralytics check for broken links in documentation and code?
@@ -76,7 +81,7 @@ Ultralytics uses a specific CI action to [check for broken links](https://github
### How does Ultralytics utilize Docker for deployment?
-Ultralytics employs Docker to validate the deployment of our projects through a dedicated CI action. This process ensures that our [Dockerfile and associated scripts](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) are functioning correctly, allowing for consistent and reproducible deployment environments which are critical for scalable and reliable AI solutions.
+Ultralytics employs Docker to validate the deployment of our projects through a dedicated CI action. This process ensures that our [Dockerfile and associated scripts](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml) are functioning correctly, allowing for consistent and reproducible deployment environments which are critical for scalable and reliable AI solutions.
### What is the role of automated PyPI publishing in Ultralytics?
diff --git a/docs/en/help/CLA.md b/docs/en/help/CLA.md
index ebee3c4dc0d..58765c2f2e4 100644
--- a/docs/en/help/CLA.md
+++ b/docs/en/help/CLA.md
@@ -5,46 +5,126 @@ keywords: Ultralytics, Contributor License Agreement, open source, contributions
# Ultralytics Individual Contributor License Agreement
-Thank you for your interest in contributing to open source software projects (“Projects”) made available by Ultralytics Inc. (“Ultralytics”). This Individual Contributor License Agreement (“Agreement”) sets out the terms governing any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that you submit or have submitted, in any form and in any manner, to Ultralytics in respect of any Projects (collectively “Contributions”). If you have any questions respecting this Agreement, please contact hello@ultralytics.com.
+Thank you for your interest in contributing to software projects managed by Ultralytics Inc. ("**Ultralytics**", "**We**" or "**Us**"). This Contributor License Agreement ("**Agreement**") sets out the rights granted by contributors ("**You**" or "**Your**") to Us and the terms governing any contributions as defined in Section 1. This license is for your protection as a Contributor as well as the protection of Ultralytics; it does not change your rights to use your own Contributions for any other purpose.
-You agree that the following terms apply to all of your past, present and future Contributions. Except for the licenses granted in this Agreement, you retain all of your right, title and interest in and to your Contributions.
+By accepting and agreeing to these terms and conditions You accept and agree to the following terms and conditions for Your past, present and future Contributions submitted to Ultralytics. Except for the license granted herein to Ultralytics and recipients of software distributed by Ultralytics, You reserve all right, title, and interest in and to Your Contributions.
-**Copyright License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works, with the right to sublicense the foregoing rights through multiple tiers of sublicensees.
+If you have any questions respecting this Agreement, please contact hello@ultralytics.com.
-**Patent License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer your Contributions, where such license applies only to those patent claims licensable by you that are necessarily infringed by your Contributions alone or by combination of your Contributions with the Project to which such Contributions were submitted, with the right to sublicense the foregoing rights through multiple tiers of sublicensees.
+## 1. Definitions
-**Moral Rights.** To the fullest extent permitted under applicable law, you hereby waive, and agree not to assert, all of your “moral rights” in or relating to your Contributions for the benefit of Ultralytics, its assigns, and their respective direct and indirect sublicensees.
+### 1.1 "You" or "Your"
-**Third Party Content/Rights.** If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), then you agree to include with the submission of your Contribution full details respecting such Third Party Content and Third Party Rights, including, without limitation, identification of which aspects of your Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights do not apply to any portion of a Project that is incorporated into your Contribution to that same Project.
+Shall mean the individual who submits a Contribution to Ultralytics or the legal entity authorized by the copyright owner that is making this Agreement with Ultralytics. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
-**Representations.** You represent that, other than the Third Party Content and Third Party Rights identified by you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were created in the course of your employment with your past or present employer(s), you represent that such employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer(s) has waived all of their right, title or interest in or to your Contributions.
+### 1.2 "Contribution"
-**Disclaimer.** To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support.
+Shall mean any original work of authorship, including but not limited to source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information, or any other works of authorship, that is intentionally submitted by You to Ultralytics, in any form and in any manner, for inclusion in, or documentation of, any of the projects managed by Ultralytics (the "**Work**"). This includes any modifications or additions to existing works that are submitted for the purpose of contributing to a Project and improving the Work.
-**No Obligation.** You acknowledge that Ultralytics is under no obligation to use or incorporate your Contributions into any of the Projects. The decision to use or incorporate your Contributions into any of the Projects will be made at the sole discretion of Ultralytics or its authorized delegates.
+### 1.3 "Copyright"
-**Disputes.** This Agreement shall be governed by and construed in accordance with the laws of the State of New York, United States of America, without giving effect to its principles or rules regarding conflicts of laws, other than such principles directing application of New York law. The parties hereby submit to venue in, and jurisdiction of the courts located in New York, New York for purposes relating to this Agreement. In the event that any of the provisions of this Agreement shall be held by a court or other tribunal of competent jurisdiction to be unenforceable, the remaining portions hereof shall remain in full force and effect.
+Means all rights protecting works of authorship owned or controlled by You, including copyright, moral and neighboring rights, as appropriate, for the full term of their existence including any extensions by You.
-**Assignment.** You agree that Ultralytics may assign this Agreement, and all of its rights, obligations and licenses hereunder.
+### 1.4 "Submit" or "Submission" or "Submitted"
+
+Or any derivatives shall mean any form of electronic, verbal, or written communication sent to Ultralytics or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, Ultralytics for the purpose of discussing and improving the Project, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution."
+
+### 1.5 "Project"
+
+Shall mean any of the software projects owned, managed, or maintained by Ultralytics, including but not limited to open-source projects made available by Ultralytics to which Contributions may be submitted.
+
+## 2. Grant of Rights
+
+### 2.1 Copyright License
+
+To the maximum extent permitted by the relevant law, and subject to the terms and conditions of this Agreement, You hereby grant to Ultralytics and to recipients of software distributed by Ultralytics a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works.
+
+### 2.2 Patent License
+
+To the maximum extent permitted by the relevant law, and subject to the terms and conditions of this Agreement, You hereby grant to Ultralytics and to recipients of software distributed by Ultralytics a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed.
+
+### 2.3 Outbound License
+
+Based on the grant of rights in Sections 2.1 and 2.2, if We include Your Contribution in a Material, We may license the Contribution under any license, including copyleft, permissive, commercial, or proprietary licenses.
+
+### 2.4 Moral Rights
+
+To the fullest extent permitted by law, You hereby waive, and agree not to assert, all of Your "moral rights" in or relating to Your Contributions for the benefit of Ultralytics, its assigns, and their respective direct and indirect sublicensees.
+
+## 3. Representations and Warranties
+
+You represent that:
+
+(a) You have the legal authority to enter into this Agreement.
+
+(b) You own the Copyright and patent claims covering the Contribution which are required to grant the rights under Section 2.
+
+(c) The grant of rights under Section 2 does not violate any grant of rights which You have made to third parties, including Your employer. If Your Contributions were created in the course of Your employment with Your past or present employer(s), You represent that such employer(s) has authorized You to make Contributions on behalf of such employer(s) or such employer(s) has waived all of their right, title, or interest in or to Your Contributions.
+
+(d) You have followed the instructions provided by Ultralytics if You do not own the Copyright in the entire work of authorship submitted.
+
+(e) Should You wish to submit work that is not Your original creation, You may submit it to Ultralytics separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which You are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]."
+
+(f) You agree to notify Ultralytics of any facts or circumstances of which You become aware that would make these representations inaccurate in any respect.
+
+## 4. Disclaimer of Warranties
+
+EXCEPT FOR THE EXPRESS WARRANTIES IN SECTION 3, THE CONTRIBUTION IS PROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIES INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED BY YOU TO US. TO THE EXTENT THAT ANY SUCH WARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATION TO THE MINIMUM PERIOD PERMITTED BY LAW.
+
+## 5. Miscellaneous
+
+### 5.1 Governing Law and Jurisdiction
+
+This Agreement will be governed by and construed in accordance with the laws of the State of New York, United States of America, excluding its conflicts of law provisions. The parties submit to venue in, and jurisdiction of, the courts located in New York, New York, for purposes relating to this Agreement. You waive all defenses of lack of personal jurisdiction and forum non-conveniens.
+
+### 5.2 Entire Agreement
+
+This Agreement sets out the entire agreement between You and Ultralytics for Your Contributions and overrides all other agreements or understandings.
+
+### 5.3 Assignment
+
+Ultralytics may assign this Agreement, and all of its rights, obligations, and licenses hereunder, without Your prior consent.
+
+### 5.4 Waiver of Performance
+
+The failure of either party to require performance by the other party of any provision of this Agreement in one situation shall not affect the right of a party to require such performance at any time in the future. A waiver of performance under a provision in one situation shall not be considered a waiver of the performance of the provision in the future or a waiver of the provision in its entirety.
+
+### 5.5 Severability
+
+If any provision of this Agreement is found void and unenforceable, such provision will be replaced to the extent possible with a provision that comes closest to the meaning of the original provision and which is enforceable. The terms and conditions set forth in this Agreement shall apply notwithstanding any failure of essential purpose of this Agreement or any limited remedy to the maximum extent possible under law.
+
+### 5.6 No Obligation
+
+You acknowledge that Ultralytics is under no obligation to use or incorporate your Contributions into any of the Work. The decision to use or incorporate your Contributions into any of the Work will be made at the sole discretion of Ultralytics or its authorized delegates.
+
+### 5.7 Effective Date
+
+The Effective Date of this Agreement shall be the date You execute this Agreement or the date You first Submit a Contribution to Ultralytics, whichever is earlier.
## FAQ
-### What is the purpose of the Ultralytics Individual Contributor License Agreement?
+### What is the purpose of the Ultralytics Contributor License Agreement (CLA)?
-The Ultralytics Individual Contributor License Agreement (ICLA) governs the terms under which you contribute to Ultralytics' open-source projects. It sets out the rights and obligations related to your contributions, including granting copyright and patent licenses, waiving moral rights, and disclosing any third-party content.
+The Ultralytics CLA defines the terms under which you contribute to Ultralytics' software projects. It outlines the rights and obligations related to your contributions, including granting copyright and patent licenses, and addressing the handling of third-party content.
-### Why do I need to agree to the Copyright License in the ICLA?
+### Why do I need to agree to the Copyright License in the CLA?
-Agreeing to the Copyright License allows Ultralytics to use and distribute your contributions, including making derivative works. This ensures that your contributions can be integrated into Ultralytics projects and shared with the community, fostering collaboration and software development.
+Agreeing to the Copyright License allows Ultralytics and its users to use, modify, distribute, and create derivative works from your contributions. This ensures that your contributions can be integrated into Ultralytics projects and shared with the community, fostering collaboration and software development.
### How does the Patent License benefit both contributors and Ultralytics?
-The Patent License grants Ultralytics the rights to use, make, and sell contributions covered by your patents, which is crucial for product development and commercialization. In return, it allows your patented innovations to be more widely used and recognized, promoting innovation within the community.
+The Patent License grants Ultralytics the rights to use, make, and sell contributions covered by your patents. This is essential for product development and commercialization. In return, your patented innovations gain wider use and recognition, promoting innovation within the community.
+
+### What should I do if my contribution includes third-party content?
+
+If your contribution includes third-party content, you must clearly mark it and provide comprehensive details about its source and any applicable licenses or restrictions. This ensures proper attribution and legal compliance within Ultralytics projects, maintaining transparency and respecting the rights of original content creators.
-### What should I do if my contribution contains third-party content?
+### What happens if Ultralytics decides not to use my contribution?
-If your contribution includes third-party content or you are aware of any third-party intellectual property rights, you must provide full details of such content and rights when submitting your contribution. This includes identifying the third-party content, its author, and the applicable license terms. For more information on third-party content, refer to the Third Party Content/Rights section of the Agreement.
+Ultralytics is not obligated to use or incorporate your contributions into any projects. The decision to use your contributions is entirely at Ultralytics' discretion, meaning that while your contributions are valuable, they may not always align with the project's current needs or directions.
+
+---
-### What happens if Ultralytics does not use my contributions?
+**Need More Help?**
-Ultralytics is not obligated to use or incorporate your contributions into any projects. The decision to use or integrate contributions is at Ultralytics' sole discretion. This means that while your contributions are valuable, they may not always align with the project's current needs or directions. For further details, see the No Obligation section.
+If you have any further questions or need clarification regarding the Contributor License Agreement, please contact us at hello@ultralytics.com.
diff --git a/docs/en/help/FAQ.md b/docs/en/help/FAQ.md
index 234fb9e82fd..0272d8b71b0 100644
--- a/docs/en/help/FAQ.md
+++ b/docs/en/help/FAQ.md
@@ -14,7 +14,7 @@ This FAQ section addresses common questions and issues users might encounter whi
Ultralytics is a [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) AI company specializing in state-of-the-art object detection and [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) models, with a focus on the YOLO (You Only Look Once) family. Their offerings include:
-- Open-source implementations of [YOLOv5](https://docs.ultralytics.com/models/yolov5/) and [YOLOv8](https://docs.ultralytics.com/models/yolov8/)
+- Open-source implementations of [YOLOv8](https://docs.ultralytics.com/models/yolov8/) and [YOLO11](https://docs.ultralytics.com/models/yolo11/)
- A wide range of [pre-trained models](https://docs.ultralytics.com/models/) for various computer vision tasks
- A comprehensive [Python package](https://docs.ultralytics.com/usage/python/) for seamless integration of YOLO models into projects
- Versatile [tools](https://docs.ultralytics.com/modes/) for training, testing, and deploying models
@@ -54,9 +54,9 @@ Recommended setup:
For troubleshooting common issues, visit the [YOLO Common Issues](https://docs.ultralytics.com/guides/yolo-common-issues/) page.
-### How can I train a custom YOLOv8 model on my own dataset?
+### How can I train a custom YOLO11 model on my own dataset?
-To train a custom YOLOv8 model:
+To train a custom YOLO11 model:
1. Prepare your dataset in YOLO format (images and corresponding label txt files).
2. Create a YAML file describing your dataset structure and classes.
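As a companion to steps 1 and 2 above, here is a minimal training sketch; the dataset YAML name is a placeholder for the file you create in step 2.

```python
from ultralytics import YOLO

# Fine-tune a pretrained checkpoint on a custom dataset
model = YOLO("yolo11n.pt")
results = model.train(data="custom_dataset.yaml", epochs=100, imgsz=640)  # YAML name is a placeholder
```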
@@ -77,11 +77,11 @@ For a more in-depth guide, including data preparation and advanced training opti
### What pretrained models are available in Ultralytics?
-Ultralytics offers a diverse range of pretrained YOLOv8 models for various tasks:
+Ultralytics offers a diverse range of pretrained YOLO11 models for various tasks:
-- Object Detection: YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x
-- [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation): YOLOv8n-seg, YOLOv8s-seg, YOLOv8m-seg, YOLOv8l-seg, YOLOv8x-seg
-- Classification: YOLOv8n-cls, YOLOv8s-cls, YOLOv8m-cls, YOLOv8l-cls, YOLOv8x-cls
+- Object Detection: YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x
+- [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation): YOLO11n-seg, YOLO11s-seg, YOLO11m-seg, YOLO11l-seg, YOLO11x-seg
+- Classification: YOLO11n-cls, YOLO11s-cls, YOLO11m-cls, YOLO11l-cls, YOLO11x-cls
These models vary in size and complexity, offering different trade-offs between speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Explore the full range of [pretrained models](https://docs.ultralytics.com/models/yolov8/) to find the best fit for your project.
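For illustration, the task-specific suffix selects the model family when loading a checkpoint; the weight names below follow the lists above and are assumed to be available for automatic download.

```python
from ultralytics import YOLO

# The suffix in the weight name selects the task family
detector = YOLO("yolo11n.pt")  # object detection
segmenter = YOLO("yolo11n-seg.pt")  # instance segmentation
classifier = YOLO("yolo11n-cls.pt")  # image classification
```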
@@ -118,17 +118,17 @@ Absolutely! Ultralytics models are designed for versatile deployment across vari
Ultralytics provides export functions to convert models to various formats for deployment. Explore the wide range of [deployment options](https://docs.ultralytics.com/guides/model-deployment-options/) to find the best solution for your use case.
-### What's the difference between YOLOv5 and YOLOv8?
+### What's the difference between YOLOv8 and YOLO11?
Key distinctions include:
-- Architecture: YOLOv8 features an improved backbone and head design for enhanced performance.
-- Performance: YOLOv8 generally offers superior accuracy and speed compared to YOLOv5.
-- Tasks: YOLOv8 natively supports [object detection](https://www.ultralytics.com/glossary/object-detection), instance segmentation, and classification in a unified framework.
-- Codebase: YOLOv8 is implemented with a more modular and extensible architecture, facilitating easier customization and extension.
-- Training: YOLOv8 incorporates advanced training techniques like multi-dataset training and hyperparameter evolution for improved results.
+- Architecture: YOLO11 features an improved backbone and head design for enhanced performance.
+- Performance: YOLO11 generally offers superior accuracy and speed compared to YOLOv8.
+- Tasks: YOLO11 natively supports [object detection](https://www.ultralytics.com/glossary/object-detection), instance segmentation, and classification in a unified framework.
+- Codebase: YOLO11 is implemented with a more modular and extensible architecture, facilitating easier customization and extension.
+- Training: YOLO11 incorporates advanced training techniques like multi-dataset training and hyperparameter evolution for improved results.
-For an in-depth comparison of features and performance metrics, visit the [YOLOv5 vs YOLOv8](https://www.ultralytics.com/yolo) comparison page.
+For an in-depth comparison of features and performance metrics, visit the [YOLO](https://www.ultralytics.com/yolo) comparison page.
### How can I contribute to the Ultralytics open-source project?
@@ -176,7 +176,7 @@ Enhancing your YOLO model's performance can be achieved through several techniqu
1. [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning): Experiment with different hyperparameters using the [Hyperparameter Tuning Guide](https://docs.ultralytics.com/guides/hyperparameter-tuning/) to optimize model performance.
2. [Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation): Implement techniques like flip, scale, rotate, and color adjustments to enhance your training dataset and improve model generalization.
-3. [Transfer Learning](https://www.ultralytics.com/glossary/transfer-learning): Leverage pre-trained models and fine-tune them on your specific dataset using the [Train YOLOv8](https://docs.ultralytics.com/modes/train/) guide.
+3. [Transfer Learning](https://www.ultralytics.com/glossary/transfer-learning): Leverage pre-trained models and fine-tune them on your specific dataset using the [Train YOLO11](https://docs.ultralytics.com/modes/train/) guide.
4. Export to Efficient Formats: Convert your model to optimized formats like TensorRT or ONNX for faster inference using the [Export guide](../modes/export.md).
5. Benchmarking: Utilize the [Benchmark Mode](https://docs.ultralytics.com/modes/benchmark/) to measure and improve inference speed and accuracy systematically.
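For steps 4 and 5 in the list above, a brief sketch combining export with benchmarking; the model path is a placeholder and the small `coco8.yaml` sample dataset is assumed.

```python
from ultralytics import YOLO
from ultralytics.utils.benchmarks import benchmark

# Export a trained model to an optimized format, then benchmark speed/accuracy trade-offs
model = YOLO("path/to/your/model.pt")  # placeholder path to a trained model
model.export(format="onnx")  # use format="engine" for TensorRT

benchmark(model="path/to/your/model.pt", data="coco8.yaml", imgsz=640)  # assumed sample dataset
```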
@@ -195,22 +195,22 @@ Performing inference with a trained Ultralytics YOLO model is straightforward:
1. Load the Model:
-```python
-from ultralytics import YOLO
+ ```python
+ from ultralytics import YOLO
-model = YOLO("path/to/your/model.pt")
-```
+ model = YOLO("path/to/your/model.pt")
+ ```
2. Run Inference:
-```python
-results = model("path/to/image.jpg")
+ ```python
+ results = model("path/to/image.jpg")
-for r in results:
- print(r.boxes) # print bounding box predictions
- print(r.masks) # print mask predictions
- print(r.probs) # print class probabilities
-```
+ for r in results:
+ print(r.boxes) # print bounding box predictions
+ print(r.masks) # print mask predictions
+ print(r.probs) # print class probabilities
+ ```
For advanced inference techniques, including batch processing, video inference, and custom preprocessing, refer to the detailed [prediction guide](https://docs.ultralytics.com/modes/predict/).
diff --git a/docs/en/help/code_of_conduct.md b/docs/en/help/code-of-conduct.md
similarity index 100%
rename from docs/en/help/code_of_conduct.md
rename to docs/en/help/code-of-conduct.md
diff --git a/docs/en/help/contributing.md b/docs/en/help/contributing.md
index 1dad4f53141..29dd28e14db 100644
--- a/docs/en/help/contributing.md
+++ b/docs/en/help/contributing.md
@@ -11,21 +11,20 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics
-## Table of Contents
-
-1. [Code of Conduct](#code-of-conduct)
-2. [Contributing via Pull Requests](#contributing-via-pull-requests)
- - [CLA Signing](#cla-signing)
- - [Google-Style Docstrings](#google-style-docstrings)
- - [GitHub Actions CI Tests](#github-actions-ci-tests)
-3. [Reporting Bugs](#reporting-bugs)
-4. [License](#license)
-5. [Conclusion](#conclusion)
-6. [FAQ](#faq)
+
+
+
+
+ Watch: How to Contribute to Ultralytics Repository | Ultralytics Models, Datasets and Documentation 🚀
+
## Code of Conduct
-To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community.
+To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code-of-conduct/). Respect, kindness, and professionalism are at the heart of our community.
## Contributing via Pull Requests
@@ -123,7 +122,7 @@ All pull requests must pass the GitHub Actions [Continuous Integration](https://
## Reporting Bugs
-We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/): a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
+We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/): a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
## License
@@ -131,6 +130,118 @@ Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https:/
We encourage all contributors to familiarize themselves with the terms of the AGPL-3.0 license to contribute effectively and ethically to the Ultralytics open-source community.
+## Open-Sourcing Your Projects with YOLO and AGPL-3.0 Compliance
+
+If you're planning to develop and release your own project using YOLO models, the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.gnu.org/licenses/agpl-3.0.html) ensures that all derivative works remain open and accessible. This section provides guidance, including steps, best practices, and requirements, to help you open-source your project while complying with AGPL-3.0.
+
+### Options for Starting Your Project
+
+You can kick-start your project using one of these approaches:
+
+1. **Fork the Ultralytics YOLO Repository**
+ Fork the official Ultralytics YOLO repository directly from [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics).
+
+ - Use this option if you plan to build directly on the latest YOLO implementation.
+ - Modify the forked code as needed while ensuring compliance with AGPL-3.0.
+
+2. **Start from the Ultralytics Template Repository**
+ Use the Ultralytics template repository available at [https://github.com/ultralytics/template](https://github.com/ultralytics/template).
+ - Ideal for starting a clean, modular project with pre-configured best practices.
+ - This option provides a lightweight starting point for projects that integrate or extend YOLO models.
+
+### What You Need to Open-Source
+
+To comply with AGPL-3.0, you must make the following components of your project openly available:
+
+1. **Your Entire Project Source Code**:
+
+ - Include all code for the larger project containing your YOLO models, scripts, and utilities.
+
+2. **Model Weights** (if modified):
+
+ - Share any fine-tuned or modified model weights as part of the open-source project.
+
+3. **Configuration Files**:
+
+ - Provide configuration files such as `.yaml` or `.json` that define the training setup, hyperparameters, or deployment configurations.
+
+4. **Training Data (if redistributable)**:
+
+ - If you include preprocessed or generated data that is redistributable, ensure it is part of the repository or clearly linked.
+
+5. **Web Application Components**:
+
+ - Include all backend and frontend source code if your project is a web application, especially server-side components.
+
+6. **Documentation**:
+
+ - Include clear documentation on how to use, build, and extend your project.
+
+7. **Build and Deployment Scripts**:
+
+ - Share scripts for setting up the environment, building the application, and deploying it, such as `Dockerfiles`, `requirements.txt`, or `Makefiles`.
+
+8. **Testing Framework**:
+
+ - Open-source your test cases, such as unit and integration tests, to ensure reproducibility and reliability.
+
+9. **Third-Party Modifications**:
+ - Provide source code for any third-party libraries you've modified.
+
+### Steps to Open-Source Your Project
+
+1. **Choose Your Starting Point**:
+
+ - Fork the Ultralytics YOLO repository or start from the Ultralytics template repository.
+
+2. **Set Your License**:
+
+ - Add a `LICENSE` file containing the AGPL-3.0 text.
+
+3. **Credit Upstream Contributions**:
+
+ - Include attribution to Ultralytics YOLO in your README. For example:
+ ```
+ This project builds on [Ultralytics YOLO](https://github.com/ultralytics/ultralytics), licensed under AGPL-3.0.
+ ```
+
+4. **Make Your Code Public**:
+
+ - Push your entire project (including the components listed above) to a public GitHub repository.
+
+5. **Document Your Project**:
+
+ - Write a clear `README.md` with instructions for setup, usage, and contributions.
+
+6. **Enable Contributions**:
+ - Set up an issue tracker and contribution guidelines to foster collaboration.
+
+By following these steps and ensuring you include all necessary components, you'll comply with AGPL-3.0 and contribute meaningfully to the open-source community. Let's continue fostering collaboration and innovation in computer vision together! 🚀
+
+### Example Repository Structure
+
+Below is an example structure for an AGPL-3.0 project. See [https://github.com/ultralytics/template](https://github.com/ultralytics/template) for details.
+
+```
+my-yolo-project/
+│
+├── LICENSE               # AGPL-3.0 license text
+├── README.md             # Project overview and license information
+├── src/                  # Source code for the project
+│   ├── model.py          # YOLO-based model implementation
+│   ├── utils.py          # Utility scripts
+│   └── ...
+├── pyproject.toml        # Python dependencies
+├── tests/                # Unit and integration tests
+├── .github/              # GitHub Actions for CI
+│   └── workflows/
+│       └── ci.yml        # Continuous integration configuration
+└── docs/                 # Project documentation
+    └── index.md
+```
+
+By following this guide, you can ensure your project remains compliant with AGPL-3.0 while contributing to the open-source community. Your adherence strengthens the ethos of collaboration, transparency, and accessibility that drives the success of projects like YOLO.
+
## Conclusion
Thank you for your interest in contributing to [Ultralytics](https://www.ultralytics.com/) [open-source](https://github.com/ultralytics) YOLO projects. Your participation is essential in shaping the future of our software and building a vibrant community of innovation and collaboration. Whether you're enhancing code, reporting bugs, or suggesting new features, your contributions are invaluable.
@@ -165,4 +276,4 @@ the project's quality standards. Review the CI output and fix any issues. For de
### How do I report a bug in Ultralytics YOLO repositories?
-To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section.
+To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section.
diff --git a/docs/en/help/index.md b/docs/en/help/index.md
index e8f2eecd7ab..cc9b6c60211 100644
--- a/docs/en/help/index.md
+++ b/docs/en/help/index.md
@@ -10,8 +10,8 @@ Welcome to the Ultralytics Help page! We are dedicated to providing you with det
- [Contributing Guide](contributing.md): Discover the protocols for making contributions, including how to submit pull requests, report bugs, and more.
- [Continuous Integration (CI) Guide](CI.md): Gain insights into the CI processes we employ, complete with status reports for each Ultralytics repository.
- [Contributor License Agreement (CLA)](CLA.md): Review the CLA to understand the rights and responsibilities associated with contributing to Ultralytics projects.
-- [Minimum Reproducible Example (MRE) Guide](minimum_reproducible_example.md): Learn the process for creating an MRE, which is crucial for the timely and effective resolution of bug reports.
-- [Code of Conduct](code_of_conduct.md): Our community guidelines support a respectful and open atmosphere for all collaborators.
+- [Minimum Reproducible Example (MRE) Guide](minimum-reproducible-example.md): Learn the process for creating an MRE, which is crucial for the timely and effective resolution of bug reports.
+- [Code of Conduct](code-of-conduct.md): Our community guidelines support a respectful and open atmosphere for all collaborators.
- [Environmental, Health and Safety (EHS) Policy](environmental-health-safety.md): Delve into our commitment to sustainability and the well-being of all our stakeholders.
- [Security Policy](security.md): Familiarize yourself with our security protocols and the procedure for reporting vulnerabilities.
- [Privacy Policy](privacy.md): Read our privacy policy to understand how we protect your data and respect your privacy in all our services and operations.
@@ -22,17 +22,17 @@ We encourage you to review these resources for a seamless and productive experie
### What is Ultralytics YOLO and how does it benefit my [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) projects?
-Ultralytics YOLO (You Only Look Once) is a state-of-the-art, real-time [object detection](https://www.ultralytics.com/glossary/object-detection) model. Its latest version, YOLOv8, enhances speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and versatility, making it ideal for a wide range of applications, from real-time video analytics to advanced machine learning research. YOLO's efficiency in detecting objects in images and videos has made it the go-to solution for businesses and researchers looking to integrate robust [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities into their projects.
+Ultralytics YOLO (You Only Look Once) is a state-of-the-art, real-time [object detection](https://www.ultralytics.com/glossary/object-detection) model. Its latest version, YOLO11, enhances speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and versatility, making it ideal for a wide range of applications, from real-time video analytics to advanced machine learning research. YOLO's efficiency in detecting objects in images and videos has made it the go-to solution for businesses and researchers looking to integrate robust [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities into their projects.
-For more details on YOLOv8, visit the [YOLOv8 documentation](../tasks/detect.md).
+For more details on YOLO11, visit the [YOLO11 documentation](../tasks/detect.md).
### How do I contribute to Ultralytics YOLO repositories?
-Contributing to Ultralytics YOLO repositories is straightforward. Start by reviewing the [Contributing Guide](../help/contributing.md) to understand the protocols for submitting pull requests, reporting bugs, and more. You'll also need to sign the [Contributor License Agreement (CLA)](../help/CLA.md) to ensure your contributions are legally recognized. For effective bug reporting, refer to the [Minimum Reproducible Example (MRE) Guide](../help/minimum_reproducible_example.md).
+Contributing to Ultralytics YOLO repositories is straightforward. Start by reviewing the [Contributing Guide](../help/contributing.md) to understand the protocols for submitting pull requests, reporting bugs, and more. You'll also need to sign the [Contributor License Agreement (CLA)](../help/CLA.md) to ensure your contributions are legally recognized. For effective bug reporting, refer to the [Minimum Reproducible Example (MRE) Guide](../help/minimum-reproducible-example.md).
### Why should I use Ultralytics HUB for my machine learning projects?
-Ultralytics HUB offers a seamless, no-code solution for managing your machine learning projects. It enables you to generate, train, and deploy AI models like YOLOv8 effortlessly. Unique features include cloud training, real-time tracking, and intuitive dataset management. Ultralytics HUB simplifies the entire workflow, from data processing to [model deployment](https://www.ultralytics.com/glossary/model-deployment), making it an indispensable tool for both beginners and advanced users.
+Ultralytics HUB offers a seamless, no-code solution for managing your machine learning projects. It enables you to generate, train, and deploy AI models like YOLO11 effortlessly. Unique features include cloud training, real-time tracking, and intuitive dataset management. Ultralytics HUB simplifies the entire workflow, from data processing to [model deployment](https://www.ultralytics.com/glossary/model-deployment), making it an indispensable tool for both beginners and advanced users.
To get started, visit [Ultralytics HUB Quickstart](../hub/quickstart.md).
diff --git a/docs/en/help/minimum_reproducible_example.md b/docs/en/help/minimum-reproducible-example.md
similarity index 100%
rename from docs/en/help/minimum_reproducible_example.md
rename to docs/en/help/minimum-reproducible-example.md
diff --git a/docs/en/help/privacy.md b/docs/en/help/privacy.md
index 567a72aea55..fc669286d92 100644
--- a/docs/en/help/privacy.md
+++ b/docs/en/help/privacy.md
@@ -153,7 +153,8 @@ Ultralytics collects three primary types of data using Google Analytics:
- **Usage Metrics**: These include how often and in what ways the YOLO Python package is used, preferred features, and typical command-line arguments.
- **System Information**: General non-identifiable information about the computing environments where the package is run.
- **Performance Data**: Metrics related to the performance of models during training, validation, and inference.
- This data helps us enhance user experience and optimize software performance. Learn more in the [Anonymized Google Analytics](#anonymized-google-analytics) section.
+
+This data helps us enhance user experience and optimize software performance. Learn more in the [Anonymized Google Analytics](#anonymized-google-analytics) section.
### How can I disable data collection in the Ultralytics YOLO package?
diff --git a/docs/en/help/security.md b/docs/en/help/security.md
index 39fe3829ff9..73d5e99c4d4 100644
--- a/docs/en/help/security.md
+++ b/docs/en/help/security.md
@@ -17,7 +17,7 @@ We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct compreh
Our security strategy includes GitHub's [CodeQL](https://docs.github.com/en/code-security/code-scanning/introduction-to-code-scanning/about-code-scanning-with-codeql) scanning. CodeQL delves deep into our codebase, identifying complex vulnerabilities like SQL injection and XSS by analyzing the code's semantic structure. This advanced level of analysis ensures early detection and resolution of potential security risks.
-[](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml)
+[](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql)
## GitHub Dependabot Alerts
diff --git a/docs/en/hub/app/android.md b/docs/en/hub/app/android.md
index bca298fa9d5..d5d19ef91d4 100644
--- a/docs/en/hub/app/android.md
+++ b/docs/en/hub/app/android.md
@@ -6,7 +6,7 @@ keywords: Ultralytics, Android app, real-time object detection, YOLO models, Ten
# Ultralytics Android App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models
-
+
👋 Hello from the [Ultralytics](https://www.ultralytics.com/) Team! We've been working hard these last few months to launch [Ultralytics HUB](https://www.ultralytics.com/hub), a new web tool for training and deploying all your YOLOv5 and YOLOv8 🚀 models from one spot!
-We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!
+We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!
@@ -44,7 +44,7 @@ We hope that the resources here will help you get the most out of HUB. Please br
-
+
## Introduction
@@ -52,16 +52,16 @@ We hope that the resources here will help you get the most out of HUB. Please br
[Ultralytics HUB](https://www.ultralytics.com/hub) is designed to be user-friendly and intuitive, allowing users to quickly upload their datasets and train new YOLO models. It also offers a range of pre-trained models to choose from, making it extremely easy for users to get started. Once a model is trained, it can be effortlessly previewed in the [Ultralytics HUB App](app/index.md) before being deployed for real-time classification, [object detection](https://www.ultralytics.com/glossary/object-detection), and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) tasks.
-
- Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB
+ Watch: How to train Ultralytics YOLO11 on Custom Dataset using Ultralytics HUB | HUB Datasets 🚀
-We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!
+We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!
- [**Quickstart**](quickstart.md): Start training and deploying models in seconds.
- [**Datasets**](datasets.md): Learn how to prepare and upload your datasets.
diff --git a/docs/en/hub/inference-api.md b/docs/en/hub/inference-api.md
index b532e8150ce..fce59c8b21c 100644
--- a/docs/en/hub/inference-api.md
+++ b/docs/en/hub/inference-api.md
@@ -49,15 +49,9 @@ To shut down the dedicated endpoint, click on the **Stop Endpoint** button.
To use the [Ultralytics HUB](https://www.ultralytics.com/hub) Shared Inference API, follow the guides below.
-Free users have the following usage limits:
+The [Ultralytics HUB](https://www.ultralytics.com/hub) Shared Inference API has the following usage limits:
- 100 calls / hour
-- 1000 calls / month
-
-[Pro](./pro.md) users have the following usage limits:
-
-- 1000 calls / hour
-- 10000 calls / month
## Python
diff --git a/docs/en/hub/models.md b/docs/en/hub/models.md
index db098669ac3..c123f25b6e8 100644
--- a/docs/en/hub/models.md
+++ b/docs/en/hub/models.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLOv8. Start training today!.
-keywords: Ultralytics HUB, YOLOv8, custom AI models, model training, model deployment, model analysis, vision AI
+description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLO11. Start training today!
+keywords: Ultralytics HUB, YOLO11, custom AI models, model training, model deployment, model analysis, vision AI
---
# Ultralytics HUB Models
@@ -66,7 +66,7 @@ In this step, you have to choose the project in which you want to create your mo
!!! info
- You can read more about the available [YOLOv8](https://docs.ultralytics.com/models/yolov8/) (and [YOLOv5](https://docs.ultralytics.com/models/yolov5/)) architectures in our documentation.
+ You can read more about the available [YOLO models](https://docs.ultralytics.com/models/) and architectures in our documentation.
By default, your model will use a pre-trained model (trained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset) to reduce training time. You can change this behavior and tweak your model's configuration by opening the **Advanced Model Configuration** accordion.
@@ -221,6 +221,16 @@ Furthermore, you can preview your model in real-time directly on your [iOS](http
After you [train a model](#train-model), you can export it to 13 different formats, including ONNX, OpenVINO, CoreML, [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), Paddle and many others.
+
+
+
+ Watch: How to Export the Ultralytics YOLO11 to ONNX, OpenVINO and Other Formats using Ultralytics HUB 🚀
+
+

??? tip
diff --git a/docs/en/hub/quickstart.md b/docs/en/hub/quickstart.md
index 3fbcf23af14..6e377d27be7 100644
--- a/docs/en/hub/quickstart.md
+++ b/docs/en/hub/quickstart.md
@@ -9,13 +9,13 @@ keywords: Ultralytics HUB, Quickstart, YOLO models, dataset upload, project mana
[Ultralytics HUB](https://www.ultralytics.com/hub) is designed to be user-friendly and intuitive, allowing users to quickly upload their datasets and train new YOLO models. It also offers a range of pre-trained models to choose from, making it extremely easy for users to get started. Once a model is trained, it can be effortlessly previewed in the [Ultralytics HUB App](app/index.md) before being deployed for real-time classification, [object detection](https://www.ultralytics.com/glossary/object-detection), and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) tasks.
-
- Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB
+ Watch: How to train Ultralytics YOLO11 on Custom Dataset using Ultralytics HUB | HUB Datasets 🚀
## Get Started
@@ -98,4 +98,4 @@ You can report a bug, request a feature, or ask a question on Discord community for questions and discussions!
+ You can join our Discord community for questions and discussions!
diff --git a/docs/en/index.md b/docs/en/index.md
index 71abe212e65..91b8f87b913 100644
--- a/docs/en/index.md
+++ b/docs/en/index.md
@@ -6,32 +6,33 @@ keywords: Ultralytics, YOLO, YOLO11, object detection, image segmentation, deep
-Introducing [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics), the latest version of the acclaimed real-time object detection and image segmentation model. YOLO11 is built on cutting-edge advancements in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), offering unparalleled performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Its streamlined design makes it suitable for various applications and easily adaptable to different hardware platforms, from edge devices to cloud APIs.
+Introducing [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics), the latest version of the acclaimed real-time object detection and image segmentation model. YOLO11 is built on cutting-edge advancements in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) and [computer vision](https://www.ultralytics.com/blog/everything-you-need-to-know-about-computer-vision-in-2025), offering unparalleled performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Its streamlined design makes it suitable for various applications and easily adaptable to different hardware platforms, from edge devices to cloud APIs.
Explore the Ultralytics Docs, a comprehensive resource designed to help you understand and utilize its features and capabilities. Whether you are a seasoned [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) practitioner or new to the field, this hub aims to maximize YOLO's potential in your projects
@@ -49,16 +50,74 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde
-
+
## Where to Start
-- **Install** `ultralytics` with pip and get up and running in minutes [:material-clock-fast: Get Started](quickstart.md){ .md-button }
-- **Predict** new images and videos with YOLO [:octicons-image-16: Predict on Images](modes/predict.md){ .md-button }
-- **Train** a new YOLO model on your own custom dataset [:fontawesome-solid-brain: Train a Model](modes/train.md){ .md-button }
-- **Tasks** YOLO tasks like segment, classify, pose and track [:material-magnify-expand: Explore Tasks](tasks/index.md){ .md-button }
-- **[YOLO11](models/yolo11.md) NEW ๐**: Ultralytics' latest SOTA models [:material-magnify-expand: Explore a Dataset](models/yolo11.md){ .md-button }
+
+
+- :material-clock-fast:{ .lg .middle } **Getting Started**
+
+ ***
+
+ Install `ultralytics` with pip and get up and running in minutes to train a YOLO model
+
+ ***
+
+ [:octicons-arrow-right-24: Quickstart](quickstart.md)
+
+- :material-image:{ .lg .middle } **Predict**
+
+ ***
+
+ Predict on new images, videos and streams with YOLO
+
+ ***
+
+ [:octicons-arrow-right-24: Learn more](modes/predict.md)
+
+- :fontawesome-solid-brain:{ .lg .middle } **Train a Model**
+
+ ***
+
+ Train a new YOLO model on your own custom dataset from scratch or load and train on a pretrained model
+
+ ***
+
+ [:octicons-arrow-right-24: Learn more](modes/train.md)
+
+- :material-magnify-expand:{ .lg .middle } **Explore computer vision tasks**
+
+ ***
+
+ Discover YOLO tasks like detect, segment, classify, pose, OBB and track
+
+ ***
+
+ [:octicons-arrow-right-24: Explore Tasks](tasks/index.md)
+
+- :rocket:{ .lg .middle } **Explore YOLO11 NEW**
+
+ ***
+
+ Discover Ultralytics' latest state-of-the-art YOLO11 models and their capabilities
+
+ ***
+
+ [:octicons-arrow-right-24: YOLO11 Models 🚀 NEW](models/yolo11.md)
+
+- :material-scale-balance:{ .lg .middle } **Open Source, AGPL-3.0**
+
+ ***
+
+ Ultralytics offers two YOLO licenses: AGPL-3.0 and Enterprise. Explore YOLO on [GitHub](https://github.com/ultralytics/ultralytics).
+
+ ***
+
+ [:octicons-arrow-right-24: YOLO License](https://www.ultralytics.com/license)
+
+
@@ -79,12 +138,12 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde
- [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), launched in 2018, further enhanced the model's performance using a more efficient backbone network, multiple anchors and spatial pyramid pooling.
- [YOLOv4](https://arxiv.org/abs/2004.10934) was released in 2020, introducing innovations like Mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation), a new anchor-free detection head, and a new [loss function](https://www.ultralytics.com/glossary/loss-function).
- [YOLOv5](https://github.com/ultralytics/yolov5) further improved the model's performance and added new features such as hyperparameter optimization, integrated experiment tracking and automatic export to popular export formats.
-- [YOLOv6](https://github.com/meituan/YOLOv6) was open-sourced by [Meituan](https://about.meituan.com/) in 2022 and is in use in many of the company's autonomous delivery robots.
+- [YOLOv6](https://github.com/meituan/YOLOv6) was open-sourced by [Meituan](https://www.meituan.com/) in 2022 and is in use in many of the company's autonomous delivery robots.
- [YOLOv7](https://github.com/WongKinYiu/yolov7) added additional tasks such as pose estimation on the COCO keypoints dataset.
-- [YOLOv8](https://github.com/ultralytics/ultralytics) is the latest version of YOLO by Ultralytics. As a cutting-edge, state-of-the-art (SOTA) model, YOLOv8 builds on the success of previous versions, introducing new features and improvements for enhanced performance, flexibility, and efficiency. YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains.
+- [YOLOv8](https://github.com/ultralytics/ultralytics), released in 2023 by Ultralytics, introduced new features and improvements for enhanced performance, flexibility, and efficiency, supporting a full range of vision AI tasks.
- [YOLOv9](models/yolov9.md) introduces innovative methods like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN).
- [YOLOv10](models/yolov10.md) is created by researchers from [Tsinghua University](https://www.tsinghua.edu.cn/en/) using the [Ultralytics](https://www.ultralytics.com/) [Python package](https://pypi.org/project/ultralytics/). This version provides real-time [object detection](tasks/detect.md) advancements by introducing an End-to-End head that eliminates Non-Maximum Suppression (NMS) requirements.
-- **[YOLO11](models/yolo11.md) NEW ๐**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks.
+- **[YOLO11](models/yolo11.md) 🚀 NEW**: Ultralytics' latest YOLO models deliver state-of-the-art (SOTA) performance across multiple tasks, including [object detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md), so you can leverage their capabilities across diverse AI applications and domains.
## YOLO Licenses: How is Ultralytics YOLO licensed?
@@ -103,11 +162,15 @@ Ultralytics YOLO is the latest advancement in the acclaimed YOLO (You Only Look
### How can I get started with YOLO installation and setup?
-Getting started with YOLO is quick and straightforward. You can install the Ultralytics package using pip and get up and running in minutes. Here's a basic installation command:
+Getting started with YOLO is quick and straightforward. You can install the Ultralytics package using [pip](https://pypi.org/project/ultralytics/) and get up and running in minutes. Here's a basic installation command:
-```bash
-pip install ultralytics
-```
+!!! example "Installation using pip"
+
+ === "CLI"
+
+ ```bash
+ pip install ultralytics
+ ```
For a comprehensive step-by-step guide, visit our [quickstart guide](quickstart.md). This resource will help you with installation instructions, initial setup, and running your first model.
@@ -117,13 +180,30 @@ Training a custom YOLO model on your dataset involves a few detailed steps:
1. Prepare your annotated dataset.
2. Configure the training parameters in a YAML file.
-3. Use the `yolo train` command to start training.
+3. Use the `yolo TASK train` command to start training (each `TASK` has its own set of arguments).
+
+Here's an example for the Object Detection Task:
-Here's an example command:
+!!! example "Train Example for Object Detection Task"
-```bash
-yolo train model=yolo11n.pt data=coco128.yaml epochs=100 imgsz=640
-```
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a pre-trained YOLO model (you can choose n, s, m, l, or x versions)
+ model = YOLO("yolo11n.pt")
+
+ # Start training on your custom dataset
+ model.train(data="path/to/dataset.yaml", epochs=100, imgsz=640)
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Train a YOLO model from the command line
+ yolo detect train data=path/to/dataset.yaml epochs=100 imgsz=640
+ ```
For a detailed walkthrough, check out our [Train a Model](modes/train.md) guide, which includes examples and tips for optimizing your training process.
@@ -140,8 +220,27 @@ For more details, visit our [Licensing](https://www.ultralytics.com/license) pag
Ultralytics YOLO supports efficient and customizable multi-object tracking. To utilize tracking capabilities, you can use the `yolo track` command as shown below:
-```bash
-yolo track model=yolo11n.pt source=video.mp4
-```
+!!! example "Example for Object Tracking on a Video"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a pre-trained YOLO model
+ model = YOLO("yolo11n.pt")
+
+ # Start tracking objects in a video
+ # You can also use live video streams or webcam input
+ model.track(source="path/to/video.mp4")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Perform object tracking on a video from the command line
+ # You can specify different sources like webcam (0) or RTSP streams
+ yolo track source=path/to/video.mp4
+ ```
For a detailed guide on setting up and running object tracking, check our [tracking mode](modes/track.md) documentation, which explains the configuration and practical applications in real-time scenarios.
diff --git a/docs/en/integrations/albumentations.md b/docs/en/integrations/albumentations.md
new file mode 100644
index 00000000000..1302d039bf7
--- /dev/null
+++ b/docs/en/integrations/albumentations.md
@@ -0,0 +1,199 @@
+---
+comments: true
+description: Learn how to use Albumentations with YOLO11 to enhance data augmentation, improve model performance, and streamline your computer vision projects.
+keywords: Albumentations, YOLO11, data augmentation, Ultralytics, computer vision, object detection, model training, image transformations, machine learning
+---
+
+# Enhance Your Dataset to Train YOLO11 Using Albumentations
+
+When you are building [computer vision models](../models/index.md), the quality and variety of your [training data](../datasets/index.md) can play a big role in how well your model performs. Albumentations offers a fast, flexible, and efficient way to apply a wide range of image transformations that can improve your model's ability to adapt to real-world scenarios. It easily integrates with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) and can help you create robust datasets for [object detection](../tasks/detect.md), [segmentation](../tasks/segment.md), and [classification](../tasks/classify.md) tasks.
+
+By using Albumentations, you can boost your YOLO11 training data with techniques like geometric transformations and color adjustments. In this article, we'll see how Albumentations can improve your [data augmentation](../guides/preprocessing_annotated_data.md) process and make your [YOLO11 projects](../solutions/index.md) even more impactful. Let's get started!
+
+## Albumentations for Image Augmentation
+
+[Albumentations](https://albumentations.ai/) is an open-source image augmentation library created in [June 2018](https://arxiv.org/pdf/1809.06839). It is designed to simplify and accelerate the image augmentation process in [computer vision](https://www.ultralytics.com/blog/exploring-image-processing-computer-vision-and-machine-vision). Created with [performance](https://www.ultralytics.com/blog/measuring-ai-performance-to-weigh-the-impact-of-your-innovations) and flexibility in mind, it supports many diverse augmentation techniques, ranging from simple transformations like rotations and flips to more complex adjustments like brightness and contrast changes. Albumentations helps developers generate rich, varied datasets for tasks like [image classification](https://www.youtube.com/watch?v=5BO0Il_YYAg), [object detection](https://www.youtube.com/watch?v=5ku7npMrW40&t=1s), and [segmentation](https://www.youtube.com/watch?v=o4Zd-IeMlSY).
+
+You can use Albumentations to easily apply augmentations to images, [segmentation masks](https://www.ultralytics.com/glossary/image-segmentation), [bounding boxes](https://www.ultralytics.com/glossary/bounding-box), and [key points](../datasets/pose/index.md), and make sure that all elements of your dataset are transformed together. It works seamlessly with popular deep learning frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md), making it accessible for a wide range of projects.
+
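+As a rough illustration of that behavior, here is a minimal sketch (the image, box, and label below are made up for this example) that composes two common transforms and lets Albumentations keep a YOLO-format bounding box in sync with the augmented image:
+
+```python
+import albumentations as A
+import numpy as np
+
+# Synthetic 640x640 RGB image and one YOLO-format box (x_center, y_center, width, height)
+image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
+bboxes = [(0.5, 0.5, 0.2, 0.3)]
+class_labels = ["person"]
+
+# bbox_params tells Albumentations to transform the boxes together with the pixels
+transform = A.Compose(
+    [A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.5)],
+    bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
+)
+
+augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
+print(augmented["bboxes"])  # boxes are updated alongside the image
+```
+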
+Also, Albumentations is a great option for augmentation whether you're handling small datasets or large-scale [computer vision tasks](../tasks/index.md). It ensures fast and efficient processing, cutting down the time spent on data preparation. At the same time, it helps improve [model performance](../guides/yolo-performance-metrics.md), making your models more effective in real-world applications.
+
+## Key Features of Albumentations
+
+Albumentations offers many useful features that simplify complex image augmentations for a wide range of [computer vision applications](https://www.ultralytics.com/blog/exploring-how-the-applications-of-computer-vision-work). Here are some of the key features:
+
+- **Wide Range of Transformations**: Albumentations offers over [70 different transformations](https://github.com/albumentations-team/albumentations?tab=readme-ov-file#list-of-augmentations), including geometric changes (e.g., rotation, flipping), color adjustments (e.g., brightness, contrast), and noise addition (e.g., Gaussian noise). Having multiple options enables the creation of highly diverse and robust training datasets.
+
+
+
+
+
+- **High Performance Optimization**: Built on OpenCV and NumPy, Albumentations uses advanced optimization techniques like SIMD (Single Instruction, Multiple Data), which processes multiple data points simultaneously to speed up processing. It handles large datasets quickly, making it one of the fastest options available for image augmentation.
+
+- **Three Levels of Augmentation**: Albumentations supports three levels of augmentation: pixel-level transformations, spatial-level transformations, and mixing-level transformations. Pixel-level transformations only affect the input images without altering masks, bounding boxes, or key points. Spatial-level transformations, by contrast, transform the image together with its elements, such as masks and bounding boxes. Finally, mixing-level transformations are a unique way to augment data, as they combine multiple images into one (see the short sketch after this list).
+
+
+
+- **[Benchmarking Results](https://albumentations.ai/docs/benchmarking_results/)**: When it comes to benchmarking, Albumentations consistently outperforms other libraries, especially with large datasets.
+
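+As a quick, hypothetical sketch of those three levels (transform names chosen purely for illustration), the snippet below creates one pixel-level and one spatial-level transform; mixing-level transforms such as MixUp-style augmentations combine several images and are configured separately:
+
+```python
+import albumentations as A
+
+# Pixel-level: changes pixel values only; masks, boxes, and key points stay untouched
+pixel_level = A.RandomBrightnessContrast(p=1.0)
+
+# Spatial-level: changes geometry, so masks, boxes, and key points are transformed too
+spatial_level = A.Rotate(limit=15, p=1.0)
+
+# Mixing-level transforms combine multiple images into one; see the Albumentations
+# documentation for the transforms and arguments available in your installed version.
+```
+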
+## Why Should You Use Albumentations for Your Vision AI Projects?
+
+With respect to image augmentation, Albumentations stands out as a reliable tool for computer vision tasks. Here are a few key reasons why you should consider using it for your Vision AI projects:
+
+- **Easy-to-Use API**: Albumentations provides a single, straightforward API for applying a wide range of augmentations to images, masks, bounding boxes, and keypoints. It's designed to adapt easily to different datasets, making [data preparation](../guides/data-collection-and-annotation.md) simpler and more efficient.
+
+- **Rigorous Bug Testing**: Bugs in the augmentation pipeline can silently corrupt input data, often going unnoticed but ultimately degrading model performance. Albumentations addresses this with a thorough test suite that helps catch bugs early in development.
+
+- **Extensibility**: Albumentations makes it easy to add new augmentations and use them in computer vision pipelines through the same single interface as the built-in transformations.
+
+## How to Use Albumentations to Augment Data for YOLO11 Training
+
+Now that we've covered what Albumentations is and what it can do, let's look at how to use it to augment your data for YOLO11 model training. It's easy to set up because it integrates directly into [Ultralytics' training mode](../modes/train.md) and applies automatically if you have the Albumentations package installed.
+
+### Installation
+
+To use Albumentations with YOLO11, start by making sure you have the necessary packages installed. If Albumentations isn't installed, the augmentations won't be applied during training. Once set up, you'll be ready to create an augmented dataset for training, with Albumentations integrated to enhance your model automatically.
+
+!!! tip "Installation"
+
+ === "CLI"
+
+ ```bash
+ # Install the required packages
+ pip install albumentations ultralytics
+ ```
+
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+
+### Usage
+
+After installing the necessary packages, you're ready to start using Albumentations with YOLO11. When you train YOLO11, a set of augmentations is automatically applied through its integration with Albumentations, making it easy to enhance your model's performance.
+
+!!! example "Usage"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a pre-trained model
+ model = YOLO("yolo11n.pt")
+
+ # Train the model
+ results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
+ ```
+
+Next, let's take a closer look at the specific augmentations that are applied during training.
+
+### Blur
+
+The Blur transformation in Albumentations applies a simple blur effect to the image by averaging pixel values within a small square area, or kernel. This is done using OpenCV's `cv2.blur` function, which helps reduce noise in the image, though it also slightly reduces image detail.
+
+Here are the parameters and values used in this integration:
+
+- **blur_limit**: This controls the size range of the blur effect. The default range is (3, 7), meaning the kernel size for the blur can vary between 3 and 7 pixels, with only odd numbers allowed to keep the blur centered.
+
+- **p**: The probability of applying the blur. In the integration, p=0.01, so there's a 1% chance that this blur will be applied to each image. The low probability allows for occasional blur effects, introducing a bit of variation to help the model generalize without over-blurring the images.
+
+
+
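+If you want to inspect this effect on its own, a minimal sketch along these lines (with a synthetic image and `p=1.0` so the blur is always applied, unlike the 1% default above) should work:
+
+```python
+import albumentations as A
+import numpy as np
+
+# Synthetic test image; in practice you would load a real photo, e.g. with cv2.imread
+image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
+
+# Same kernel range as the integration, but p=1.0 so the effect is always visible
+blurred = A.Blur(blur_limit=(3, 7), p=1.0)(image=image)["image"]
+```
+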
+### Median Blur
+
+The MedianBlur transformation in Albumentations applies a median blur effect to the image, which is particularly useful for reducing noise while preserving edges. Unlike typical blurring methods, MedianBlur uses a median filter, which is especially effective at removing salt-and-pepper noise while maintaining sharpness around the edges.
+
+Here are the parameters and values used in this integration:
+
+- **blur_limit**: This parameter controls the maximum size of the blurring kernel. In this integration, it defaults to a range of (3, 7), meaning the kernel size for the blur is randomly chosen between 3 and 7 pixels, with only odd values allowed to ensure proper alignment.
+
+- **p**: Sets the probability of applying the median blur. Here, p=0.01, so the transformation has a 1% chance of being applied to each image. This low probability ensures that the median blur is used sparingly, helping the model generalize by occasionally seeing images with reduced noise and preserved edges.
+
+The image below shows an example of this augmentation applied to an image.
+
+
+
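+To see the edge-preserving denoising for yourself, a small sketch like the one below (a synthetic image with artificial salt noise, and `p=1.0` instead of the 1% integration default) can help:
+
+```python
+import albumentations as A
+import numpy as np
+
+# Synthetic image with a sprinkle of "salt" noise, purely for illustration
+image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
+image[np.random.rand(640, 640) < 0.02] = 255
+
+# Median filtering removes the isolated noisy pixels while keeping edges sharp
+denoised = A.MedianBlur(blur_limit=(3, 7), p=1.0)(image=image)["image"]
+```
+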
+### Grayscale
+
+The ToGray transformation in Albumentations converts an image to grayscale, reducing it to a single-channel format and optionally replicating this channel to match a specified number of output channels. Different methods can be used to adjust how grayscale brightness is calculated, ranging from simple averaging to more advanced techniques for realistic perception of contrast and brightness.
+
+Here are the parameters and values used in this integration:
+
+- **num_output_channels**: Sets the number of channels in the output image. If this value is more than 1, the single grayscale channel will be replicated to create a multichannel grayscale image. By default, it's set to 3, giving a grayscale image with three identical channels.
+
+- **method**: Defines the grayscale conversion method. The default method, "weighted_average", applies a formula (0.299R + 0.587G + 0.114B) that closely aligns with human perception, providing a natural-looking grayscale effect. Other options, like "from_lab", "desaturation", "average", "max", and "pca", offer alternative ways to create grayscale images based on various needs for speed, brightness emphasis, or detail preservation.
+
+- **p**: Controls how often the grayscale transformation is applied. With p=0.01, there is a 1% chance of converting each image to grayscale, making it possible for a mix of color and grayscale images to help the model generalize better.
+
+The image below shows an example of this grayscale transformation applied.
+
+
+
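+A quick way to try this conversion in isolation is a sketch like the one below; note that the `num_output_channels` and `method` arguments described above are exposed in recent Albumentations releases, while older versions may only accept `p`:
+
+```python
+import albumentations as A
+import numpy as np
+
+image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
+
+# p=1.0 forces the conversion so the result is easy to inspect
+to_gray = A.ToGray(num_output_channels=3, method="weighted_average", p=1.0)
+gray = to_gray(image=image)["image"]
+print(gray.shape)  # (640, 640, 3) with three identical channels
+```
+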
+### Contrast Limited Adaptive Histogram Equalization (CLAHE)
+
+The CLAHE transformation in Albumentations applies Contrast Limited Adaptive Histogram Equalization (CLAHE), a technique that enhances image contrast by equalizing the histogram in localized regions (tiles) instead of across the whole image. CLAHE produces a balanced enhancement effect, avoiding the overly amplified contrast that can result from standard histogram equalization, especially in areas with initially low contrast.
+
+Here are the parameters and values used in this integration:
+
+- **clip_limit**: Controls the contrast enhancement range. Set to a default range of (1, 4), it determines the maximum contrast allowed in each tile. Higher values are used for more contrast but may also introduce noise.
+
+- **tile_grid_size**: Defines the size of the grid of tiles, typically as (rows, columns). The default value is (8, 8), meaning the image is divided into an 8x8 grid. Smaller tile sizes provide more localized adjustments, while larger ones create effects closer to global equalization.
+
+- **p**: The probability of applying CLAHE. Here, p=0.01 introduces the enhancement effect only 1% of the time, ensuring that contrast adjustments are applied sparingly for occasional variation in training images.
+
+The image below shows an example of the CLAHE transformation applied.
+
+
+
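+Putting the four transformations above together, a rough stand-alone approximation of the defaults described in this guide might look like the sketch below; during YOLO11 training you do not need to build this yourself, as the integration applies an equivalent pipeline automatically:
+
+```python
+import albumentations as A
+
+# Approximate recreation of the defaults discussed above, for experimentation outside of training
+augmentations = A.Compose(
+    [
+        A.Blur(blur_limit=(3, 7), p=0.01),
+        A.MedianBlur(blur_limit=(3, 7), p=0.01),
+        A.ToGray(p=0.01),
+        A.CLAHE(clip_limit=(1, 4), tile_grid_size=(8, 8), p=0.01),
+    ]
+)
+```
+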
+## Keep Learning about Albumentations
+
+If you are interested in learning more about Albumentations, check out the following resources for more in-depth instructions and examples:
+
+- **[Albumentations Documentation](https://albumentations.ai/docs/)**: The official documentation provides a full range of supported transformations and advanced usage techniques.
+
+- **[Ultralytics Albumentations Guide](https://docs.ultralytics.com/reference/data/augment/?h=albumentation#ultralytics.data.augment.Albumentations)**: Get a closer look at the details of the function that facilitates this integration.
+
+- **[Albumentations GitHub Repository](https://github.com/albumentations-team/albumentations/)**: The repository includes examples, benchmarks, and discussions to help you get started with customizing augmentations.
+
+## Key Takeaways
+
+In this guide, we explored the key aspects of Albumentations, a great Python library for image augmentation. We discussed its wide range of transformations, optimized performance, and how you can use it in your next YOLO11 project.
+
+Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find valuable resources and insights there.
+
+## FAQ
+
+### How can I integrate Albumentations with YOLO11 for improved data augmentation?
+
+Albumentations integrates seamlessly with YOLO11 and applies automatically during training if you have the package installed. Here's how to get started:
+
+```python
+# Install required packages
+# !pip install albumentations ultralytics
+from ultralytics import YOLO
+
+# Load and train model with automatic augmentations
+model = YOLO("yolo11n.pt")
+model.train(data="coco8.yaml", epochs=100)
+```
+
+The integration includes optimized augmentations like blur, median blur, grayscale conversion, and CLAHE with carefully tuned probabilities to enhance model performance.
+
+### What are the key benefits of using Albumentations over other augmentation libraries?
+
+Albumentations stands out for several reasons:
+
+1. Performance: Built on OpenCV and NumPy with SIMD optimization for superior speed
+2. Flexibility: Supports 70+ transformations across pixel-level, spatial-level, and mixing-level augmentations
+3. Compatibility: Works seamlessly with popular frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md)
+4. Reliability: Extensive test suite prevents silent data corruption
+5. Ease of use: Single unified API for all augmentation types
+
+### What types of computer vision tasks can benefit from Albumentations augmentation?
+
+Albumentations enhances various [computer vision tasks](../tasks/index.md) including:
+
+- [Object Detection](../tasks/detect.md): Improves model robustness to lighting, scale, and orientation variations
+- [Instance Segmentation](../tasks/segment.md): Enhances mask prediction accuracy through diverse transformations
+- [Classification](../tasks/classify.md): Increases model generalization with color and geometric augmentations
+- [Pose Estimation](../tasks/pose.md): Helps models adapt to different viewpoints and lighting conditions
+
+The library's diverse augmentation options make it valuable for any vision task requiring robust model performance.
diff --git a/docs/en/integrations/amazon-sagemaker.md b/docs/en/integrations/amazon-sagemaker.md
index 9a82037ac24..366a4f13884 100644
--- a/docs/en/integrations/amazon-sagemaker.md
+++ b/docs/en/integrations/amazon-sagemaker.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn step-by-step how to deploy Ultralytics' YOLOv8 on Amazon SageMaker Endpoints, from setup to testing, for powerful real-time inference with AWS services.
-keywords: YOLOv8, Amazon SageMaker, AWS, Ultralytics, machine learning, computer vision, model deployment, AWS CloudFormation, AWS CDK, real-time inference
+description: Learn step-by-step how to deploy Ultralytics' YOLO11 on Amazon SageMaker Endpoints, from setup to testing, for powerful real-time inference with AWS services.
+keywords: YOLO11, Amazon SageMaker, AWS, Ultralytics, machine learning, computer vision, model deployment, AWS CloudFormation, AWS CDK, real-time inference
---
-# A Guide to Deploying YOLOv8 on Amazon SageMaker Endpoints
+# A Guide to Deploying YOLO11 on Amazon SageMaker Endpoints
-Deploying advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLOv8](https://github.com/ultralytics/ultralytics) on Amazon SageMaker Endpoints opens up a wide range of possibilities for various [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) applications. The key to effectively using these models lies in understanding their setup, configuration, and deployment processes. YOLOv8 becomes even more powerful when integrated seamlessly with Amazon SageMaker, a robust and scalable machine learning service by AWS.
+Deploying advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLO11](https://github.com/ultralytics/ultralytics) on Amazon SageMaker Endpoints opens up a wide range of possibilities for various [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) applications. The key to effectively using these models lies in understanding their setup, configuration, and deployment processes. YOLO11 becomes even more powerful when integrated seamlessly with Amazon SageMaker, a robust and scalable machine learning service by AWS.
-This guide will take you through the process of deploying YOLOv8 [PyTorch](https://www.ultralytics.com/glossary/pytorch) models on Amazon SageMaker Endpoints step by step. You'll learn the essentials of preparing your AWS environment, configuring the model appropriately, and using tools like AWS CloudFormation and the AWS Cloud Development Kit (CDK) for deployment.
+This guide will take you through the process of deploying YOLO11 [PyTorch](https://www.ultralytics.com/glossary/pytorch) models on Amazon SageMaker Endpoints step by step. You'll learn the essentials of preparing your AWS environment, configuring the model appropriately, and using tools like AWS CloudFormation and the AWS Cloud Development Kit (CDK) for deployment.
## Amazon SageMaker
@@ -18,9 +18,9 @@ This guide will take you through the process of deploying YOLOv8 [PyTorch](https
[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a machine learning service from Amazon Web Services (AWS) that simplifies the process of building, training, and deploying machine learning models. It provides a broad range of tools for handling various aspects of machine learning workflows. This includes automated features for tuning models, options for training models at scale, and straightforward methods for deploying models into production. SageMaker supports popular machine learning frameworks, offering the flexibility needed for diverse projects. Its features also cover data labeling, workflow management, and performance analysis.
-## Deploying YOLOv8 on Amazon SageMaker Endpoints
+## Deploying YOLO11 on Amazon SageMaker Endpoints
-Deploying YOLOv8 on Amazon SageMaker lets you use its managed environment for real-time inference and take advantage of features like autoscaling. Take a look at the AWS architecture below.
+Deploying YOLO11 on Amazon SageMaker lets you use its managed environment for real-time inference and take advantage of features like autoscaling. Take a look at the AWS architecture below.
@@ -40,9 +40,9 @@ First, ensure you have the following prerequisites in place:
- Adequate Service Quota: Confirm that you have sufficient quotas for two separate resources in Amazon SageMaker: one for `ml.m5.4xlarge` for endpoint usage and another for `ml.m5.4xlarge` for notebook instance usage. Each of these requires a minimum of one quota value. If your current quotas are below this requirement, it's important to request an increase for each. You can request a quota increase by following the detailed instructions in the [AWS Service Quotas documentation](https://docs.aws.amazon.com/servicequotas/latest/userguide/request-quota-increase.html#quota-console-increase).
-### Step 2: Clone the YOLOv8 SageMaker Repository
+### Step 2: Clone the YOLO11 SageMaker Repository
-The next step is to clone the specific AWS repository that contains the resources for deploying YOLOv8 on SageMaker. This repository, hosted on GitHub, includes the necessary CDK scripts and configuration files.
+The next step is to clone the specific AWS repository that contains the resources for deploying YOLO11 on SageMaker. This repository, hosted on GitHub, includes the necessary CDK scripts and configuration files.
- Clone the GitHub Repository: Execute the following command in your terminal to clone the host-yolov8-on-sagemaker-endpoint repository:
@@ -104,11 +104,11 @@ cdk bootstrap
cdk deploy
```
-### Step 5: Deploy the YOLOv8 Model
+### Step 5: Deploy the YOLO Model
-Before diving into the deployment instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the deployment instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
-After creating the AWS CloudFormation Stack, the next step is to deploy YOLOv8.
+After creating the AWS CloudFormation Stack, the next step is to deploy YOLO11.
- Open the Notebook Instance: Go to the AWS Console and navigate to the Amazon SageMaker service. Select "Notebook Instances" from the dashboard, then locate the notebook instance that was created by your CDK deployment script. Open the notebook instance to access the Jupyter environment.
@@ -136,18 +136,18 @@ def output_fn(prediction_output):
return json.dumps(infer)
```
-- Deploy the Endpoint Using 1_DeployEndpoint.ipynb: In the Jupyter environment, open the 1_DeployEndpoint.ipynb notebook located in the sm-notebook directory. Follow the instructions in the notebook and run the cells to download the YOLOv8 model, package it with the updated inference code, and upload it to an Amazon S3 bucket. The notebook will guide you through creating and deploying a SageMaker endpoint for the YOLOv8 model.
+- Deploy the Endpoint Using 1_DeployEndpoint.ipynb: In the Jupyter environment, open the 1_DeployEndpoint.ipynb notebook located in the sm-notebook directory. Follow the instructions in the notebook and run the cells to download the YOLO11 model, package it with the updated inference code, and upload it to an Amazon S3 bucket. The notebook will guide you through creating and deploying a SageMaker endpoint for the YOLO11 model.
### Step 6: Testing Your Deployment
-Now that your YOLOv8 model is deployed, it's important to test its performance and functionality.
+Now that your YOLO11 model is deployed, it's important to test its performance and functionality.
- Open the Test Notebook: In the same Jupyter environment, locate and open the 2_TestEndpoint.ipynb notebook, also in the sm-notebook directory.
- Run the Test Notebook: Follow the instructions within the notebook to test the deployed SageMaker endpoint. This includes sending an image to the endpoint and running inferences. Then, you'll plot the output to visualize the model's performance and [accuracy](https://www.ultralytics.com/glossary/accuracy), as shown below.
-
+
- Clean-Up Resources: The test notebook will also guide you through the process of cleaning up the endpoint and the hosted model. This is an important step to manage costs and resources effectively, especially if you do not plan to use the deployed model immediately.
@@ -160,24 +160,24 @@ After testing, continuous monitoring and management of your deployed model are e
- Manage the Endpoint: Use the SageMaker console for ongoing management of the endpoint. This includes scaling, updating, or redeploying the model as required.
-By completing these steps, you will have successfully deployed and tested a YOLOv8 model on Amazon SageMaker Endpoints. This process not only equips you with practical experience in using AWS services for machine learning deployment but also lays the foundation for deploying other advanced models in the future.
+By completing these steps, you will have successfully deployed and tested a YOLO11 model on Amazon SageMaker Endpoints. This process not only equips you with practical experience in using AWS services for machine learning deployment but also lays the foundation for deploying other advanced models in the future.
## Summary
-This guide took you step by step through deploying YOLOv8 on Amazon SageMaker Endpoints using AWS CloudFormation and the AWS Cloud Development Kit (CDK). The process includes cloning the necessary GitHub repository, setting up the CDK environment, deploying the model using AWS services, and testing its performance on SageMaker.
+This guide took you step by step through deploying YOLO11 on Amazon SageMaker Endpoints using AWS CloudFormation and the AWS Cloud Development Kit (CDK). The process includes cloning the necessary GitHub repository, setting up the CDK environment, deploying the model using AWS services, and testing its performance on SageMaker.
For more technical details, refer to [this article](https://aws.amazon.com/blogs/machine-learning/hosting-yolov8-pytorch-model-on-amazon-sagemaker-endpoints/) on the AWS Machine Learning Blog. You can also check out the official [Amazon SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints.html) for more insights into various features and functionalities.
-Are you interested in learning more about different YOLOv8 integrations? Visit the [Ultralytics integrations guide page](../integrations/index.md) to discover additional tools and capabilities that can enhance your machine-learning projects.
+Are you interested in learning more about different YOLO11 integrations? Visit the [Ultralytics integrations guide page](../integrations/index.md) to discover additional tools and capabilities that can enhance your machine-learning projects.
## FAQ
-### How do I deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints?
+### How do I deploy the Ultralytics YOLO11 model on Amazon SageMaker Endpoints?
-To deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints, follow these steps:
+To deploy the Ultralytics YOLO11 model on Amazon SageMaker Endpoints, follow these steps:
1. **Set Up Your AWS Environment**: Ensure you have an AWS Account, IAM roles with necessary permissions, and the AWS CLI configured. Install AWS CDK if not already done (refer to the [AWS CDK instructions](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)).
-2. **Clone the YOLOv8 SageMaker Repository**:
+2. **Clone the YOLO11 SageMaker Repository**:
```bash
git clone https://github.com/aws-samples/host-yolov8-on-sagemaker-endpoint.git
cd host-yolov8-on-sagemaker-endpoint/yolov8-pytorch-cdk
@@ -196,11 +196,11 @@ To deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints, follow the
cdk deploy
```
-For further details, review the [documentation section](#step-5-deploy-the-yolov8-model).
+For further details, review the [documentation section](#step-5-deploy-the-yolo-model).
-### What are the prerequisites for deploying YOLOv8 on Amazon SageMaker?
+### What are the prerequisites for deploying YOLO11 on Amazon SageMaker?
-To deploy YOLOv8 on Amazon SageMaker, ensure you have the following prerequisites:
+To deploy YOLO11 on Amazon SageMaker, ensure you have the following prerequisites:
1. **AWS Account**: Active AWS account ([sign up here](https://aws.amazon.com/)).
2. **IAM Roles**: Configured IAM roles with permissions for SageMaker, CloudFormation, and Amazon S3.
@@ -210,9 +210,9 @@ To deploy YOLOv8 on Amazon SageMaker, ensure you have the following prerequisite
For detailed setup, refer to [this section](#step-1-setup-your-aws-environment).
-### Why should I use Ultralytics YOLOv8 on Amazon SageMaker?
+### Why should I use Ultralytics YOLO11 on Amazon SageMaker?
-Using Ultralytics YOLOv8 on Amazon SageMaker offers several advantages:
+Using Ultralytics YOLO11 on Amazon SageMaker offers several advantages:
1. **Scalability and Management**: SageMaker provides a managed environment with features like autoscaling, which helps in real-time inference needs.
2. **Integration with AWS Services**: Seamlessly integrate with other AWS services, such as S3 for data storage, CloudFormation for infrastructure as code, and CloudWatch for monitoring.
@@ -221,9 +221,9 @@ Using Ultralytics YOLOv8 on Amazon SageMaker offers several advantages:
Explore more about the advantages of using SageMaker in the [introduction section](#amazon-sagemaker).
-### Can I customize the inference logic for YOLOv8 on Amazon SageMaker?
+### Can I customize the inference logic for YOLO11 on Amazon SageMaker?
-Yes, you can customize the inference logic for YOLOv8 on Amazon SageMaker:
+Yes, you can customize the inference logic for YOLO11 on Amazon SageMaker:
1. **Modify `inference.py`**: Locate and customize the `output_fn` function in the `inference.py` file to tailor output formats.
@@ -243,11 +243,11 @@ Yes, you can customize the inference logic for YOLOv8 on Amazon SageMaker:
2. **Deploy Updated Model**: Ensure you redeploy the model using Jupyter notebooks provided (`1_DeployEndpoint.ipynb`) to include these changes.
-Refer to the [detailed steps](#step-5-deploy-the-yolov8-model) for deploying the modified model.
+Refer to the [detailed steps](#step-5-deploy-the-yolo-model) for deploying the modified model.
-### How can I test the deployed YOLOv8 model on Amazon SageMaker?
+### How can I test the deployed YOLO11 model on Amazon SageMaker?
-To test the deployed YOLOv8 model on Amazon SageMaker:
+To test the deployed YOLO11 model on Amazon SageMaker:
1. **Open the Test Notebook**: Locate the `2_TestEndpoint.ipynb` notebook in the SageMaker Jupyter environment.
2. **Run the Notebook**: Follow the notebook's instructions to send an image to the endpoint, perform inference, and display results.
diff --git a/docs/en/integrations/clearml.md b/docs/en/integrations/clearml.md
index 5245d887408..465f2fa8d15 100644
--- a/docs/en/integrations/clearml.md
+++ b/docs/en/integrations/clearml.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Discover how to integrate YOLOv8 with ClearML to streamline your MLOps workflow, automate experiments, and enhance model management effortlessly.
-keywords: YOLOv8, ClearML, MLOps, Ultralytics, machine learning, object detection, model training, automation, experiment management
+description: Discover how to integrate YOLO11 with ClearML to streamline your MLOps workflow, automate experiments, and enhance model management effortlessly.
+keywords: YOLO11, ClearML, MLOps, Ultralytics, machine learning, object detection, model training, automation, experiment management
---
-# Training YOLOv8 with ClearML: Streamlining Your MLOps Workflow
+# Training YOLO11 with ClearML: Streamlining Your MLOps Workflow
MLOps bridges the gap between creating and deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models in real-world settings. It focuses on efficient deployment, scalability, and ongoing management to ensure models perform well in practical applications.
-[Ultralytics YOLOv8](https://www.ultralytics.com/) effortlessly integrates with ClearML, streamlining and enhancing your [object detection](https://www.ultralytics.com/glossary/object-detection) model's training and management. This guide will walk you through the integration process, detailing how to set up ClearML, manage experiments, automate model management, and collaborate effectively.
+[Ultralytics YOLO11](https://www.ultralytics.com/) effortlessly integrates with ClearML, streamlining and enhancing your [object detection](https://www.ultralytics.com/glossary/object-detection) model's training and management. This guide will walk you through the integration process, detailing how to set up ClearML, manage experiments, automate model management, and collaborate effectively.
## ClearML
@@ -18,9 +18,9 @@ MLOps bridges the gap between creating and deploying [machine learning](https://
[ClearML](https://clear.ml/) is an innovative open-source MLOps platform that is skillfully designed to automate, monitor, and orchestrate machine learning workflows. Its key features include automated logging of all training and inference data for full experiment reproducibility, an intuitive web UI for easy [data visualization](https://www.ultralytics.com/glossary/data-visualization) and analysis, advanced hyperparameter [optimization algorithms](https://www.ultralytics.com/glossary/optimization-algorithm), and robust model management for efficient deployment across various platforms.
-## YOLOv8 Training with ClearML
+## YOLO11 Training with ClearML
-You can bring automation and efficiency to your machine learning workflow by improving your training process by integrating YOLOv8 with ClearML.
+You can bring automation and efficiency to your machine learning workflow by integrating YOLO11 with ClearML to improve your training process.
## Installation
@@ -31,11 +31,11 @@ To install the required packages, run:
=== "CLI"
```bash
- # Install the required packages for YOLOv8 and ClearML
+ # Install the required packages for YOLO11 and ClearML
pip install ultralytics clearml
```
-For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
## Configuring ClearML
@@ -56,7 +56,7 @@ After executing this command, visit the [ClearML Settings page](https://app.clea
## Usage
-Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
!!! example "Usage"
@@ -70,11 +70,11 @@ Before diving into the usage instructions, be sure to check out the range of [YO
# Step 1: Creating a ClearML Task
task = Task.init(project_name="my_project", task_name="my_yolov8_task")
- # Step 2: Selecting the YOLOv8 Model
- model_variant = "yolov8n"
+ # Step 2: Selecting the YOLO11 Model
+ model_variant = "yolo11n"
task.set_parameter("model_variant", model_variant)
- # Step 3: Loading the YOLOv8 Model
+ # Step 3: Loading the YOLO11 Model
model = YOLO(f"{model_variant}.pt")
# Step 4: Setting Up Training Arguments
@@ -91,11 +91,11 @@ Let's understand the steps showcased in the usage code snippet above.
**Step 1: Creating a ClearML Task**: A new task is initialized in ClearML, specifying your project and task names. This task will track and manage your model's training.
-**Step 2: Selecting the YOLOv8 Model**: The `model_variant` variable is set to 'yolov8n', one of the YOLOv8 models. This variant is then logged in ClearML for tracking.
+**Step 2: Selecting the YOLO11 Model**: The `model_variant` variable is set to 'yolo11n', one of the YOLO11 models. This variant is then logged in ClearML for tracking.
-**Step 3: Loading the YOLOv8 Model**: The selected YOLOv8 model is loaded using Ultralytics' YOLO class, preparing it for training.
+**Step 3: Loading the YOLO11 Model**: The selected YOLO11 model is loaded using Ultralytics' YOLO class, preparing it for training.
-**Step 4: Setting Up Training Arguments**: Key training arguments like the dataset (`coco8.yaml`) and the number of [epochs](https://www.ultralytics.com/glossary/epoch) (`16`) are organized in a dictionary and connected to the ClearML task. This allows for tracking and potential modification via the ClearML UI. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+**Step 4: Setting Up Training Arguments**: Key training arguments like the dataset (`coco8.yaml`) and the number of [epochs](https://www.ultralytics.com/glossary/epoch) (`16`) are organized in a dictionary and connected to the ClearML task. This allows for tracking and potential modification via the ClearML UI. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md).
**Step 5: Initiating Model Training**: The model training is started with the specified arguments. The results of the training process are captured in the `results` variable.
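+
+Putting these five steps together, a minimal end-to-end sketch (using the same `coco8.yaml` dataset and 16 epochs described above, and assuming ClearML's standard `Task.connect()` call to attach the arguments dictionary) could look like this:
+
+```python
+from clearml import Task
+
+from ultralytics import YOLO
+
+# Step 1: Create a ClearML task to track and manage this training run
+task = Task.init(project_name="my_project", task_name="my_yolov8_task")
+
+# Step 2: Select the model variant and log it as a task parameter
+model_variant = "yolo11n"
+task.set_parameter("model_variant", model_variant)
+
+# Step 3: Load the selected model
+model = YOLO(f"{model_variant}.pt")
+
+# Step 4: Organize the training arguments and connect them to the task
+args = dict(data="coco8.yaml", epochs=16)
+task.connect(args)
+
+# Step 5: Start training; the results are captured for later analysis
+results = model.train(**args)
+```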
@@ -106,7 +106,7 @@ Upon running the usage code snippet above, you can expect the following output:
- A confirmation message indicating the creation of a new ClearML task, along with its unique ID.
- An informational message about the script code being stored, indicating that the code execution is being tracked by ClearML.
- A URL link to the ClearML results page where you can monitor the training progress and view detailed logs.
-- Download progress for the YOLOv8 model and the specified dataset, followed by a summary of the model architecture and training configuration.
+- Download progress for the YOLO11 model and the specified dataset, followed by a summary of the model architecture and training configuration.
- Initialization messages for various training components like TensorBoard, Automatic [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) (AMP), and dataset preparation.
- Finally, the training process starts, with progress updates as the model trains on the specified dataset. For an in-depth understanding of the performance metrics used during training, read [our guide on performance metrics](../guides/yolo-performance-metrics.md).
@@ -151,7 +151,7 @@ For a visual walkthrough of what the ClearML Results Page looks like, watch the
allowfullscreen>
- Watch: YOLOv8 MLOps Integration using ClearML
+ Watch: YOLO11 MLOps Integration using ClearML
### Advanced Features in ClearML
@@ -180,7 +180,7 @@ ClearML's user-friendly interface allows easy cloning, editing, and enqueuing of
## Summary
-This guide has led you through the process of integrating ClearML with Ultralytics' YOLOv8. Covering everything from initial setup to advanced model management, you've discovered how to leverage ClearML for efficient training, experiment tracking, and workflow optimization in your machine learning projects.
+This guide has led you through the process of integrating ClearML with Ultralytics' YOLO11. Covering everything from initial setup to advanced model management, you've discovered how to leverage ClearML for efficient training, experiment tracking, and workflow optimization in your machine learning projects.
For further details on usage, visit [ClearML's official documentation](https://clear.ml/docs/latest/docs/integrations/yolov8/).
@@ -188,9 +188,9 @@ Additionally, explore more integrations and capabilities of Ultralytics by visit
## FAQ
-### What is the process for integrating Ultralytics YOLOv8 with ClearML?
+### What is the process for integrating Ultralytics YOLO11 with ClearML?
-Integrating Ultralytics YOLOv8 with ClearML involves a series of steps to streamline your MLOps workflow. First, install the necessary packages:
+Integrating Ultralytics YOLO11 with ClearML involves a series of steps to streamline your MLOps workflow. First, install the necessary packages:
```bash
pip install ultralytics clearml
@@ -202,19 +202,19 @@ Next, initialize the ClearML SDK in your environment using:
clearml-init
```
-You then configure ClearML with your credentials from the [ClearML Settings page](https://app.clear.ml/settings/workspace-configuration). Detailed instructions on the entire setup process, including model selection and training configurations, can be found in our [YOLOv8 Model Training guide](../modes/train.md).
+You then configure ClearML with your credentials from the [ClearML Settings page](https://app.clear.ml/settings/workspace-configuration). Detailed instructions on the entire setup process, including model selection and training configurations, can be found in our [YOLO11 Model Training guide](../modes/train.md).
-### Why should I use ClearML with Ultralytics YOLOv8 for my machine learning projects?
+### Why should I use ClearML with Ultralytics YOLO11 for my machine learning projects?
-Using ClearML with Ultralytics YOLOv8 enhances your machine learning projects by automating experiment tracking, streamlining workflows, and enabling robust model management. ClearML offers real-time metrics tracking, resource utilization monitoring, and a user-friendly interface for comparing experiments. These features help optimize your model's performance and make the development process more efficient. Learn more about the benefits and procedures in our [MLOps Integration guide](../modes/train.md).
+Using ClearML with Ultralytics YOLO11 enhances your machine learning projects by automating experiment tracking, streamlining workflows, and enabling robust model management. ClearML offers real-time metrics tracking, resource utilization monitoring, and a user-friendly interface for comparing experiments. These features help optimize your model's performance and make the development process more efficient. Learn more about the benefits and procedures in our [MLOps Integration guide](../modes/train.md).
-### How do I troubleshoot common issues during YOLOv8 and ClearML integration?
+### How do I troubleshoot common issues during YOLO11 and ClearML integration?
-If you encounter issues during the integration of YOLOv8 with ClearML, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. Typical problems might involve package installation errors, credential setup, or configuration issues. This guide provides step-by-step troubleshooting instructions to resolve these common issues efficiently.
+If you encounter issues during the integration of YOLO11 with ClearML, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. Typical problems might involve package installation errors, credential setup, or configuration issues. This guide provides step-by-step troubleshooting instructions to resolve these common issues efficiently.
-### How do I set up the ClearML task for YOLOv8 model training?
+### How do I set up the ClearML task for YOLO11 model training?
-Setting up a ClearML task for YOLOv8 training involves initializing a task, selecting the model variant, loading the model, setting up training arguments, and finally, starting the model training. Here's a simplified example:
+Setting up a ClearML task for YOLO11 training involves initializing a task, selecting the model variant, loading the model, setting up training arguments, and finally, starting the model training. Here's a simplified example:
```python
from clearml import Task
@@ -224,11 +224,11 @@ from ultralytics import YOLO
# Step 1: Creating a ClearML Task
task = Task.init(project_name="my_project", task_name="my_yolov8_task")
-# Step 2: Selecting the YOLOv8 Model
-model_variant = "yolov8n"
+# Step 2: Selecting the YOLO11 Model
+model_variant = "yolo11n"
task.set_parameter("model_variant", model_variant)
-# Step 3: Loading the YOLOv8 Model
+# Step 3: Loading the YOLO11 Model
model = YOLO(f"{model_variant}.pt")
# Step 4: Setting Up Training Arguments
@@ -241,6 +241,6 @@ results = model.train(**args)
Refer to our [Usage guide](#usage) for a detailed breakdown of these steps.
-### Where can I view the results of my YOLOv8 training in ClearML?
+### Where can I view the results of my YOLO11 training in ClearML?
-After running your YOLOv8 training script with ClearML, you can view the results on the ClearML results page. The output will include a URL link to the ClearML dashboard, where you can track metrics, compare experiments, and monitor resource usage. For more details on how to view and interpret the results, check our section on [Viewing the ClearML Results Page](#viewing-the-clearml-results-page).
+After running your YOLO11 training script with ClearML, you can view the results on the ClearML results page. The output will include a URL link to the ClearML dashboard, where you can track metrics, compare experiments, and monitor resource usage. For more details on how to view and interpret the results, check our section on [Viewing the ClearML Results Page](#viewing-the-clearml-results-page).
diff --git a/docs/en/integrations/comet.md b/docs/en/integrations/comet.md
index 2591973515f..24d69c04453 100644
--- a/docs/en/integrations/comet.md
+++ b/docs/en/integrations/comet.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn to simplify the logging of YOLOv8 training with Comet ML. This guide covers installation, setup, real-time insights, and custom logging.
-keywords: YOLOv8, Comet ML, logging, machine learning, training, model checkpoints, metrics, installation, configuration, real-time insights, custom logging
+description: Learn to simplify the logging of YOLO11 training with Comet ML. This guide covers installation, setup, real-time insights, and custom logging.
+keywords: YOLO11, Comet ML, logging, machine learning, training, model checkpoints, metrics, installation, configuration, real-time insights, custom logging
---
-# Elevating YOLOv8 Training: Simplify Your Logging Process with Comet ML
+# Elevating YOLO11 Training: Simplify Your Logging Process with Comet ML
Logging key training details such as parameters, metrics, image predictions, and model checkpoints is essential in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml): it keeps your project transparent, your progress measurable, and your results repeatable.
-[Ultralytics YOLOv8](https://www.ultralytics.com/) seamlessly integrates with Comet ML, efficiently capturing and optimizing every aspect of your YOLOv8 [object detection](https://www.ultralytics.com/glossary/object-detection) model's training process. In this guide, we'll cover the installation process, Comet ML setup, real-time insights, custom logging, and offline usage, ensuring that your YOLOv8 training is thoroughly documented and fine-tuned for outstanding results.
+[Ultralytics YOLO11](https://www.ultralytics.com/) seamlessly integrates with Comet ML, efficiently capturing and optimizing every aspect of your YOLO11 [object detection](https://www.ultralytics.com/glossary/object-detection) model's training process. In this guide, we'll cover the installation process, Comet ML setup, real-time insights, custom logging, and offline usage, ensuring that your YOLO11 training is thoroughly documented and fine-tuned for outstanding results.
## Comet ML
@@ -18,9 +18,9 @@ Logging key training details such as parameters, metrics, image predictions, and
[Comet ML](https://www.comet.com/site/) is a platform for tracking, comparing, explaining, and optimizing machine learning models and experiments. It allows you to log metrics, parameters, media, and more during your model training and monitor your experiments through an aesthetically pleasing web interface. Comet ML helps data scientists iterate more rapidly, enhances transparency and reproducibility, and aids in the development of production models.
-## Harnessing the Power of YOLOv8 and Comet ML
+## Harnessing the Power of YOLO11 and Comet ML
-By combining Ultralytics YOLOv8 with Comet ML, you unlock a range of benefits. These include simplified experiment management, real-time insights for quick adjustments, flexible and tailored logging options, and the ability to log experiments offline when internet access is limited. This integration empowers you to make data-driven decisions, analyze performance metrics, and achieve exceptional results.
+By combining Ultralytics YOLO11 with Comet ML, you unlock a range of benefits. These include simplified experiment management, real-time insights for quick adjustments, flexible and tailored logging options, and the ability to log experiments offline when internet access is limited. This integration empowers you to make data-driven decisions, analyze performance metrics, and achieve exceptional results.
## Installation
@@ -31,7 +31,7 @@ To install the required packages, run:
=== "CLI"
```bash
- # Install the required packages for YOLOv8 and Comet ML
+ # Install the required packages for YOLO11 and Comet ML
pip install ultralytics comet_ml torch torchvision
```
@@ -50,17 +50,21 @@ After installing the required packages, you'll need to sign up, get a [Comet API
Then, you can initialize your Comet project. Comet will automatically detect the API key and proceed with the setup.
-```python
-import comet_ml
+!!! example "Initialize Comet project"
-comet_ml.login(project_name="comet-example-yolov8-coco128")
-```
+ === "Python"
+
+ ```python
+ import comet_ml
+
+ comet_ml.login(project_name="comet-example-yolo11-coco128")
+ ```
If you are using a Google Colab notebook, the code above will prompt you to enter your API key for initialization.
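+If you prefer a non-interactive setup (for example on a remote machine or in CI), Comet's SDK can also pick up the API key from its standard `COMET_API_KEY` environment variable, set before calling `comet_ml.login()`. A minimal sketch:
+
+```python
+import os
+
+import comet_ml
+
+# Supply the API key via Comet's environment variable instead of the
+# interactive prompt (replace the placeholder with your own key)
+os.environ["COMET_API_KEY"] = "<your-comet-api-key>"
+
+comet_ml.login(project_name="comet-example-yolo11-coco128")
+```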
## Usage
-Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/yolo11.md). This will help you choose the most appropriate model for your project requirements.
!!! example "Usage"
@@ -70,12 +74,12 @@ Before diving into the usage instructions, be sure to check out the range of [YO
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Train the model
results = model.train(
data="coco8.yaml",
- project="comet-example-yolov8-coco128",
+ project="comet-example-yolo11-coco128",
batch=32,
save_period=1,
save_json=True,
@@ -83,13 +87,13 @@ Before diving into the usage instructions, be sure to check out the range of [YO
)
```
-After running the training code, Comet ML will create an experiment in your Comet workspace to track the run automatically. You will then be provided with a link to view the detailed logging of your [YOLOv8 model's training](../modes/train.md) process.
+After running the training code, Comet ML will create an experiment in your Comet workspace to track the run automatically. You will then be provided with a link to view the detailed logging of your [YOLO11 model's training](../modes/train.md) process.
Comet automatically logs the following data with no additional configuration: metrics such as mAP and loss, hyperparameters, model checkpoints, interactive confusion matrix, and image [bounding box](https://www.ultralytics.com/glossary/bounding-box) predictions.
## Understanding Your Model's Performance with Comet ML Visualizations
-Let's dive into what you'll see on the Comet ML dashboard once your YOLOv8 model begins training. The dashboard is where all the action happens, presenting a range of automatically logged information through visuals and statistics. Here's a quick tour:
+Let's dive into what you'll see on the Comet ML dashboard once your YOLO11 model begins training. The dashboard is where all the action happens, presenting a range of automatically logged information through visuals and statistics. Here's a quick tour:
**Experiment Panels**
@@ -169,19 +173,19 @@ os.environ["COMET_MODE"] = "offline"
## Summary
-This guide has walked you through integrating Comet ML with Ultralytics' YOLOv8. From installation to customization, you've learned to streamline experiment management, gain real-time insights, and adapt logging to your project's needs.
+This guide has walked you through integrating Comet ML with Ultralytics' YOLO11. From installation to customization, you've learned to streamline experiment management, gain real-time insights, and adapt logging to your project's needs.
-Explore [Comet ML's official documentation](https://www.comet.com/docs/v2/integrations/third-party-tools/yolov8/) for more insights on integrating with YOLOv8.
+Explore [Comet ML's official documentation](https://www.comet.com/docs/v2/integrations/third-party-tools/yolov8/) for more insights on integrating with YOLO11.
-Furthermore, if you're looking to dive deeper into the practical applications of YOLOv8, specifically for [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) tasks, this detailed guide on [fine-tuning YOLOv8 with Comet ML](https://www.comet.com/site/blog/fine-tuning-yolov8-for-image-segmentation-with-comet/) offers valuable insights and step-by-step instructions to enhance your model's performance.
+Furthermore, if you're looking to dive deeper into the practical applications of YOLO11, specifically for [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) tasks, this detailed guide on [fine-tuning YOLO11 with Comet ML](https://www.comet.com/site/blog/fine-tuning-yolov8-for-image-segmentation-with-comet/) offers valuable insights and step-by-step instructions to enhance your model's performance.
Additionally, to explore other exciting integrations with Ultralytics, check out the [integration guide page](../integrations/index.md), which offers a wealth of resources and information.
## FAQ
-### How do I integrate Comet ML with Ultralytics YOLOv8 for training?
+### How do I integrate Comet ML with Ultralytics YOLO11 for training?
-To integrate Comet ML with Ultralytics YOLOv8, follow these steps:
+To integrate Comet ML with Ultralytics YOLO11, follow these steps:
1. **Install the required packages**:
@@ -200,18 +204,18 @@ To integrate Comet ML with Ultralytics YOLOv8, follow these steps:
```python
import comet_ml
- comet_ml.login(project_name="comet-example-yolov8-coco128")
+ comet_ml.login(project_name="comet-example-yolo11-coco128")
```
-4. **Train your YOLOv8 model and log metrics**:
+4. **Train your YOLO11 model and log metrics**:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
results = model.train(
data="coco8.yaml",
- project="comet-example-yolov8-coco128",
+ project="comet-example-yolo11-coco128",
batch=32,
save_period=1,
save_json=True,
@@ -221,9 +225,9 @@ To integrate Comet ML with Ultralytics YOLOv8, follow these steps:
For more detailed instructions, refer to the [Comet ML configuration section](#configuring-comet-ml).
-### What are the benefits of using Comet ML with YOLOv8?
+### What are the benefits of using Comet ML with YOLO11?
-By integrating Ultralytics YOLOv8 with Comet ML, you can:
+By integrating Ultralytics YOLO11 with Comet ML, you can:
- **Monitor real-time insights**: Get instant feedback on your training results, allowing for quick adjustments.
- **Log extensive metrics**: Automatically capture essential metrics such as mAP, loss, hyperparameters, and model checkpoints.
@@ -232,7 +236,7 @@ By integrating Ultralytics YOLOv8 with Comet ML, you can:
By leveraging these features, you can optimize your machine learning workflows for better performance and reproducibility. For more information, visit the [Comet ML integration guide](../integrations/index.md).
-### How do I customize the logging behavior of Comet ML during YOLOv8 training?
+### How do I customize the logging behavior of Comet ML during YOLO11 training?
Comet ML allows for extensive customization of its logging behavior using environment variables:
@@ -262,9 +266,9 @@ Comet ML allows for extensive customization of its logging behavior using enviro
Refer to the [Customizing Comet ML Logging](#customizing-comet-ml-logging) section for more customization options.
-### How do I view detailed metrics and visualizations of my YOLOv8 training on Comet ML?
+### How do I view detailed metrics and visualizations of my YOLO11 training on Comet ML?
-Once your YOLOv8 model starts training, you can access a wide range of metrics and visualizations on the Comet ML dashboard. Key features include:
+Once your YOLO11 model starts training, you can access a wide range of metrics and visualizations on the Comet ML dashboard. Key features include:
- **Experiment Panels**: View different runs and their metrics, including segment mask loss, class loss, and mean average [precision](https://www.ultralytics.com/glossary/precision).
- **Metrics**: Examine metrics in tabular format for detailed analysis.
@@ -273,7 +277,7 @@ Once your YOLOv8 model starts training, you can access a wide range of metrics a
For a detailed overview of these features, visit the [Understanding Your Model's Performance with Comet ML Visualizations](#understanding-your-models-performance-with-comet-ml-visualizations) section.
-### Can I use Comet ML for offline logging when training YOLOv8 models?
+### Can I use Comet ML for offline logging when training YOLO11 models?
Yes, you can enable offline logging in Comet ML by setting the `COMET_MODE` environment variable to "offline":
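+This can be done directly in your training script before the run starts, as in the following sketch:
+
+```python
+import os
+
+# Save experiment data locally instead of streaming it to Comet's servers;
+# offline experiments can be uploaded to Comet later
+os.environ["COMET_MODE"] = "offline"
+```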
diff --git a/docs/en/integrations/coreml.md b/docs/en/integrations/coreml.md
index 352e1753048..41ff20d10f7 100644
--- a/docs/en/integrations/coreml.md
+++ b/docs/en/integrations/coreml.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to export YOLOv8 models to CoreML for optimized, on-device machine learning on iOS and macOS. Follow step-by-step instructions.
-keywords: CoreML export, YOLOv8 models, CoreML conversion, Ultralytics, iOS object detection, macOS machine learning, AI deployment, machine learning integration
+description: Learn how to export YOLO11 models to CoreML for optimized, on-device machine learning on iOS and macOS. Follow step-by-step instructions.
+keywords: CoreML export, YOLO11 models, CoreML conversion, Ultralytics, iOS object detection, macOS machine learning, AI deployment, machine learning integration
---
-# CoreML Export for YOLOv8 Models
+# CoreML Export for YOLO11 Models
Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models on Apple devices like iPhones and Macs requires a format that ensures seamless performance.
-The CoreML export format allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for efficient [object detection](https://www.ultralytics.com/glossary/object-detection) in iOS and macOS applications. In this guide, we'll walk you through the steps for converting your models to the CoreML format, making it easier for your models to perform well on Apple devices.
+The CoreML export format allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for efficient [object detection](https://www.ultralytics.com/glossary/object-detection) in iOS and macOS applications. In this guide, we'll walk you through the steps for converting your models to the CoreML format, making it easier for your models to perform well on Apple devices.
## CoreML
@@ -40,7 +40,7 @@ Apple's CoreML framework offers robust features for on-device machine learning.
## CoreML Deployment Options
-Before we look at the code for exporting YOLOv8 models to the CoreML format, let's understand where CoreML models are usually used.
+Before we look at the code for exporting YOLO11 models to the CoreML format, let's understand where CoreML models are usually used.
CoreML offers various deployment options for machine learning models, including:
@@ -52,9 +52,9 @@ CoreML offers various deployment options for machine learning models, including:
- **Cloud-Based Deployment**: CoreML models are hosted on servers and accessed by the iOS app through API requests. This scalable and flexible option enables easy model updates without app revisions. It's ideal for complex models or large-scale apps requiring regular updates. However, it does require an internet connection and may pose latency and security issues.
-## Exporting YOLOv8 Models to CoreML
+## Exporting YOLO11 Models to CoreML
-Exporting YOLOv8 to CoreML enables optimized, on-device machine learning performance within Apple's ecosystem, offering benefits in terms of efficiency, security, and seamless integration with iOS, macOS, watchOS, and tvOS platforms.
+Exporting YOLO11 to CoreML enables optimized, on-device machine learning performance within Apple's ecosystem, offering benefits in terms of efficiency, security, and seamless integration with iOS, macOS, watchOS, and tvOS platforms.
### Installation
@@ -65,15 +65,15 @@ To install the required package, run:
=== "CLI"
```bash
- # Install the required package for YOLOv8
+ # Install the required package for YOLO11
pip install ultralytics
```
-For detailed instructions and best practices related to the installation process, check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
### Usage
-Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
!!! example "Usage"
@@ -82,14 +82,14 @@ Before diving into the usage instructions, be sure to check out the range of [YO
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to CoreML format
- model.export(format="coreml") # creates 'yolov8n.mlpackage'
+ model.export(format="coreml") # creates 'yolo11n.mlpackage'
# Load the exported CoreML model
- coreml_model = YOLO("yolov8n.mlpackage")
+ coreml_model = YOLO("yolo11n.mlpackage")
# Run inference
results = coreml_model("https://ultralytics.com/images/bus.jpg")
@@ -98,18 +98,18 @@ Before diving into the usage instructions, be sure to check out the range of [YO
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to CoreML format
- yolo export model=yolov8n.pt format=coreml # creates 'yolov8n.mlpackage''
+ # Export a YOLO11n PyTorch model to CoreML format
+      yolo export model=yolo11n.pt format=coreml # creates 'yolo11n.mlpackage'
# Run inference with the exported model
- yolo predict model=yolov8n.mlpackage source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.mlpackage source='https://ultralytics.com/images/bus.jpg'
```
For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
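+Depending on your deployment target, the export call can also take optional arguments such as `half`, `int8`, or `nms`; these are general export options described in the export guide linked above, and availability can vary by format and `ultralytics` version. A minimal sketch assuming they are supported for your setup:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Export with FP16 weights and NMS embedded in the CoreML package
+# (optional arguments; check the export guide for the options available
+# in your installed ultralytics version)
+model.export(format="coreml", half=True, nms=True)
+```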
-## Deploying Exported YOLOv8 CoreML Models
+## Deploying Exported YOLO11 CoreML Models
-Having successfully exported your Ultralytics YOLOv8 models to CoreML, the next critical phase is deploying these models effectively. For detailed guidance on deploying CoreML models in various environments, check out these resources:
+Having successfully exported your Ultralytics YOLO11 models to CoreML, the next critical phase is deploying these models effectively. For detailed guidance on deploying CoreML models in various environments, check out these resources:
- **[CoreML Tools](https://apple.github.io/coremltools/docs-guides/)**: This guide includes instructions and examples to convert models from [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), PyTorch, and other libraries to Core ML.
@@ -119,17 +119,17 @@ Having successfully exported your Ultralytics YOLOv8 models to CoreML, the next
## Summary
-In this guide, we went over how to export Ultralytics YOLOv8 models to CoreML format. By following the steps outlined in this guide, you can ensure maximum compatibility and performance when exporting YOLOv8 models to CoreML.
+In this guide, we went over how to export Ultralytics YOLO11 models to CoreML format. By following the steps outlined here, you can ensure maximum compatibility and performance when exporting YOLO11 models to CoreML.
For further details on usage, visit the [CoreML official documentation](https://developer.apple.com/documentation/coreml).
-Also, if you'd like to know more about other Ultralytics YOLOv8 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of valuable resources and insights there.
+Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of valuable resources and insights there.
## FAQ
-### How do I export YOLOv8 models to CoreML format?
+### How do I export YOLO11 models to CoreML format?
-To export your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to CoreML format, you'll first need to ensure you have the `ultralytics` package installed. You can install it using:
+To export your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to CoreML format, you'll first need to ensure you have the `ultralytics` package installed. You can install it using:
!!! example "Installation"
@@ -148,21 +148,21 @@ Next, you can export the model using the following Python or CLI commands:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
model.export(format="coreml")
```
=== "CLI"
```bash
- yolo export model=yolov8n.pt format=coreml
+ yolo export model=yolo11n.pt format=coreml
```
-For further details, refer to the [Exporting YOLOv8 Models to CoreML](../modes/export.md) section of our documentation.
+For further details, refer to the [Exporting YOLO11 Models to CoreML](../modes/export.md) section of our documentation.
-### What are the benefits of using CoreML for deploying YOLOv8 models?
+### What are the benefits of using CoreML for deploying YOLO11 models?
-CoreML provides numerous advantages for deploying [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models on Apple devices:
+CoreML provides numerous advantages for deploying [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models on Apple devices:
- **On-device Processing**: Enables local model inference on devices, ensuring [data privacy](https://www.ultralytics.com/glossary/data-privacy) and minimizing latency.
- **Performance Optimization**: Leverages the full potential of the device's CPU, GPU, and Neural Engine, optimizing both speed and efficiency.
@@ -171,9 +171,9 @@ CoreML provides numerous advantages for deploying [Ultralytics YOLOv8](https://g
For more details on integrating your CoreML model into an iOS app, check out the guide on [Integrating a Core ML Model into Your App](https://developer.apple.com/documentation/coreml/integrating-a-core-ml-model-into-your-app).
-### What are the deployment options for YOLOv8 models exported to CoreML?
+### What are the deployment options for YOLO11 models exported to CoreML?
-Once you export your YOLOv8 model to CoreML format, you have multiple deployment options:
+Once you export your YOLO11 model to CoreML format, you have multiple deployment options:
1. **On-Device Deployment**: Directly integrate CoreML models into your app for enhanced privacy and offline functionality. This can be done as:
@@ -184,9 +184,9 @@ Once you export your YOLOv8 model to CoreML format, you have multiple deployment
For detailed guidance on deploying CoreML models, refer to [CoreML Deployment Options](#coreml-deployment-options).
-### How does CoreML ensure optimized performance for YOLOv8 models?
+### How does CoreML ensure optimized performance for YOLO11 models?
-CoreML ensures optimized performance for [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models by utilizing various optimization techniques:
+CoreML ensures optimized performance for [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models by utilizing various optimization techniques:
- **Hardware Acceleration**: Uses the device's CPU, GPU, and Neural Engine for efficient computation.
- **Model Compression**: Provides tools for compressing models to reduce their footprint without compromising accuracy.
@@ -205,14 +205,14 @@ Yes, you can run inference directly using the exported CoreML model. Below are t
```python
from ultralytics import YOLO
- coreml_model = YOLO("yolov8n.mlpackage")
+ coreml_model = YOLO("yolo11n.mlpackage")
results = coreml_model("https://ultralytics.com/images/bus.jpg")
```
=== "CLI"
```bash
- yolo predict model=yolov8n.mlpackage source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.mlpackage source='https://ultralytics.com/images/bus.jpg'
```
For additional information, refer to the [Usage section](#usage) of the CoreML export guide.
diff --git a/docs/en/integrations/dvc.md b/docs/en/integrations/dvc.md
index 76ba91b4320..c90377e06fe 100644
--- a/docs/en/integrations/dvc.md
+++ b/docs/en/integrations/dvc.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Unlock seamless YOLOv8 tracking with DVCLive. Discover how to log, visualize, and analyze experiments for optimized ML model performance.
-keywords: YOLOv8, DVCLive, experiment tracking, machine learning, model training, data visualization, Git integration
+description: Unlock seamless YOLO11 tracking with DVCLive. Discover how to log, visualize, and analyze experiments for optimized ML model performance.
+keywords: YOLO11, DVCLive, experiment tracking, machine learning, model training, data visualization, Git integration
---
-# Advanced YOLOv8 Experiment Tracking with DVCLive
+# Advanced YOLO11 Experiment Tracking with DVCLive
Experiment tracking in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) is critical to model development and evaluation. It involves recording and analyzing various parameters, metrics, and outcomes from numerous training runs. This process is essential for understanding model performance and making data-driven decisions to refine and optimize models.
-Integrating DVCLive with [Ultralytics YOLOv8](https://www.ultralytics.com/) transforms the way experiments are tracked and managed. This integration offers a seamless solution for automatically logging key experiment details, comparing results across different runs, and visualizing data for in-depth analysis. In this guide, we'll understand how DVCLive can be used to streamline the process.
+Integrating DVCLive with [Ultralytics YOLO11](https://www.ultralytics.com/) transforms the way experiments are tracked and managed. This integration offers a seamless solution for automatically logging key experiment details, comparing results across different runs, and visualizing data for in-depth analysis. In this guide, we'll explore how DVCLive can be used to streamline the process.
## DVCLive
@@ -18,9 +18,9 @@ Integrating DVCLive with [Ultralytics YOLOv8](https://www.ultralytics.com/) tran
[DVCLive](https://dvc.org/doc/dvclive), developed by DVC, is an innovative open-source tool for experiment tracking in machine learning. Integrating seamlessly with Git and DVC, it automates the logging of crucial experiment data like model parameters and training metrics. Designed for simplicity, DVCLive enables effortless comparison and analysis of multiple runs, enhancing the efficiency of machine learning projects with intuitive [data visualization](https://www.ultralytics.com/glossary/data-visualization) and analysis tools.
-## YOLOv8 Training with DVCLive
+## YOLO11 Training with DVCLive
-YOLOv8 training sessions can be effectively monitored with DVCLive. Additionally, DVC provides integral features for visualizing these experiments, including the generation of a report that enables the comparison of metric plots across all tracked experiments, offering a comprehensive view of the training process.
+YOLO11 training sessions can be effectively monitored with DVCLive. In addition, DVC provides built-in features for visualizing these experiments, including a generated report that compares metric plots across all tracked experiments, giving you a comprehensive view of the training process.
## Installation
@@ -31,11 +31,11 @@ To install the required packages, run:
=== "CLI"
```bash
- # Install the required packages for YOLOv8 and DVCLive
+ # Install the required packages for YOLO11 and DVCLive
pip install ultralytics dvclive
```
-For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
## Configuring DVCLive
@@ -66,27 +66,27 @@ In these commands, ensure to replace "you@example.com" with the email address as
## Usage
-Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
-### Training YOLOv8 Models with DVCLive
+### Training YOLO11 Models with DVCLive
-Start by running your YOLOv8 training sessions. You can use different model configurations and training parameters to suit your project needs. For instance:
+Start by running your YOLO11 training sessions. You can use different model configurations and training parameters to suit your project needs. For instance:
```bash
-# Example training commands for YOLOv8 with varying configurations
-yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=512
-yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=640
+# Example training commands for YOLO11 with varying configurations
+yolo train model=yolo11n.pt data=coco8.yaml epochs=5 imgsz=512
+yolo train model=yolo11n.pt data=coco8.yaml epochs=5 imgsz=640
```
-Adjust the model, data, [epochs](https://www.ultralytics.com/glossary/epoch), and imgsz parameters according to your specific requirements. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Adjust the model, data, [epochs](https://www.ultralytics.com/glossary/epoch), and imgsz parameters according to your specific requirements. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md).
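+If you prefer the Python API over the CLI, the same runs can be launched programmatically; a minimal sketch mirroring the second command above:
+
+```python
+from ultralytics import YOLO
+
+# Equivalent of: yolo train model=yolo11n.pt data=coco8.yaml epochs=5 imgsz=640
+model = YOLO("yolo11n.pt")
+results = model.train(data="coco8.yaml", epochs=5, imgsz=640)
+```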
### Monitoring Experiments with DVCLive
-DVCLive enhances the training process by enabling the tracking and visualization of key metrics. When installed, Ultralytics YOLOv8 automatically integrates with DVCLive for experiment tracking, which you can later analyze for performance insights. For a comprehensive understanding of the specific performance metrics used during training, be sure to explore [our detailed guide on performance metrics](../guides/yolo-performance-metrics.md).
+DVCLive enhances the training process by enabling the tracking and visualization of key metrics. When installed, Ultralytics YOLO11 automatically integrates with DVCLive for experiment tracking, and you can later analyze the logged results for performance insights. For a comprehensive understanding of the specific performance metrics used during training, be sure to explore [our detailed guide on performance metrics](../guides/yolo-performance-metrics.md).
### Analyzing Results
-After your YOLOv8 training sessions are complete, you can leverage DVCLive's powerful visualization tools for in-depth analysis of the results. DVCLive's integration ensures that all training metrics are systematically logged, facilitating a comprehensive evaluation of your model's performance.
+After your YOLO11 training sessions are complete, you can leverage DVCLive's powerful visualization tools for in-depth analysis of the results. DVCLive's integration ensures that all training metrics are systematically logged, facilitating a comprehensive evaluation of your model's performance.
To start the analysis, you can extract the experiment data using DVC's API and process it with Pandas for easier handling and visualization:
@@ -108,7 +108,7 @@ df.reset_index(drop=True, inplace=True)
print(df)
```
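+A fuller version of that extraction step, sketched under the assumption that DVC's `dvc.api.exp_show()` call and the experiment columns described below are available in your environment, could look like this:
+
+```python
+import dvc.api
+import pandas as pd
+
+# Columns of interest from the tracked experiments
+columns = ["Experiment", "epochs", "imgsz", "model", "metrics.mAP50-95(B)"]
+
+# Pull all tracked experiments into a DataFrame for analysis
+df = pd.DataFrame(dvc.api.exp_show(), columns=columns)
+
+# Drop incomplete rows and tidy the index before inspection
+df.dropna(inplace=True)
+df.reset_index(drop=True, inplace=True)
+print(df)
+```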
-The output of the code snippet above provides a clear tabular view of the different experiments conducted with YOLOv8 models. Each row represents a different training run, detailing the experiment's name, the number of epochs, image size (imgsz), the specific model used, and the mAP50-95(B) metric. This metric is crucial for evaluating the model's [accuracy](https://www.ultralytics.com/glossary/accuracy), with higher values indicating better performance.
+The output of the code snippet above provides a clear tabular view of the different experiments conducted with YOLO11 models. Each row represents a different training run, detailing the experiment's name, the number of epochs, image size (imgsz), the specific model used, and the mAP50-95(B) metric. This metric is crucial for evaluating the model's [accuracy](https://www.ultralytics.com/glossary/accuracy), with higher values indicating better performance.
#### Visualizing Results with Plotly
@@ -164,7 +164,7 @@ Based on your analysis, iterate on your experiments. Adjust model configurations
## Summary
-This guide has led you through the process of integrating DVCLive with Ultralytics' YOLOv8. You have learned how to harness the power of DVCLive for detailed experiment monitoring, effective visualization, and insightful analysis in your machine learning endeavors.
+This guide has led you through the process of integrating DVCLive with Ultralytics' YOLO11. You have learned how to harness the power of DVCLive for detailed experiment monitoring, effective visualization, and insightful analysis in your machine learning endeavors.
For further details on usage, visit [DVCLive's official documentation](https://dvc.org/doc/dvclive/ml-frameworks/yolo).
@@ -172,9 +172,9 @@ Additionally, explore more integrations and capabilities of Ultralytics by visit
## FAQ
-### How do I integrate DVCLive with Ultralytics YOLOv8 for experiment tracking?
+### How do I integrate DVCLive with Ultralytics YOLO11 for experiment tracking?
-Integrating DVCLive with Ultralytics YOLOv8 is straightforward. Start by installing the necessary packages:
+Integrating DVCLive with Ultralytics YOLO11 is straightforward. Start by installing the necessary packages:
!!! example "Installation"
@@ -198,21 +198,21 @@ Next, initialize a Git repository and configure DVCLive in your project:
git commit -m "DVC init"
```
-Follow our [YOLOv8 Installation guide](../quickstart.md) for detailed setup instructions.
+Follow our [YOLO11 Installation guide](../quickstart.md) for detailed setup instructions.
-### Why should I use DVCLive for tracking YOLOv8 experiments?
+### Why should I use DVCLive for tracking YOLO11 experiments?
-Using DVCLive with YOLOv8 provides several advantages, such as:
+Using DVCLive with YOLO11 provides several advantages, such as:
- **Automated Logging**: DVCLive automatically records key experiment details like model parameters and metrics.
- **Easy Comparison**: Facilitates comparison of results across different runs.
- **Visualization Tools**: Leverages DVCLive's robust data visualization capabilities for in-depth analysis.
-For further details, refer to our guide on [YOLOv8 Model Training](../modes/train.md) and [YOLO Performance Metrics](../guides/yolo-performance-metrics.md) to maximize your experiment tracking efficiency.
+For further details, refer to our guide on [YOLO11 Model Training](../modes/train.md) and [YOLO Performance Metrics](../guides/yolo-performance-metrics.md) to maximize your experiment tracking efficiency.
-### How can DVCLive improve my results analysis for YOLOv8 training sessions?
+### How can DVCLive improve my results analysis for YOLO11 training sessions?
-After completing your YOLOv8 training sessions, DVCLive helps in visualizing and analyzing the results effectively. Example code for loading and displaying experiment data:
+After completing your YOLO11 training sessions, DVCLive helps in visualizing and analyzing the results effectively. Example code for loading and displaying experiment data:
```python
import dvc.api
@@ -241,11 +241,11 @@ fig = parallel_coordinates(df, columns, color="metrics.mAP50-95(B)")
fig.show()
```
-Refer to our guide on [YOLOv8 Training with DVCLive](#yolov8-training-with-dvclive) for more examples and best practices.
+Refer to our guide on [YOLO11 Training with DVCLive](#yolo11-training-with-dvclive) for more examples and best practices.
-### What are the steps to configure my environment for DVCLive and YOLOv8 integration?
+### What are the steps to configure my environment for DVCLive and YOLO11 integration?
-To configure your environment for a smooth integration of DVCLive and YOLOv8, follow these steps:
+To configure your environment for a smooth integration of DVCLive and YOLO11, follow these steps:
1. **Install Required Packages**: Use `pip install ultralytics dvclive`.
2. **Initialize Git Repository**: Run `git init -q`.
@@ -254,9 +254,9 @@ To configure your environment for a smooth integration of DVCLive and YOLOv8, fo
These steps ensure proper version control and setup for experiment tracking. For in-depth configuration details, visit our [Configuration guide](../quickstart.md).
-### How do I visualize YOLOv8 experiment results using DVCLive?
+### How do I visualize YOLO11 experiment results using DVCLive?
-DVCLive offers powerful tools to visualize the results of YOLOv8 experiments. Here's how you can generate comparative plots:
+DVCLive offers powerful tools to visualize the results of YOLO11 experiments. Here's how you can generate comparative plots:
!!! example "Generate Comparative Plots"
@@ -275,4 +275,4 @@ from IPython.display import HTML
HTML(filename="./dvc_plots/index.html")
```
-These visualizations help identify trends and optimize model performance. Check our detailed guides on [YOLOv8 Experiment Analysis](#analyzing-results) for comprehensive steps and examples.
+These visualizations help identify trends and optimize model performance. Check our detailed guides on [YOLO11 Experiment Analysis](#analyzing-results) for comprehensive steps and examples.
diff --git a/docs/en/integrations/edge-tpu.md b/docs/en/integrations/edge-tpu.md
index d72410c5a77..f8821e47bd4 100644
--- a/docs/en/integrations/edge-tpu.md
+++ b/docs/en/integrations/edge-tpu.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to export YOLOv8 models to TFLite Edge TPU format for high-speed, low-power inferencing on mobile and embedded devices.
-keywords: YOLOv8, TFLite Edge TPU, TensorFlow Lite, model export, machine learning, edge computing, neural networks, Ultralytics
+description: Learn how to export YOLO11 models to TFLite Edge TPU format for high-speed, low-power inferencing on mobile and embedded devices.
+keywords: YOLO11, TFLite Edge TPU, TensorFlow Lite, model export, machine learning, edge computing, neural networks, Ultralytics
---
-# Learn to Export to TFLite Edge TPU Format From YOLOv8 Model
+# Learn to Export to TFLite Edge TPU Format From YOLO11 Model
Deploying computer vision models on devices with limited computational power, such as mobile or embedded systems, can be tricky. Using a model format that is optimized for faster performance simplifies the process. The [TensorFlow Lite](https://ai.google.dev/edge/litert) [Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) or TFLite Edge TPU model format is designed to use minimal power while delivering fast performance for neural networks.
-The export to TFLite Edge TPU format feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through converting your models to the TFLite Edge TPU format, making it easier for your models to perform well on various mobile and embedded devices.
+The export to TFLite Edge TPU format feature allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through converting your models to the TFLite Edge TPU format, making it easier for your models to perform well on various mobile and embedded devices.
## Why Should You Export to TFLite Edge TPU?
@@ -32,7 +32,7 @@ Here are the key features that make TFLite Edge TPU a great model format choice
## Deployment Options with TFLite Edge TPU
-Before we jump into how to export YOLOv8 models to the TFLite Edge TPU format, let's understand where TFLite Edge TPU models are usually used.
+Before we jump into how to export YOLO11 models to the TFLite Edge TPU format, let's understand where TFLite Edge TPU models are usually used.
TFLite Edge TPU offers various deployment options for machine learning models, including:
@@ -42,9 +42,9 @@ TFLite Edge TPU offers various deployment options for machine learning models, i
- **Hybrid Deployment**: A hybrid approach combines on-device and cloud deployment and offers a versatile and scalable solution for deploying machine learning models. Advantages include on-device processing for quick responses and [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) for more complex computations.
-## Exporting YOLOv8 Models to TFLite Edge TPU
+## Exporting YOLO11 Models to TFLite Edge TPU
-You can expand model compatibility and deployment flexibility by converting YOLOv8 models to TensorFlow Edge TPU.
+You can expand model compatibility and deployment flexibility by converting YOLO11 models to TensorFlow Edge TPU.
### Installation
@@ -55,15 +55,15 @@ To install the required package, run:
=== "CLI"
```bash
- # Install the required package for YOLOv8
+ # Install the required package for YOLO11
pip install ultralytics
```
-For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
### Usage
-Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md).
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can check whether the model you select supports the export functionality [here](../modes/export.md).
!!! example "Usage"
@@ -72,14 +72,14 @@ Before diving into the usage instructions, it's important to note that while all
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to TFLite Edge TPU format
- model.export(format="edgetpu") # creates 'yolov8n_full_integer_quant_edgetpu.tflite'
+ model.export(format="edgetpu") # creates 'yolo11n_full_integer_quant_edgetpu.tflite'
# Load the exported TFLite Edge TPU model
- edgetpu_model = YOLO("yolov8n_full_integer_quant_edgetpu.tflite")
+ edgetpu_model = YOLO("yolo11n_full_integer_quant_edgetpu.tflite")
# Run inference
results = edgetpu_model("https://ultralytics.com/images/bus.jpg")
@@ -88,22 +88,22 @@ Before diving into the usage instructions, it's important to note that while all
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to TFLite Edge TPU format
- yolo export model=yolov8n.pt format=edgetpu # creates 'yolov8n_full_integer_quant_edgetpu.tflite'
+ # Export a YOLO11n PyTorch model to TFLite Edge TPU format
+ yolo export model=yolo11n.pt format=edgetpu # creates 'yolo11n_full_integer_quant_edgetpu.tflite'
# Run inference with the exported model
- yolo predict model=yolov8n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg'
```
For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md).
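+The export also accepts the standard `imgsz` argument if your application needs a specific input resolution, since the exported model runs at a fixed input size. A minimal sketch:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Export at a smaller fixed input size, which can reduce latency on
+# resource-constrained Edge TPU devices (imgsz is optional; default is 640)
+model.export(format="edgetpu", imgsz=320)
+```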
-## Deploying Exported YOLOv8 TFLite Edge TPU Models
+## Deploying Exported YOLO11 TFLite Edge TPU Models
-After successfully exporting your Ultralytics YOLOv8 models to TFLite Edge TPU format, you can now deploy them. The primary and recommended first step for running a TFLite Edge TPU model is to use the YOLO("model_edgetpu.tflite") method, as outlined in the previous usage code snippet.
+After successfully exporting your Ultralytics YOLO11 models to TFLite Edge TPU format, you can deploy them. The primary and recommended first step for running a TFLite Edge TPU model is to load it with the `YOLO("model_edgetpu.tflite")` method, as outlined in the previous usage code snippet.
However, for in-depth instructions on deploying your TFLite Edge TPU models, take a look at the following resources:
-- **[Coral Edge TPU on a Raspberry Pi with Ultralytics YOLOv8](../guides/coral-edge-tpu-on-raspberry-pi.md)**: Discover how to integrate Coral Edge TPUs with Raspberry Pi for enhanced machine learning capabilities.
+- **[Coral Edge TPU on a Raspberry Pi with Ultralytics YOLO11](../guides/coral-edge-tpu-on-raspberry-pi.md)**: Discover how to integrate Coral Edge TPUs with Raspberry Pi for enhanced machine learning capabilities.
- **[Code Examples](https://coral.ai/docs/edgetpu/compiler/)**: Access practical TensorFlow Edge TPU deployment examples to kickstart your projects.
@@ -111,17 +111,17 @@ However, for in-depth instructions on deploying your TFLite Edge TPU models, tak
## Summary
-In this guide, we've learned how to export Ultralytics YOLOv8 models to TFLite Edge TPU format. By following the steps mentioned above, you can increase the speed and power of your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications.
+In this guide, we've learned how to export Ultralytics YOLO11 models to TFLite Edge TPU format. By following the steps mentioned above, you can increase the speed and power of your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications.
For further details on usage, visit the [Edge TPU official website](https://cloud.google.com/tpu).
-Also, for more information on other Ultralytics YOLOv8 integrations, please visit our [integration guide page](index.md). There, you'll discover valuable resources and insights.
+Also, for more information on other Ultralytics YOLO11 integrations, please visit our [integration guide page](index.md). There, you'll discover valuable resources and insights.
## FAQ
-### How do I export a YOLOv8 model to TFLite Edge TPU format?
+### How do I export a YOLO11 model to TFLite Edge TPU format?
-To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps:
+To export a YOLO11 model to TFLite Edge TPU format, you can follow these steps:
!!! example "Usage"
@@ -130,14 +130,14 @@ To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps:
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to TFLite Edge TPU format
- model.export(format="edgetpu") # creates 'yolov8n_full_integer_quant_edgetpu.tflite'
+ model.export(format="edgetpu") # creates 'yolo11n_full_integer_quant_edgetpu.tflite'
# Load the exported TFLite Edge TPU model
- edgetpu_model = YOLO("yolov8n_full_integer_quant_edgetpu.tflite")
+ edgetpu_model = YOLO("yolo11n_full_integer_quant_edgetpu.tflite")
# Run inference
results = edgetpu_model("https://ultralytics.com/images/bus.jpg")
@@ -146,18 +146,18 @@ To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps:
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to TFLite Edge TPU format
- yolo export model=yolov8n.pt format=edgetpu # creates 'yolov8n_full_integer_quant_edgetpu.tflite'
+ # Export a YOLO11n PyTorch model to TFLite Edge TPU format
+ yolo export model=yolo11n.pt format=edgetpu # creates 'yolo11n_full_integer_quant_edgetpu.tflite'
# Run inference with the exported model
- yolo predict model=yolov8n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg'
```
For complete details on exporting models to other formats, refer to our [export guide](../modes/export.md).
-### What are the benefits of exporting YOLOv8 models to TFLite Edge TPU?
+### What are the benefits of exporting YOLO11 models to TFLite Edge TPU?
-Exporting YOLOv8 models to TFLite Edge TPU offers several benefits:
+Exporting YOLO11 models to TFLite Edge TPU offers several benefits:
- **Optimized Performance**: Achieve high-speed neural network performance with minimal power consumption.
- **Reduced Latency**: Quick local data processing without the need for cloud dependency.
diff --git a/docs/en/integrations/google-colab.md b/docs/en/integrations/google-colab.md
index 2c45528c192..2c242f6f824 100644
--- a/docs/en/integrations/google-colab.md
+++ b/docs/en/integrations/google-colab.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to efficiently train Ultralytics YOLOv8 models using Google Colab's powerful cloud-based environment. Start your project with ease.
-keywords: YOLOv8, Google Colab, machine learning, deep learning, model training, GPU, TPU, cloud computing, Jupyter Notebook, Ultralytics
+description: Learn how to efficiently train Ultralytics YOLO11 models using Google Colab's powerful cloud-based environment. Start your project with ease.
+keywords: YOLO11, Google Colab, machine learning, deep learning, model training, GPU, TPU, cloud computing, Jupyter Notebook, Ultralytics
---
-# Accelerating YOLOv8 Projects with Google Colab
+# Accelerating YOLO11 Projects with Google Colab
Many developers lack the powerful computing resources needed to build [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models. Acquiring high-end hardware or renting a decent GPU can be expensive. Google Colab is a great solution to this. It's a browser-based platform that allows you to work with large datasets, develop complex models, and share your work with others without a huge cost.
-You can use Google Colab to work on projects related to [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models. Google Colab's user-friendly environment is well suited for efficient model development and experimentation. Let's learn more about Google Colab, its key features, and how you can use it to train YOLOv8 models.
+You can use Google Colab to work on projects related to [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models. Google Colab's user-friendly environment is well suited for efficient model development and experimentation. Let's learn more about Google Colab, its key features, and how you can use it to train YOLO11 models.
## Google Colaboratory
@@ -16,15 +16,15 @@ Google Colaboratory, commonly known as Google Colab, was developed by Google Res
You can use Google Colab regardless of the specifications and configurations of your local computer. All you need is a Google account and a web browser, and you're good to go.
-## Training YOLOv8 Using Google Colaboratory
+## Training YOLO11 Using Google Colaboratory
-Training YOLOv8 models on Google Colab is pretty straightforward. Thanks to the integration, you can access the [Google Colab YOLOv8 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Training YOLO11 models on Google Colab is pretty straightforward. Thanks to the integration, you can access the [Google Colab YOLO11 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md).
Sign in to your Google account and run the notebook's cells to train your model.
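As a minimal sketch of what a typical Colab training cell might look like (assuming the small `coco8.yaml` sample dataset that ships with the `ultralytics` package), you could run something along these lines:

```python
# In a Colab cell, install the package first with: !pip install ultralytics
from ultralytics import YOLO

# Load a pretrained YOLO11 nano model
model = YOLO("yolo11n.pt")

# Quick fine-tuning run on the bundled COCO8 sample dataset as a smoke test
results = model.train(data="coco8.yaml", epochs=3, imgsz=640)
```

Swap `coco8.yaml` for your own dataset YAML once you have verified that the runtime and GPU are set up correctly.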
-Learn how to train a YOLOv8 model with custom data on YouTube with Nicolai. Check out the guide below.
+Learn how to train a YOLO11 model with custom data on YouTube with Nicolai. Check out the guide below.
@@ -34,7 +34,7 @@ Learn how to train a YOLOv8 model with custom data on YouTube with Nicolai. Chec
allowfullscreen>
- Watch: How to Train Ultralytics YOLOv8 models on Your Custom Dataset in Google Colab | Episode 3
+ Watch: How to Train Ultralytics YOLO11 models on Your Custom Dataset in Google Colab | Episode 3
### Common Questions While Working with Google Colab
@@ -75,9 +75,9 @@ Now, let's look at some of the standout features that make Google Colab a go-to
- **Educational Resources:** Google Colab offers a range of tutorials and example notebooks to help users learn and explore various functionalities.
-## Why Should You Use Google Colab for Your YOLOv8 Projects?
+## Why Should You Use Google Colab for Your YOLO11 Projects?
-There are many options for training and evaluating YOLOv8 models, so what makes the integration with Google Colab unique? Let's explore the advantages of this integration:
+There are many options for training and evaluating YOLO11 models, so what makes the integration with Google Colab unique? Let's explore the advantages of this integration:
- **Zero Setup:** Since Colab runs in the cloud, users can start training models immediately without the need for complex environment setups. Just create an account and start coding.
@@ -95,7 +95,7 @@ There are many options for training and evaluating YOLOv8 models, so what makes
If you'd like to dive deeper into Google Colab, here are a few resources to guide you.
-- **[Training Custom Datasets with Ultralytics YOLOv8 in Google Colab](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab)**: Learn how to train custom datasets with Ultralytics YOLOv8 on Google Colab. This comprehensive blog post will take you through the entire process, from initial setup to the training and evaluation stages.
+- **[Training Custom Datasets with Ultralytics YOLO11 in Google Colab](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab)**: Learn how to train custom datasets with Ultralytics YOLO11 on Google Colab. This comprehensive blog post will take you through the entire process, from initial setup to the training and evaluation stages.
- **[Curated Notebooks](https://colab.google/notebooks/)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas.
@@ -103,21 +103,21 @@ If you'd like to dive deeper into Google Colab, here are a few resources to guid
## Summary
-We've discussed how you can easily experiment with Ultralytics YOLOv8 models on Google Colab. You can use Google Colab to train and evaluate your models on GPUs and TPUs with a few clicks.
+We've discussed how you can easily experiment with Ultralytics YOLO11 models on Google Colab. You can use Google Colab to train and evaluate your models on GPUs and TPUs with a few clicks.
For more details, visit [Google Colab's FAQ page](https://research.google.com/colaboratory/intl/en-GB/faq.html).
-Interested in more YOLOv8 integrations? Visit the [Ultralytics integration guide page](index.md) to explore additional tools and capabilities that can improve your machine-learning projects.
+Interested in more YOLO11 integrations? Visit the [Ultralytics integration guide page](index.md) to explore additional tools and capabilities that can improve your machine-learning projects.
## FAQ
-### How do I start training Ultralytics YOLOv8 models on Google Colab?
+### How do I start training Ultralytics YOLO11 models on Google Colab?
-To start training Ultralytics YOLOv8 models on Google Colab, sign in to your Google account, then access the [Google Colab YOLOv8 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb). This notebook guides you through the setup and training process. After launching the notebook, run the cells step-by-step to train your model. For a full guide, refer to the [YOLOv8 Model Training guide](../modes/train.md).
+To start training Ultralytics YOLO11 models on Google Colab, sign in to your Google account, then access the [Google Colab YOLO11 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb). This notebook guides you through the setup and training process. After launching the notebook, run the cells step-by-step to train your model. For a full guide, refer to the [YOLO11 Model Training guide](../modes/train.md).
-### What are the advantages of using Google Colab for training YOLOv8 models?
+### What are the advantages of using Google Colab for training YOLO11 models?
-Google Colab offers several advantages for training YOLOv8 models:
+Google Colab offers several advantages for training YOLO11 models:
- **Zero Setup:** No initial environment setup is required; just log in and start coding.
- **Free GPU Access:** Use powerful GPUs or TPUs without the need for expensive hardware.
@@ -126,7 +126,7 @@ Google Colab offers several advantages for training YOLOv8 models:
For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://colab.google/notebooks/).
-### How can I handle Google Colab session timeouts during YOLOv8 training?
+### How can I handle Google Colab session timeouts during YOLO11 training?
Google Colab sessions timeout due to inactivity, especially for free users. To handle this:
@@ -136,9 +136,9 @@ Google Colab sessions timeout due to inactivity, especially for free users. To h
For more tips on managing your Colab session, visit the [Google Colab FAQ page](https://research.google.com/colaboratory/intl/en-GB/faq.html).
-### Can I use custom datasets for training YOLOv8 models in Google Colab?
+### Can I use custom datasets for training YOLO11 models in Google Colab?
-Yes, you can use custom datasets to train YOLOv8 models in Google Colab. Upload your dataset to Google Drive and load it directly into your Colab notebook. You can follow Nicolai's YouTube guide, [How to Train YOLOv8 Models on Your Custom Dataset](https://www.youtube.com/watch?v=LNwODJXcvt4), or refer to the [Custom Dataset Training guide](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab) for detailed steps.
+Yes, you can use custom datasets to train YOLO11 models in Google Colab. Upload your dataset to Google Drive and load it directly into your Colab notebook. You can follow Nicolai's YouTube guide, [How to Train YOLO11 Models on Your Custom Dataset](https://www.youtube.com/watch?v=LNwODJXcvt4), or refer to the [Custom Dataset Training guide](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab) for detailed steps.
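As a rough sketch of that workflow, assuming your dataset YAML lives at a hypothetical path such as `/content/drive/MyDrive/datasets/data.yaml`, mounting Drive and pointing training at it could look like this:

```python
from google.colab import drive

from ultralytics import YOLO

# Mount Google Drive so the dataset files are visible to the Colab runtime
drive.mount("/content/drive")

# Hypothetical path to your dataset configuration on Drive
data_yaml = "/content/drive/MyDrive/datasets/data.yaml"

# Fine-tune a pretrained YOLO11 model on the custom dataset
model = YOLO("yolo11n.pt")
model.train(data=data_yaml, epochs=50, imgsz=640)
```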
### What should I do if my Google Colab training session is interrupted?
diff --git a/docs/en/integrations/gradio.md b/docs/en/integrations/gradio.md
index b8a3644a6b3..3199a519cc0 100644
--- a/docs/en/integrations/gradio.md
+++ b/docs/en/integrations/gradio.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Discover an interactive way to perform object detection with Ultralytics YOLOv8 using Gradio. Upload images and adjust settings for real-time results.
-keywords: Ultralytics, YOLOv8, Gradio, object detection, interactive, real-time, image processing, AI
+description: Discover an interactive way to perform object detection with Ultralytics YOLO11 using Gradio. Upload images and adjust settings for real-time results.
+keywords: Ultralytics, YOLO11, Gradio, object detection, interactive, real-time, image processing, AI
---
-# Interactive [Object Detection](https://www.ultralytics.com/glossary/object-detection): Gradio & Ultralytics YOLOv8 🚀
+# Interactive [Object Detection](https://www.ultralytics.com/glossary/object-detection): Gradio & Ultralytics YOLO11 🚀
## Introduction to Interactive Object Detection
-This Gradio interface provides an easy and interactive way to perform object detection using the [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) model. Users can upload images and adjust parameters like confidence threshold and intersection-over-union (IoU) threshold to get real-time detection results.
+This Gradio interface provides an easy and interactive way to perform object detection using the [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) model. Users can upload images and adjust parameters like confidence threshold and intersection-over-union (IoU) threshold to get real-time detection results.
@@ -18,7 +18,7 @@ This Gradio interface provides an easy and interactive way to perform object det
allowfullscreen>
- Watch: Gradio Integration with Ultralytics YOLOv8
+ Watch: Gradio Integration with Ultralytics YOLO11
## Why Use Gradio for Object Detection?
@@ -52,7 +52,7 @@ pip install gradio
## Usage Example
-This section provides the Python code used to create the Gradio interface with the Ultralytics YOLOv8 model. Supports classification tasks, detection tasks, segmentation tasks, and key point tasks.
+This section provides the Python code used to create the Gradio interface with the Ultralytics YOLO11 model. It supports classification, detection, segmentation, and keypoint tasks.
```python
import gradio as gr
@@ -60,11 +60,11 @@ import PIL.Image as Image
from ultralytics import ASSETS, YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
def predict_image(img, conf_threshold, iou_threshold):
- """Predicts and plots labeled objects in an image using YOLOv8 model with adjustable confidence and IOU thresholds."""
+ """Predicts objects in an image using a YOLO11 model with adjustable confidence and IOU thresholds."""
results = model.predict(
source=img,
conf=conf_threshold,
@@ -90,7 +90,7 @@ iface = gr.Interface(
],
outputs=gr.Image(type="pil", label="Result"),
title="Ultralytics Gradio",
- description="Upload images for inference. The Ultralytics YOLOv8n model is used by default.",
+ description="Upload images for inference. The Ultralytics YOLO11n model is used by default.",
examples=[
[ASSETS / "bus.jpg", 0.25, 0.45],
[ASSETS / "zidane.jpg", 0.25, 0.45],
@@ -119,9 +119,9 @@ if __name__ == "__main__":
## FAQ
-### How do I use Gradio with Ultralytics YOLOv8 for object detection?
+### How do I use Gradio with Ultralytics YOLO11 for object detection?
-To use Gradio with Ultralytics YOLOv8 for object detection, you can follow these steps:
+To use Gradio with Ultralytics YOLO11 for object detection, you can follow these steps:
1. **Install Gradio:** Use the command `pip install gradio`.
2. **Create Interface:** Write a Python script to initialize the Gradio interface. You can refer to the provided code example in the [documentation](#usage-example) for details.
@@ -134,7 +134,7 @@ import gradio as gr
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
def predict_image(img, conf_threshold, iou_threshold):
@@ -156,15 +156,15 @@ iface = gr.Interface(
gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold"),
],
outputs=gr.Image(type="pil", label="Result"),
- title="Ultralytics Gradio YOLOv8",
- description="Upload images for YOLOv8 object detection.",
+ title="Ultralytics Gradio YOLO11",
+ description="Upload images for YOLO11 object detection.",
)
iface.launch()
```
-### What are the benefits of using Gradio for Ultralytics YOLOv8 object detection?
+### What are the benefits of using Gradio for Ultralytics YOLO11 object detection?
-Using Gradio for Ultralytics YOLOv8 object detection offers several benefits:
+Using Gradio for Ultralytics YOLO11 object detection offers several benefits:
- **User-Friendly Interface:** Gradio provides an intuitive interface for users to upload images and visualize detection results without any coding effort.
- **Real-Time Adjustments:** You can dynamically adjust detection parameters such as confidence and IoU thresholds and see the effects immediately.
@@ -172,22 +172,22 @@ Using Gradio for Ultralytics YOLOv8 object detection offers several benefits:
For more details, you can read this [blog post](https://www.ultralytics.com/blog/ai-and-radiology-a-new-era-of-precision-and-efficiency).
-### Can I use Gradio and Ultralytics YOLOv8 together for educational purposes?
+### Can I use Gradio and Ultralytics YOLO11 together for educational purposes?
-Yes, Gradio and Ultralytics YOLOv8 can be utilized together for educational purposes effectively. Gradio's intuitive web interface makes it easy for students and educators to interact with state-of-the-art [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like Ultralytics YOLOv8 without needing advanced programming skills. This setup is ideal for demonstrating key concepts in object detection and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), as Gradio provides immediate visual feedback which helps in understanding the impact of different parameters on the detection performance.
+Yes, Gradio and Ultralytics YOLO11 can be used together effectively for educational purposes. Gradio's intuitive web interface makes it easy for students and educators to interact with state-of-the-art [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like Ultralytics YOLO11 without needing advanced programming skills. This setup is ideal for demonstrating key concepts in object detection and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), as Gradio provides immediate visual feedback that helps in understanding how different parameters affect detection performance.
-### How do I adjust the confidence and IoU thresholds in the Gradio interface for YOLOv8?
+### How do I adjust the confidence and IoU thresholds in the Gradio interface for YOLO11?
-In the Gradio interface for YOLOv8, you can adjust the confidence and IoU thresholds using the sliders provided. These thresholds help control the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) and object separation:
+In the Gradio interface for YOLO11, you can adjust the confidence and IoU thresholds using the sliders provided. These thresholds help control the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) and object separation:
- **Confidence Threshold:** Determines the minimum confidence level for detecting objects. Slide to increase or decrease the confidence required.
- **IoU Threshold:** Sets the intersection-over-union threshold for distinguishing between overlapping objects. Adjust this value to refine object separation.
For more information on these parameters, visit the [parameters explanation section](#parameters-explanation).
-### What are some practical applications of using Ultralytics YOLOv8 with Gradio?
+### What are some practical applications of using Ultralytics YOLO11 with Gradio?
-Practical applications of combining Ultralytics YOLOv8 with Gradio include:
+Practical applications of combining Ultralytics YOLO11 with Gradio include:
- **Real-Time Object Detection Demonstrations:** Ideal for showcasing how object detection works in real-time.
- **Educational Tools:** Useful in academic settings to teach object detection and computer vision concepts.
@@ -196,4 +196,4 @@ Practical applications of combining Ultralytics YOLOv8 with Gradio include:
For examples of similar use cases, check out the [Ultralytics blog](https://www.ultralytics.com/blog/monitoring-animal-behavior-using-ultralytics-yolov8).
-Providing this information within the documentation will help in enhancing the usability and accessibility of Ultralytics YOLOv8, making it more approachable for users at all levels of expertise.
+Providing this information within the documentation helps enhance the usability and accessibility of Ultralytics YOLO11, making it more approachable for users at all levels of expertise.
diff --git a/docs/en/integrations/ibm-watsonx.md b/docs/en/integrations/ibm-watsonx.md
index cda19b055ce..0e77bc5e1bc 100644
--- a/docs/en/integrations/ibm-watsonx.md
+++ b/docs/en/integrations/ibm-watsonx.md
@@ -1,18 +1,18 @@
---
comments: true
-description: Dive into our detailed integration guide on using IBM Watson to train a YOLOv8 model. Uncover key features and step-by-step instructions on model training.
-keywords: IBM Watsonx, IBM Watsonx AI, What is Watson?, IBM Watson Integration, IBM Watson Features, YOLOv8, Ultralytics, Model Training, GPU, TPU, cloud computing
+description: Dive into our detailed integration guide on using IBM Watsonx to train a YOLO11 model. Uncover key features and step-by-step instructions on model training.
+keywords: IBM Watsonx, IBM Watsonx AI, What is Watson?, IBM Watson Integration, IBM Watson Features, YOLO11, Ultralytics, Model Training, GPU, TPU, cloud computing
---
-# A Step-by-Step Guide to Training YOLOv8 Models with IBM Watsonx
+# A Step-by-Step Guide to Training YOLO11 Models with IBM Watsonx
Nowadays, scalable [computer vision solutions](../guides/steps-of-a-cv-project.md) are becoming more common and transforming the way we handle visual data. A great example is IBM Watsonx, an advanced AI and data platform that simplifies the development, deployment, and management of AI models. It offers a complete suite for the entire AI lifecycle and seamless integration with IBM Cloud services.
-You can train [Ultralytics YOLOv8 models](https://github.com/ultralytics/ultralytics) using IBM Watsonx. It's a good option for enterprises interested in efficient [model training](../modes/train.md), fine-tuning for specific tasks, and improving [model performance](../guides/model-evaluation-insights.md) with robust tools and a user-friendly setup. In this guide, we'll walk you through the process of training YOLOv8 with IBM Watsonx, covering everything from setting up your environment to evaluating your trained models. Let's get started!
+You can train [Ultralytics YOLO11 models](https://github.com/ultralytics/ultralytics) using IBM Watsonx. It's a good option for enterprises interested in efficient [model training](../modes/train.md), fine-tuning for specific tasks, and improving [model performance](../guides/model-evaluation-insights.md) with robust tools and a user-friendly setup. In this guide, we'll walk you through the process of training YOLO11 with IBM Watsonx, covering everything from setting up your environment to evaluating your trained models. Let's get started!
## What is IBM Watsonx?
-[Watsonx](https://www.ibm.com/watsonx) is IBM's cloud-based platform designed for commercial [generative AI](https://www.ultralytics.com/glossary/generative-ai) and scientific data. IBM Watsonx's three components - watsonx.ai, watsonx.data, and watsonx.governance - come together to create an end-to-end, trustworthy AI platform that can accelerate AI projects aimed at solving business problems. It provides powerful tools for building, training, and [deploying machine learning models](../guides/model-deployment-options.md) and makes it easy to connect with various data sources.
+[Watsonx](https://www.ibm.com/watsonx) is IBM's cloud-based platform designed for commercial [generative AI](https://www.ultralytics.com/glossary/generative-ai) and scientific data. IBM Watsonx's three components - `watsonx.ai`, `watsonx.data`, and `watsonx.governance` - come together to create an end-to-end, trustworthy AI platform that can accelerate AI projects aimed at solving business problems. It provides powerful tools for building, training, and [deploying machine learning models](../guides/model-deployment-options.md) and makes it easy to connect with various data sources.
@@ -22,7 +22,7 @@ Its user-friendly interface and collaborative capabilities streamline the develo
## Key Features of IBM Watsonx
-IBM Watsonx is made of three main components: watsonx.ai, watsonx.data, and watsonx.governance. Each component offers features that cater to different aspects of AI and data management. Let's take a closer look at them.
+IBM Watsonx is made of three main components: `watsonx.ai`, `watsonx.data`, and `watsonx.governance`. Each component offers features that cater to different aspects of AI and data management. Let's take a closer look at them.
### [Watsonx.ai](https://www.ibm.com/products/watsonx-ai)
@@ -36,9 +36,9 @@ Watsonx.data supports both cloud and on-premises deployments through the IBM Sto
Watsonx.governance makes compliance easier by automatically identifying regulatory changes and enforcing policies. It links requirements to internal risk data and provides up-to-date AI factsheets. The platform helps manage risk with alerts and tools to detect issues such as [bias and drift](../guides/model-monitoring-and-maintenance.md). It also automates the monitoring and documentation of the AI lifecycle, organizes AI development with a model inventory, and enhances collaboration with user-friendly dashboards and reporting tools.
-## How to Train YOLOv8 Using IBM Watsonx
+## How to Train YOLO11 Using IBM Watsonx
-You can use IBM Watsonx to accelerate your YOLOv8 model training workflow.
+You can use IBM Watsonx to accelerate your YOLO11 model training workflow.
### Prerequisites
@@ -67,7 +67,7 @@ Next, you can install and import the necessary Python libraries.
pip install ultralytics==8.0.196
```
-For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
Then, you can import the needed packages.
@@ -86,7 +86,7 @@ Then, you can import the needed packages.
### Step 3: Load the Data
-For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLOv8 model to detect and classify litter and biological objects in underwater images.
+For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLO11 model to detect and classify litter and biological objects in underwater images.
We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials".
@@ -133,7 +133,7 @@ After loading the dataset, we printed and saved our working directory. We have a
If you see "trash_ICRA19" among the directory's contents, then it has loaded successfully. You should see three files/folders: a `config.yaml` file, a `videos_for_testing` directory, and a `dataset` directory. We will ignore the `videos_for_testing` directory, so feel free to delete it.
-We will use the config.yaml file and the contents of the dataset directory to train our [object detection](https://www.ultralytics.com/glossary/object-detection) model. Here is a sample image from our marine litter data set.
+We will use the `config.yaml` file and the contents of the dataset directory to train our [object detection](https://www.ultralytics.com/glossary/object-detection) model. Here is a sample image from our marine litter dataset.
@@ -205,14 +205,14 @@ names:
2: rov
```
-Run the following script to delete the current contents of config.yaml and replace it with the above contents that reflect our new data set directory structure. Be certain to replace the work_dir portion of the root directory path in line 4 with your own working directory path we retrieved earlier. Leave the train, val, and test subdirectory definitions. Also, do not change {work_dir} in line 23 of the code.
+Run the following script to delete the current contents of `config.yaml` and replace it with the above contents that reflect our new dataset directory structure. Be certain to replace the `work_dir` portion of the root directory path in line 4 with your own working directory path that we retrieved earlier. Leave the `train`, `val`, and `test` subdirectory definitions unchanged. Also, do not change `{work_dir}` in line 23 of the code.
!!! example "Edit the .yaml File"
=== "Python"
```python
- # Contents of new confg.yaml file
+ # Contents of new config.yaml file
def update_yaml_file(file_path):
data = {
"path": "work_dir/trash_ICRA19/dataset",
@@ -236,34 +236,34 @@ Run the following script to delete the current contents of config.yaml and repla
print(f"{file_path} updated successfully.")
```
-### Step 5: Train the YOLOv8 model
+### Step 5: Train the YOLO11 model
-Run the following command-line code to fine tune a pretrained default YOLOv8 model.
+Run the following command-line code to fine-tune a pretrained default YOLO11 model.
-!!! example "Train the YOLOv8 model"
+!!! example "Train the YOLO11 model"
=== "CLI"
```bash
- !yolo task=detect mode=train data={work_dir}/trash_ICRA19/config.yaml model=yolov8s.pt epochs=2 batch=32 lr0=.04 plots=True
+ !yolo task=detect mode=train data={work_dir}/trash_ICRA19/config.yaml model=yolo11n.pt epochs=2 batch=32 lr0=.04 plots=True
```
Here's a closer look at the parameters in the model training command:
- **task**: It specifies the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) task for which you are using the specified YOLO model and data set.
- **mode**: Denotes the purpose for which you are loading the specified model and data. Since we are training a model, it is set to "train." Later, when we test our model's performance, we will set it to "predict."
-- **epochs**: This delimits the number of times YOLOv8 will pass through our entire data set.
+- **epochs**: This sets the number of times YOLO11 will pass through our entire dataset.
- **batch**: The numerical value stipulates the training [batch sizes](https://www.ultralytics.com/glossary/batch-size). Batches are the number of images a model processes before it updates its parameters.
- **lr0**: Specifies the model's initial [learning rate](https://www.ultralytics.com/glossary/learning-rate).
- **plots**: Directs YOLO to generate and save plots of our model's training and evaluation metrics.
-For a detailed understanding of the model training process and best practices, refer to the [YOLOv8 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLOv8 effectively.
+For a detailed understanding of the model training process and best practices, refer to the [YOLO11 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLO11 effectively.
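If you prefer the Python API over the CLI, the same fine-tuning run from Step 5 can be sketched roughly as follows (assuming `work_dir` holds the working directory path retrieved earlier):

```python
from ultralytics import YOLO

work_dir = "/path/to/work_dir"  # hypothetical placeholder; use the working directory retrieved earlier

# Load the same pretrained YOLO11 nano model used in the CLI command above
model = YOLO("yolo11n.pt")

# Python-API equivalent of the yolo task=detect mode=train ... command
model.train(
    data=f"{work_dir}/trash_ICRA19/config.yaml",
    epochs=2,
    batch=32,
    lr0=0.04,
    plots=True,
)
```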
### Step 6: Test the Model
We can now run inference to test the performance of our fine-tuned model:
-!!! example "Test the YOLOv8 model"
+!!! example "Test the YOLO11 model"
=== "CLI"
@@ -312,11 +312,11 @@ Unlike precision, recall moves in the opposite direction, showing greater recall
### Step 8: Calculating [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou)
-You can measure the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) by calculating the IoU between a predicted bounding box and a ground truth bounding box for the same object. Check out [IBM's tutorial on training YOLOv8](https://developer.ibm.com/tutorials/awb-train-yolo-object-detection-model-in-python/) for more details.
+You can measure the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) by calculating the IoU between a predicted bounding box and a ground truth bounding box for the same object. Check out [IBM's tutorial on training YOLO11](https://developer.ibm.com/tutorials/awb-train-yolo-object-detection-model-in-python/) for more details.
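As a small illustrative sketch (not part of the guide's workflow), the IoU of two axis-aligned boxes given in `[x1, y1, x2, y2]` format can be computed like this:

```python
def box_iou(box1, box2):
    """Compute IoU between two boxes given as [x1, y1, x2, y2]."""
    # Coordinates of the intersection rectangle
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)

    # Union area = sum of box areas minus the intersection
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter
    return inter / union if union else 0.0


print(box_iou([0, 0, 100, 100], [50, 50, 150, 150]))  # ~0.143
```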
## Summary
-We explored IBM Watsonx key features, and how to train a YOLOv8 model using IBM Watsonx. We also saw how IBM Watsonx can enhance your AI workflows with advanced tools for model building, data management, and compliance.
+We explored IBM Watsonx's key features and how to train a YOLO11 model using IBM Watsonx. We also saw how IBM Watsonx can enhance your AI workflows with advanced tools for model building, data management, and compliance.
For further details on usage, visit [IBM Watsonx official documentation](https://www.ibm.com/watsonx).
@@ -324,9 +324,9 @@ Also, be sure to check out the [Ultralytics integration guide page](./index.md),
## FAQ
-### How do I train a YOLOv8 model using IBM Watsonx?
+### How do I train a YOLO11 model using IBM Watsonx?
-To train a YOLOv8 model using IBM Watsonx, follow these steps:
+To train a YOLO11 model using IBM Watsonx, follow these steps:
1. **Set Up Your Environment**: Create an IBM Cloud account and set up a Watsonx.ai project. Use a Jupyter Notebook for your coding environment.
2. **Install Libraries**: Install necessary libraries like `torch`, `opencv`, and `ultralytics`.
@@ -335,7 +335,7 @@ To train a YOLOv8 model using IBM Watsonx, follow these steps:
5. **Train the Model**: Use the YOLO command-line interface to train your model with specific parameters like `epochs`, `batch size`, and `learning rate`.
6. **Test and Evaluate**: Run inference to test the model and evaluate its performance using metrics like precision and recall.
-For detailed instructions, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+For detailed instructions, refer to our [YOLO11 Model Training guide](../modes/train.md).
### What are the key features of IBM Watsonx for AI model training?
@@ -347,20 +347,20 @@ IBM Watsonx offers several key features for AI model training:
For more information, visit the [IBM Watsonx official documentation](https://www.ibm.com/watsonx).
-### Why should I use IBM Watsonx for training Ultralytics YOLOv8 models?
+### Why should I use IBM Watsonx for training Ultralytics YOLO11 models?
-IBM Watsonx is an excellent choice for training Ultralytics YOLOv8 models due to its comprehensive suite of tools that streamline the AI lifecycle. Key benefits include:
+IBM Watsonx is an excellent choice for training Ultralytics YOLO11 models due to its comprehensive suite of tools that streamline the AI lifecycle. Key benefits include:
- **Scalability**: Easily scale your model training with IBM Cloud services.
- **Integration**: Seamlessly integrate with various data sources and APIs.
- **User-Friendly Interface**: Simplifies the development process with a collaborative and intuitive interface.
- **Advanced Tools**: Access to powerful tools like the Prompt Lab, Tuning Studio, and Flows Engine for enhancing model performance.
-Learn more about [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) and how to train models using IBM Watsonx in our [integration guide](./index.md).
+Learn more about [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) and how to train models using IBM Watsonx in our [integration guide](./index.md).
-### How can I preprocess my dataset for YOLOv8 training on IBM Watsonx?
+### How can I preprocess my dataset for YOLO11 training on IBM Watsonx?
-To preprocess your dataset for YOLOv8 training on IBM Watsonx:
+To preprocess your dataset for YOLO11 training on IBM Watsonx:
1. **Organize Directories**: Ensure your dataset follows the YOLO directory structure with separate subdirectories for images and labels within the train/val/test split.
2. **Update .yaml File**: Modify the `.yaml` configuration file to reflect the new directory structure and class names.
@@ -399,9 +399,9 @@ if __name__ == "__main__":
For more details, refer to our [data preprocessing guide](../guides/preprocessing_annotated_data.md).
-### What are the prerequisites for training a YOLOv8 model on IBM Watsonx?
+### What are the prerequisites for training a YOLO11 model on IBM Watsonx?
-Before you start training a YOLOv8 model on IBM Watsonx, ensure you have the following prerequisites:
+Before you start training a YOLO11 model on IBM Watsonx, ensure you have the following prerequisites:
- **IBM Cloud Account**: Create an account on IBM Cloud to access Watsonx.ai.
- **Kaggle Account**: For loading datasets, you'll need a Kaggle account and an API key.
diff --git a/docs/en/integrations/index.md b/docs/en/integrations/index.md
index 391b1ecb81a..4b91b18f2ec 100644
--- a/docs/en/integrations/index.md
+++ b/docs/en/integrations/index.md
@@ -18,7 +18,7 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of
allowfullscreen>
- Watch: Ultralytics YOLOv8 Deployment and Integrations
+ Watch: Ultralytics YOLO11 Deployment and Integrations
## Datasets Integrations
@@ -27,67 +27,77 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of
## Training Integrations
+- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle.
+
- [ClearML](clearml.md): Automate your Ultralytics ML workflows, monitor experiments, and foster team collaboration.
- [Comet ML](comet.md): Enhance your model development with Ultralytics by tracking, comparing, and optimizing your machine learning experiments.
- [DVC](dvc.md): Implement version control for your Ultralytics machine learning projects, synchronizing data, code, and models effectively.
-- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment.
+- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing.
-- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models.
+- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system.
+
+- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency.
+
+- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing.
+
+- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment.
- [Neptune](https://neptune.ai/): Maintain a comprehensive log of your ML experiments with Ultralytics in this metadata store designed for MLOps.
+- [Paperspace Gradient](paperspace.md): Paperspace Gradient simplifies working on YOLO11 projects by providing easy-to-use cloud tools for training, testing, and deploying your models quickly.
+
- [Ray Tune](ray-tune.md): Optimize the hyperparameters of your Ultralytics models at any scale.
- [TensorBoard](tensorboard.md): Visualize your Ultralytics ML workflows, monitor model metrics, and foster team collaboration.
+- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models.
+
- [Weights & Biases (W&B)](weights-biases.md): Monitor experiments, visualize metrics, and foster reproducibility and collaboration on Ultralytics projects.
-- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle.
+- [VS Code](vscode.md): An extension for VS Code that provides code snippets to accelerate development workflows with Ultralytics, along with examples to help anyone learn or get started with Ultralytics.
-- [Paperspace Gradient](paperspace.md): Paperspace Gradient simplifies working on YOLOv8 projects by providing easy-to-use cloud tools for training, testing, and deploying your models quickly.
+- [Albumentations](albumentations.md): Enhance your Ultralytics models with powerful image augmentations to improve model robustness and generalization.
-- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing.
+## Deployment Integrations
-- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing.
+- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment).
-- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency.
+- [Gradio](gradio.md) 🚀 NEW: Deploy Ultralytics models with Gradio for real-time, interactive object detection demos.
-- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system.
+- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms.
-## Deployment Integrations
+- [MNN](mnn.md): Developed by [Alibaba](https://www.alibabagroup.com/), MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device.
- [Neural Magic](neural-magic.md): Leverage Quantization Aware Training (QAT) and pruning techniques to optimize Ultralytics models for superior performance and leaner size.
-- [Gradio](gradio.md) 🚀 NEW: Deploy Ultralytics models with Gradio for real-time, interactive object detection demos.
-
-- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies.
-
- [ONNX](onnx.md): An open-source format created by [Microsoft](https://www.microsoft.com/) for facilitating the transfer of AI models between various frameworks, enhancing the versatility and deployment flexibility of Ultralytics models.
- [OpenVINO](openvino.md): Intel's toolkit for optimizing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models efficiently across various Intel CPU and GPU platforms.
-- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment.
+- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications.
-- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment).
+- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware.
- [TF SavedModel](tf-savedmodel.md): Developed by [Google](https://www.google.com/), TF SavedModel is a universal serialization format for [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) models, enabling easy sharing and deployment across a wide range of platforms, from servers to edge devices.
-- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware.
+- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models.
- [TFLite](tflite.md): Developed by [Google](https://www.google.com/), TFLite is a lightweight framework for deploying machine learning models on mobile and edge devices, ensuring fast, efficient inference with minimal memory footprint.
- [TFLite Edge TPU](edge-tpu.md): Developed by [Google](https://www.google.com/) for optimizing TensorFlow Lite models on Edge TPUs, this model format ensures high-speed, efficient [edge computing](https://www.ultralytics.com/glossary/edge-computing).
-- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models.
+- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment.
-- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications.
+- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies.
-- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms.
+- [SONY IMX500](sony-imx500.md): Optimize and deploy [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) models on Raspberry Pi AI Cameras with the IMX500 sensor for fast, low-power performance.
-- [VS Code](vscode.md): An extension for VS Code that provides code snippets for accelerating development workflows with Ultralytics and also for anyone looking for examples to help learn or get started with Ultralytics.
+- [Rockchip RKNN](rockchip-rknn.md): Developed by [Rockchip](https://www.rock-chips.com/), RKNN is a specialized neural network inference framework optimized for Rockchip's hardware platforms, particularly their NPUs. It facilitates efficient deployment of AI models on edge devices, enabling high-performance inference in real-time applications.
+
+- [Seeed Studio reCamera](seeedstudio-recamera.md): Developed by [Seeed Studio](https://www.seeedstudio.com/), the reCamera is a cutting-edge edge AI device designed for real-time computer vision applications. Powered by the RISC-V-based SG200X processor, it delivers high-performance AI inference with energy efficiency. Its modular design, advanced video processing capabilities, and support for flexible deployment make it an ideal choice for various use cases, including safety monitoring, environmental applications, and manufacturing.
### Export Formats
@@ -111,7 +121,7 @@ Let's collaborate to make the Ultralytics YOLO ecosystem more expansive and feat
### What is Ultralytics HUB, and how does it streamline the ML workflow?
-Ultralytics HUB is a cloud-based platform designed to make machine learning (ML) workflows for Ultralytics models seamless and efficient. By using this tool, you can easily upload datasets, train models, perform real-time tracking, and deploy YOLOv8 models without needing extensive coding skills. You can explore the key features on the [Ultralytics HUB](https://hub.ultralytics.com/) page and get started quickly with our [Quickstart](https://docs.ultralytics.com/hub/quickstart/) guide.
+Ultralytics HUB is a cloud-based platform designed to make machine learning (ML) workflows for Ultralytics models seamless and efficient. By using this tool, you can easily upload datasets, train models, perform real-time tracking, and deploy YOLO11 models without needing extensive coding skills. You can explore the key features on the [Ultralytics HUB](https://hub.ultralytics.com/) page and get started quickly with our [Quickstart](https://docs.ultralytics.com/hub/quickstart/) guide.
### How do I integrate Ultralytics YOLO models with Roboflow for dataset management?
@@ -121,9 +131,9 @@ Integrating Ultralytics YOLO models with Roboflow enhances dataset management by
Yes, you can. Integrating MLFlow with Ultralytics models allows you to track experiments, improve reproducibility, and streamline the entire ML lifecycle. Detailed instructions for setting up this integration can be found on the [MLFlow](mlflow.md) integration page. This integration is particularly useful for monitoring model metrics and managing the ML workflow efficiently.
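As a hedged sketch of how this is typically switched on from Python (assuming the `mlflow` key in the Ultralytics settings described on that page), enabling the logger and running a quick training job could look like:

```python
from ultralytics import YOLO, settings

# Enable the MLflow callback in the Ultralytics settings (persists across runs)
settings.update({"mlflow": True})

# Subsequent training runs are then logged to the configured MLflow tracking server
model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=3)
```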
-### What are the benefits of using Neural Magic for YOLOv8 model optimization?
+### What are the benefits of using Neural Magic for YOLO11 model optimization?
-Neural Magic optimizes YOLOv8 models by leveraging techniques like Quantization Aware Training (QAT) and pruning, resulting in highly efficient, smaller models that perform better on resource-limited hardware. Check out the [Neural Magic](neural-magic.md) integration page to learn how to implement these optimizations for superior performance and leaner models. This is especially beneficial for deployment on edge devices.
+Neural Magic optimizes YOLO11 models by leveraging techniques like Quantization Aware Training (QAT) and pruning, resulting in highly efficient, smaller models that perform better on resource-limited hardware. Check out the [Neural Magic](neural-magic.md) integration page to learn how to implement these optimizations for superior performance and leaner models. This is especially beneficial for deployment on edge devices.
### How do I deploy Ultralytics YOLO models with Gradio for interactive demos?
diff --git a/docs/en/integrations/jupyterlab.md b/docs/en/integrations/jupyterlab.md
index b3179918b1f..668940ffab5 100644
--- a/docs/en/integrations/jupyterlab.md
+++ b/docs/en/integrations/jupyterlab.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Explore our integration guide that explains how you can use JupyterLab to train a YOLOv8 model. We'll also cover key features and tips for common issues.
-keywords: JupyterLab, What is JupyterLab, How to Use JupyterLab, JupyterLab How to Use, YOLOv8, Ultralytics, Model Training, GPU, TPU, cloud computing
+description: Explore our integration guide that explains how you can use JupyterLab to train a YOLO11 model. We'll also cover key features and tips for common issues.
+keywords: JupyterLab, What is JupyterLab, How to Use JupyterLab, JupyterLab How to Use, YOLO11, Ultralytics, Model Training, GPU, TPU, cloud computing
---
-# A Guide on How to Use JupyterLab to Train Your YOLOv8 Models
+# A Guide on How to Use JupyterLab to Train Your YOLO11 Models
Building [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models can be tough, especially when you don't have the right tools or environment to work with. If you are facing this issue, JupyterLab might be the right solution for you. JupyterLab is a user-friendly, web-based platform that makes coding more flexible and interactive. You can use it to handle big datasets, create complex models, and even collaborate with others, all in one place.
-You can use JupyterLab to [work on projects](../guides/steps-of-a-cv-project.md) related to [Ultralytics YOLOv8 models](https://github.com/ultralytics/ultralytics). JupyterLab is a great option for efficient model development and experimentation. It makes it easy to start experimenting with and [training YOLOv8 models](../modes/train.md) right from your computer. Let's dive deeper into JupyterLab, its key features, and how you can use it to train YOLOv8 models.
+You can use JupyterLab to [work on projects](../guides/steps-of-a-cv-project.md) related to [Ultralytics YOLO11 models](https://github.com/ultralytics/ultralytics). JupyterLab is a great option for efficient model development and experimentation. It makes it easy to start experimenting with and [training YOLO11 models](../modes/train.md) right from your computer. Let's dive deeper into JupyterLab, its key features, and how you can use it to train YOLO11 models.
## What is JupyterLab?
@@ -26,7 +26,7 @@ Here are some of the key features that make JupyterLab a great option for model
- **Markdown Preview**: Working with Markdown files is more efficient in JupyterLab, thanks to its simultaneous preview feature. As you write or edit your Markdown file, you can see the formatted output in real-time. It makes it easier to double-check that your documentation looks perfect, saving you from having to switch back and forth between editing and preview modes.
- **Run Code from Text Files**: If you're sharing a text file with code, JupyterLab makes it easy to run it directly within the platform. You can highlight the code and press Shift + Enter to execute it. It is great for verifying code snippets quickly and helps guarantee that the code you share is functional and error-free.
-## Why Should You Use JupyterLab for Your YOLOv8 Projects?
+## Why Should You Use JupyterLab for Your YOLO11 Projects?
There are multiple platforms for developing and evaluating machine learning models, so what makes JupyterLab stand out? Let's explore some of the unique aspects that JupyterLab offers for your machine-learning projects:
@@ -46,9 +46,9 @@ When working with Kaggle, you might come across some common issues. Here are som
- **Installing JupyterLab Extensions**: JupyterLab supports various extensions to enhance functionality. You can install and customize these extensions to suit your needs. For detailed instructions, refer to [JupyterLab Extensions Guide](https://jupyterlab.readthedocs.io/en/latest/user/extensions.html) for more information.
- **Using Multiple Versions of Python**: If you need to work with different versions of Python, you can use Jupyter kernels configured with different Python versions, as shown in the sketch below.
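+As a quick illustration of both tips, the commands below install one extension from PyPI and register an extra Python environment as a selectable kernel. This is only a sketch: `jupyterlab-git` and `my-env` are example names, and the second command assumes the `ipykernel` package is installed in that environment.
+
+```bash
+# Install a JupyterLab extension (the Git extension is used here as an example)
+pip install jupyterlab-git
+
+# Register another Python environment as an additional Jupyter kernel
+# (requires the ipykernel package inside that environment)
+python -m ipykernel install --user --name my-env --display-name "Python (my-env)"
+```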
-## How to Use JupyterLab to Try Out YOLOv8
+## How to Use JupyterLab to Try Out YOLO11
-JupyterLab makes it easy to experiment with YOLOv8. To get started, follow these simple steps.
+JupyterLab makes it easy to experiment with YOLO11. To get started, follow these simple steps.
### Step 1: Install JupyterLab
@@ -63,7 +63,7 @@ First, you need to install JupyterLab. Open your terminal and run the command:
pip install jupyterlab
```
-### Step 2: Download the YOLOv8 Tutorial Notebook
+### Step 2: Download the YOLO11 Tutorial Notebook
Next, download the [tutorial.ipynb](https://github.com/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) file from the Ultralytics GitHub repository. Save this file to any directory on your local machine.
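+If you prefer the command line, one possible way to fetch the notebook is with `curl` (assuming it is installed on your system):
+
+```bash
+# Download the tutorial notebook from the Ultralytics repository
+curl -L -o tutorial.ipynb https://raw.githubusercontent.com/ultralytics/ultralytics/main/examples/tutorial.ipynb
+```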
@@ -85,13 +85,13 @@ Once you've run this command, it will open JupyterLab in your default web browse
### Step 4: Start Experimenting
-In JupyterLab, open the tutorial.ipynb notebook. You can now start running the cells to explore and experiment with YOLOv8.
+In JupyterLab, open the tutorial.ipynb notebook. You can now start running the cells to explore and experiment with YOLO11.
-
+
-JupyterLab's interactive environment allows you to modify code, visualize outputs, and document your findings all in one place. You can try out different configurations and understand how YOLOv8 works.
+JupyterLab's interactive environment allows you to modify code, visualize outputs, and document your findings all in one place. You can try out different configurations and understand how YOLO11 works.
-For a detailed understanding of the model training process and best practices, refer to the [YOLOv8 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLOv8 effectively.
+For a detailed understanding of the model training process and best practices, refer to the [YOLO11 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLO11 effectively.
## Keep Learning about Jupyterlab
@@ -103,17 +103,17 @@ If you're excited to learn more about JupyterLab, here are some great resources
## Summary
-We've explored how JupyterLab can be a powerful tool for experimenting with Ultralytics YOLOv8 models. Using its flexible and interactive environment, you can easily set up JupyterLab on your local machine and start working with YOLOv8. JupyterLab makes it simple to [train](../guides/model-training-tips.md) and [evaluate](../guides/model-testing.md) your models, visualize outputs, and [document your findings](../guides/model-monitoring-and-maintenance.md) all in one place.
+We've explored how JupyterLab can be a powerful tool for experimenting with Ultralytics YOLO11 models. Using its flexible and interactive environment, you can easily set up JupyterLab on your local machine and start working with YOLO11. JupyterLab makes it simple to [train](../guides/model-training-tips.md) and [evaluate](../guides/model-testing.md) your models, visualize outputs, and [document your findings](../guides/model-monitoring-and-maintenance.md) all in one place.
For more details, visit the [JupyterLab FAQ Page](https://jupyterlab.readthedocs.io/en/stable/getting_started/faq.html).
-Interested in more YOLOv8 integrations? Check out the [Ultralytics integration guide](./index.md) to explore additional tools and capabilities for your machine learning projects.
+Interested in more YOLO11 integrations? Check out the [Ultralytics integration guide](./index.md) to explore additional tools and capabilities for your machine learning projects.
## FAQ
-### How do I use JupyterLab to train a YOLOv8 model?
+### How do I use JupyterLab to train a YOLO11 model?
-To train a YOLOv8 model using JupyterLab:
+To train a YOLO11 model using JupyterLab:
1. Install JupyterLab and the Ultralytics package:
@@ -128,7 +128,7 @@ To train a YOLOv8 model using JupyterLab:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
```
4. Train the model on your custom dataset:
@@ -147,22 +147,22 @@ To train a YOLOv8 model using JupyterLab:
JupyterLab's interactive environment allows you to easily modify parameters, visualize results, and iterate on your model training process.
-### What are the key features of JupyterLab that make it suitable for YOLOv8 projects?
+### What are the key features of JupyterLab that make it suitable for YOLO11 projects?
-JupyterLab offers several features that make it ideal for YOLOv8 projects:
+JupyterLab offers several features that make it ideal for YOLO11 projects:
-1. Interactive code execution: Test and debug YOLOv8 code snippets in real-time.
+1. Interactive code execution: Test and debug YOLO11 code snippets in real-time.
2. Integrated file browser: Easily manage datasets, model weights, and configuration files.
3. Flexible layout: Arrange multiple notebooks, terminals, and output windows side-by-side for efficient workflow.
-4. Rich output display: Visualize YOLOv8 detection results, training curves, and model performance metrics inline.
-5. Markdown support: Document your YOLOv8 experiments and findings with rich text and images.
+4. Rich output display: Visualize YOLO11 detection results, training curves, and model performance metrics inline.
+5. Markdown support: Document your YOLO11 experiments and findings with rich text and images.
6. Extension ecosystem: Enhance functionality with extensions for version control, [remote computing](google-colab.md), and more.
-These features allow for a seamless development experience when working with YOLOv8 models, from data preparation to [model deployment](https://www.ultralytics.com/glossary/model-deployment).
+These features allow for a seamless development experience when working with YOLO11 models, from data preparation to [model deployment](https://www.ultralytics.com/glossary/model-deployment).
-### How can I optimize YOLOv8 model performance using JupyterLab?
+### How can I optimize YOLO11 model performance using JupyterLab?
-To optimize YOLOv8 model performance in JupyterLab:
+To optimize YOLO11 model performance in JupyterLab:
1. Use the autobatch feature to determine the optimal batch size:
@@ -190,11 +190,11 @@ To optimize YOLOv8 model performance in JupyterLab:
4. Experiment with different model architectures and [export formats](../modes/export.md) to find the best balance of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for your specific use case.
-JupyterLab's interactive environment allows for quick iterations and real-time feedback, making it easier to optimize your YOLOv8 models efficiently.
+JupyterLab's interactive environment allows for quick iterations and real-time feedback, making it easier to optimize your YOLO11 models efficiently.
-### How do I handle common issues when working with JupyterLab and YOLOv8?
+### How do I handle common issues when working with JupyterLab and YOLO11?
-When working with JupyterLab and YOLOv8, you might encounter some common issues. Here's how to handle them:
+When working with JupyterLab and YOLO11, you might encounter some common issues. Here's how to handle them:
1. GPU memory issues:
@@ -203,7 +203,7 @@ When working with JupyterLab and YOLOv8, you might encounter some common issues.
2. Package conflicts:
- - Create a separate conda environment for your YOLOv8 projects to avoid conflicts.
+ - Create a separate conda environment for your YOLO11 projects to avoid conflicts.
- Use `!pip install package_name` in a notebook cell to install missing packages.
3. Kernel crashes:
diff --git a/docs/en/integrations/kaggle.md b/docs/en/integrations/kaggle.md
index 66929d109d8..40c928fa06f 100644
--- a/docs/en/integrations/kaggle.md
+++ b/docs/en/integrations/kaggle.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Dive into our guide on YOLOv8's integration with Kaggle. Find out what Kaggle is, its key features, and how to train a YOLOv8 model using the integration.
-keywords: What is Kaggle, What is Kaggle Used For, YOLOv8, Kaggle Machine Learning, Model Training, GPU, TPU, cloud computing
+description: Dive into our guide on YOLO11's integration with Kaggle. Find out what Kaggle is, its key features, and how to train a YOLO11 model using the integration.
+keywords: What is Kaggle, What is Kaggle Used For, YOLO11, Kaggle Machine Learning, Model Training, GPU, TPU, cloud computing
---
-# A Guide on Using Kaggle to Train Your YOLOv8 Models
+# A Guide on Using Kaggle to Train Your YOLO11 Models
If you are learning about AI and working on [small projects](../solutions/index.md), you might not have access to powerful computing resources yet, and high-end hardware can be pretty expensive. Fortunately, Kaggle, a platform owned by Google, offers a great solution. Kaggle provides a free, cloud-based environment where you can access GPU resources, handle large datasets, and collaborate with a diverse community of data scientists and [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) enthusiasts.
-Kaggle is a great choice for [training](../guides/model-training-tips.md) and experimenting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics?tab=readme-ov-file) models. Kaggle Notebooks make using popular machine-learning libraries and frameworks in your projects easy. Let's explore Kaggle's main features and learn how you can train YOLOv8 models on this platform!
+Kaggle is a great choice for [training](../guides/model-training-tips.md) and experimenting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics?tab=readme-ov-file) models. Kaggle Notebooks make using popular machine-learning libraries and frameworks in your projects easy. Let's explore Kaggle's main features and learn how you can train YOLO11 models on this platform!
## What is Kaggle?
@@ -16,21 +16,21 @@ Kaggle is a platform that brings together data scientists from around the world
With more than [10 million users](https://www.kaggle.com/discussions/general/332147) as of 2022, Kaggle provides a rich environment for developing and experimenting with machine learning models. You don't need to worry about your local machine's specs or setup; you can dive right in with just a Kaggle account and a web browser.
-## Training YOLOv8 Using Kaggle
+## Training YOLO11 Using Kaggle
-Training YOLOv8 models on Kaggle is simple and efficient, thanks to the platform's access to powerful GPUs.
+Training YOLO11 models on Kaggle is simple and efficient, thanks to the platform's access to powerful GPUs.
-To get started, access the [Kaggle YOLOv8 Notebook](https://www.kaggle.com/code/ultralytics/yolov8). Kaggle's environment comes with pre-installed libraries like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making the setup process hassle-free.
+To get started, access the [Kaggle YOLO11 Notebook](https://www.kaggle.com/code/glennjocherultralytics/yolo11). Kaggle's environment comes with pre-installed libraries like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making the setup process hassle-free.
-
+
-Once you sign in to your Kaggle account, you can click on the option to copy and edit the code, select a GPU under the accelerator settings, and run the notebook's cells to begin training your model. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Once you sign in to your Kaggle account, you can click on the option to copy and edit the code, select a GPU under the accelerator settings, and run the notebook's cells to begin training your model. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md).
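+Inside the copied notebook, the core training step comes down to just a few lines of Python. Here's a minimal sketch that uses the `yolo11n.pt` weights and the small `coco8.yaml` demo dataset as stand-ins for your own model and data:
+
+```python
+from ultralytics import YOLO
+
+# Load a pretrained YOLO11n model and fine-tune it on the COCO8 demo dataset
+model = YOLO("yolo11n.pt")
+results = model.train(data="coco8.yaml", epochs=3, imgsz=640)
+```
+
+With a GPU accelerator enabled, the same cell runs unchanged but trains considerably faster.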

-On the [official YOLOv8 Kaggle notebook page](https://www.kaggle.com/code/ultralytics/yolov8), if you click on the three dots in the upper right-hand corner, you'll notice more options will pop up.
+On the [official YOLO11 Kaggle notebook page](https://www.kaggle.com/code/glennjocherultralytics/yolo11), if you click on the three dots in the upper right-hand corner, you'll notice more options will pop up.
-
+
These options include:
@@ -48,7 +48,7 @@ These options include:
When working with Kaggle, you might come across some common issues. Here are some points to help you navigate the platform smoothly:
-- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the Nvidia Tesla P100 GPU with 16GB of memory and also offers the option of using a Nvidia GPU T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster.
+- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the NVIDIA Tesla P100 GPU with 16GB of memory and also offers the option of using an NVIDIA T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster.
- **Kaggle Kernels**: Kaggle Kernels are free Jupyter notebook servers that can integrate GPUs, allowing you to perform machine learning operations on cloud computers. You don't have to rely on your own computer's CPU, avoiding overload and freeing up your local resources.
- **Kaggle Datasets**: Kaggle datasets are free to download. However, it's important to check the license for each dataset to understand any usage restrictions. Some datasets may have limitations on academic publications or commercial use. You can download datasets directly to your Kaggle notebook or anywhere else via the Kaggle API.
- **Saving and Committing Notebooks**: To save and commit a notebook on Kaggle, click "Save Version." This saves the current state of your notebook. Once the background kernel finishes generating the output files, you can access them from the Output tab on the main notebook page.
@@ -59,17 +59,17 @@ When working with Kaggle, you might come across some common issues. Here are som
Next, let's understand the features Kaggle offers that make it an excellent platform for data science and machine learning enthusiasts. Here are some of the key highlights:
-- **Datasets**: Kaggle hosts a massive collection of datasets on various topics. You can easily search and use these datasets in your projects, which is particularly handy for training and testing your YOLOv8 models.
+- **Datasets**: Kaggle hosts a massive collection of datasets on various topics. You can easily search and use these datasets in your projects, which is particularly handy for training and testing your YOLO11 models.
- **Competitions**: Known for its exciting competitions, Kaggle allows data scientists and machine learning enthusiasts to solve real-world problems. Competing helps you improve your skills, learn new techniques, and gain recognition in the community.
-- **Free Access to TPUs**: Kaggle provides free access to powerful TPUs, which are essential for training complex machine learning models. This means you can speed up processing and boost the performance of your YOLOv8 projects without incurring extra costs.
-- **Integration with Github**: Kaggle allows you to easily connect your GitHub repository to upload notebooks and save your work. This integration makes it convenient to manage and access your files.
+- **Free Access to TPUs**: Kaggle provides free access to powerful TPUs, which are essential for training complex machine learning models. This means you can speed up processing and boost the performance of your YOLO11 projects without incurring extra costs.
+- **Integration with GitHub**: Kaggle allows you to easily connect your GitHub repository to upload notebooks and save your work. This integration makes it convenient to manage and access your files.
- **Community and Discussions**: Kaggle boasts a strong community of data scientists and machine learning practitioners. The discussion forums and shared notebooks are fantastic resources for learning and troubleshooting. You can easily find help, share your knowledge, and collaborate with others.
-## Why Should You Use Kaggle for Your YOLOv8 Projects?
+## Why Should You Use Kaggle for Your YOLO11 Projects?
There are multiple platforms for training and evaluating machine learning models, so what makes Kaggle stand out? Let's dive into the benefits of using Kaggle for your machine-learning projects:
-- **Public Notebooks**: You can make your Kaggle notebooks public, allowing other users to view, vote, fork, and discuss your work. Kaggle promotes collaboration, feedback, and the sharing of ideas, helping you improve your YOLOv8 models.
+- **Public Notebooks**: You can make your Kaggle notebooks public, allowing other users to view, vote, fork, and discuss your work. Kaggle promotes collaboration, feedback, and the sharing of ideas, helping you improve your YOLO11 models.
- **Comprehensive History of Notebook Commits**: Kaggle creates a detailed history of your notebook commits. This allows you to review and track changes over time, making it easier to understand the evolution of your project and revert to previous versions if needed.
- **Console Access**: Kaggle provides a console, giving you more control over your environment. This feature allows you to perform various tasks directly from the command line, enhancing your workflow and productivity.
- **Resource Availability**: Each notebook editing session on Kaggle is provided with significant resources: 12 hours of execution time for CPU and GPU sessions, 9 hours of execution time for TPU sessions, and 20 gigabytes of auto-saved disk space.
@@ -81,34 +81,34 @@ If you want to learn more about Kaggle, here are some helpful resources to guide
- [**Kaggle Learn**](https://www.kaggle.com/learn): Discover a variety of free, interactive tutorials on Kaggle Learn. These courses cover essential data science topics and provide hands-on experience to help you master new skills.
- [**Getting Started with Kaggle**](https://www.kaggle.com/code/alexisbcook/getting-started-with-kaggle): This comprehensive guide walks you through the basics of using Kaggle, from joining competitions to creating your first notebook. It's a great starting point for newcomers.
-- [**Kaggle Medium Page**](https://medium.com/@kaggleteam): Explore tutorials, updates, and community contributions on Kaggle's Medium page. It's an excellent source for staying up-to-date with the latest trends and gaining deeper insights into data science.
+- [**Kaggle Medium Page**](https://medium.com/@kaggleteam): Explore tutorials, updates, and community contributions on Kaggle's Medium page. It's an excellent source for staying up-to-date with the latest trends and gaining deeper insights into data science.
## Summary
-We've seen how Kaggle can boost your YOLOv8 projects by providing free access to powerful GPUs, making model training and evaluation efficient. Kaggle's platform is user-friendly, with pre-installed libraries for quick setup.
+We've seen how Kaggle can boost your YOLO11 projects by providing free access to powerful GPUs, making model training and evaluation efficient. Kaggle's platform is user-friendly, with pre-installed libraries for quick setup.
For more details, visit [Kaggle's documentation](https://www.kaggle.com/docs).
-Interested in more YOLOv8 integrations? Check out the[ Ultralytics integration guide](https://docs.ultralytics.com/integrations/) to explore additional tools and capabilities for your machine learning projects.
+Interested in more YOLO11 integrations? Check out the [Ultralytics integration guide](https://docs.ultralytics.com/integrations/) to explore additional tools and capabilities for your machine learning projects.
## FAQ
-### How do I train a YOLOv8 model on Kaggle?
+### How do I train a YOLO11 model on Kaggle?
-Training a YOLOv8 model on Kaggle is straightforward. First, access the [Kaggle YOLOv8 Notebook](https://www.kaggle.com/ultralytics/yolov8). Sign in to your Kaggle account, copy and edit the notebook, and select a GPU under the accelerator settings. Run the notebook cells to start training. For more detailed steps, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Training a YOLO11 model on Kaggle is straightforward. First, access the [Kaggle YOLO11 Notebook](https://www.kaggle.com/code/glennjocherultralytics/yolo11). Sign in to your Kaggle account, copy and edit the notebook, and select a GPU under the accelerator settings. Run the notebook cells to start training. For more detailed steps, refer to our [YOLO11 Model Training guide](../modes/train.md).
-### What are the benefits of using Kaggle for YOLOv8 model training?
+### What are the benefits of using Kaggle for YOLO11 model training?
-Kaggle offers several advantages for training YOLOv8 models:
+Kaggle offers several advantages for training YOLO11 models:
-- **Free GPU Access**: Utilize powerful GPUs like Nvidia Tesla P100 or T4 x2 for up to 30 hours per week.
+- **Free GPU Access**: Utilize powerful GPUs like NVIDIA Tesla P100 or T4 x2 for up to 30 hours per week.
- **Pre-installed Libraries**: Libraries like TensorFlow and PyTorch are pre-installed, simplifying the setup.
- **Community Collaboration**: Engage with a vast community of data scientists and machine learning enthusiasts.
- **Version Control**: Easily manage different versions of your notebooks and revert to previous versions if needed.
For more details, visit our [Ultralytics integration guide](https://docs.ultralytics.com/integrations/).
-### What common issues might I encounter when using Kaggle for YOLOv8, and how can I resolve them?
+### What common issues might I encounter when using Kaggle for YOLO11, and how can I resolve them?
Common issues include:
@@ -119,7 +119,7 @@ Common issues include:
For more troubleshooting tips, see our [Common Issues guide](../guides/yolo-common-issues.md).
-### Why should I choose Kaggle over other platforms like Google Colab for training YOLOv8 models?
+### Why should I choose Kaggle over other platforms like Google Colab for training YOLO11 models?
Kaggle offers unique features that make it an excellent choice:
@@ -127,7 +127,8 @@ Kaggle offers unique features that make it an excellent choice:
- **Free Access to TPUs**: Speed up training with powerful TPUs without extra costs.
- **Comprehensive History**: Track changes over time with a detailed history of notebook commits.
- **Resource Availability**: Significant resources are provided for each notebook session, including 12 hours of execution time for CPU and GPU sessions.
- For a comparison with Google Colab, refer to our [Google Colab guide](./google-colab.md).
+
+For a comparison with Google Colab, refer to our [Google Colab guide](./google-colab.md).
### How can I revert to a previous version of my Kaggle notebook?
diff --git a/docs/en/integrations/mnn.md b/docs/en/integrations/mnn.md
new file mode 100644
index 00000000000..bd92c9a8f33
--- /dev/null
+++ b/docs/en/integrations/mnn.md
@@ -0,0 +1,344 @@
+---
+comments: true
+description: Optimize YOLO11 models for mobile and embedded devices by exporting to MNN format.
+keywords: Ultralytics, YOLO11, MNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models
+---
+
+# MNN Export and Deployment for YOLO11 Models
+
+## MNN
+
+
+
+
+
+[MNN](https://github.com/alibaba/MNN) is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and delivers industry-leading on-device performance for both. MNN is currently integrated into more than 30 Alibaba apps, such as Taobao, Tmall, Youku, DingTalk, and Xianyu, covering more than 70 usage scenarios including live streaming, short-video capture, search recommendation, image-based product search, interactive marketing, equity distribution, and security risk control. MNN is also used on embedded and IoT devices.
+
+## Export to MNN: Converting Your YOLO11 Model
+
+You can expand model compatibility and deployment flexibility by converting YOLO11 models to MNN format.
+
+### Installation
+
+To install the required packages, run:
+
+!!! tip "Installation"
+
+ === "CLI"
+
+ ```bash
+ # Install the required package for YOLO11 and MNN
+ pip install ultralytics
+ pip install MNN
+ ```
+
+### Usage
+
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md).
+
+!!! example "Usage"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
+
+ # Export the model to MNN format
+ model.export(format="mnn") # creates 'yolo11n.mnn'
+
+ # Load the exported MNN model
+ mnn_model = YOLO("yolo11n.mnn")
+
+ # Run inference
+ results = mnn_model("https://ultralytics.com/images/bus.jpg")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Export a YOLO11n PyTorch model to MNN format
+ yolo export model=yolo11n.pt format=mnn # creates 'yolo11n.mnn'
+
+ # Run inference with the exported model
+ yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg'
+ ```
+
+For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md).
+
+### MNN-Only Inference
+
+The examples below rely solely on MNN for YOLO11 preprocessing and inference, with both Python and C++ versions provided for easy deployment in any scenario.
+
+!!! example "MNN"
+
+ === "Python"
+
+ ```python
+ import argparse
+
+ import MNN
+ import MNN.cv as cv2
+ import MNN.numpy as np
+
+
+ def inference(model, img, precision, backend, thread):
+ config = {}
+ config["precision"] = precision
+ config["backend"] = backend
+ config["numThread"] = thread
+ rt = MNN.nn.create_runtime_manager((config,))
+ # net = MNN.nn.load_module_from_file(model, ['images'], ['output0'], runtime_manager=rt)
+ net = MNN.nn.load_module_from_file(model, [], [], runtime_manager=rt)
+ original_image = cv2.imread(img)
+ ih, iw, _ = original_image.shape
+ length = max((ih, iw))
+ scale = length / 640
+ image = np.pad(original_image, [[0, length - ih], [0, length - iw], [0, 0]], "constant")
+ image = cv2.resize(
+ image, (640, 640), 0.0, 0.0, cv2.INTER_LINEAR, -1, [0.0, 0.0, 0.0], [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0]
+ )
+ image = image[..., ::-1] # BGR to RGB
+ input_var = np.expand_dims(image, 0)
+ input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4)
+ output_var = net.forward(input_var)
+ output_var = MNN.expr.convert(output_var, MNN.expr.NCHW)
+ output_var = output_var.squeeze()
+ # output_var shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80]
+ cx = output_var[0]
+ cy = output_var[1]
+ w = output_var[2]
+ h = output_var[3]
+ probs = output_var[4:]
+        # [cx, cy, w, h] -> [x0, y0, x1, y1]
+ x0 = cx - w * 0.5
+ y0 = cy - h * 0.5
+ x1 = cx + w * 0.5
+ y1 = cy + h * 0.5
+ boxes = np.stack([x0, y0, x1, y1], axis=1)
+ # get max prob and idx
+ scores = np.max(probs, 0)
+ class_ids = np.argmax(probs, 0)
+ result_ids = MNN.expr.nms(boxes, scores, 100, 0.45, 0.25)
+ print(result_ids.shape)
+ # nms result box, score, ids
+ result_boxes = boxes[result_ids]
+ result_scores = scores[result_ids]
+ result_class_ids = class_ids[result_ids]
+ for i in range(len(result_boxes)):
+ x0, y0, x1, y1 = result_boxes[i].read_as_tuple()
+ y0 = int(y0 * scale)
+ y1 = int(y1 * scale)
+ x0 = int(x0 * scale)
+ x1 = int(x1 * scale)
+ print(result_class_ids[i])
+ cv2.rectangle(original_image, (x0, y0), (x1, y1), (0, 0, 255), 2)
+ cv2.imwrite("res.jpg", original_image)
+
+
+ if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str, required=True, help="the yolo11 model path")
+ parser.add_argument("--img", type=str, required=True, help="the input image path")
+ parser.add_argument("--precision", type=str, default="normal", help="inference precision: normal, low, high, lowBF")
+ parser.add_argument(
+ "--backend",
+ type=str,
+ default="CPU",
+ help="inference backend: CPU, OPENCL, OPENGL, NN, VULKAN, METAL, TRT, CUDA, HIAI",
+ )
+ parser.add_argument("--thread", type=int, default=4, help="inference using thread: int")
+ args = parser.parse_args()
+ inference(args.model, args.img, args.precision, args.backend, args.thread)
+ ```
+
+ === "CPP"
+
+ ```cpp
+    #include <cstdlib>
+    #include <memory>
+    #include <vector>
+
+    // MNN expression API and image helpers; header paths assume a standard MNN installation
+    #include <MNN/Interpreter.hpp>
+    #include <MNN/expr/Executor.hpp>
+    #include <MNN/expr/ExprCreator.hpp>
+    #include <MNN/expr/Module.hpp>
+
+    #include <cv/cv.hpp>
+
+ using namespace MNN;
+ using namespace MNN::Express;
+ using namespace MNN::CV;
+
+ int main(int argc, const char* argv[]) {
+ if (argc < 3) {
+ MNN_PRINT("Usage: ./yolo11_demo.out model.mnn input.jpg [forwardType] [precision] [thread]\n");
+ return 0;
+ }
+ int thread = 4;
+ int precision = 0;
+ int forwardType = MNN_FORWARD_CPU;
+ if (argc >= 4) {
+ forwardType = atoi(argv[3]);
+ }
+ if (argc >= 5) {
+ precision = atoi(argv[4]);
+ }
+ if (argc >= 6) {
+ thread = atoi(argv[5]);
+ }
+ MNN::ScheduleConfig sConfig;
+        sConfig.type = static_cast<MNNForwardType>(forwardType);
+        sConfig.numThread = thread;
+        BackendConfig bConfig;
+        bConfig.precision = static_cast<BackendConfig::PrecisionMode>(precision);
+        sConfig.backendConfig = &bConfig;
+        std::shared_ptr<Executor::RuntimeManager> rtmgr = std::shared_ptr<Executor::RuntimeManager>(Executor::RuntimeManager::createRuntimeManager(sConfig));
+ if(rtmgr == nullptr) {
+ MNN_ERROR("Empty RuntimeManger\n");
+ return 0;
+ }
+ rtmgr->setCache(".cachefile");
+
+        std::shared_ptr<Module> net(Module::load(std::vector<std::string>{}, std::vector<std::string>{}, argv[1], rtmgr));
+ auto original_image = imread(argv[2]);
+ auto dims = original_image->getInfo()->dim;
+ int ih = dims[0];
+ int iw = dims[1];
+ int len = ih > iw ? ih : iw;
+ float scale = len / 640.0;
+        std::vector<int> padvals { 0, len - ih, 0, len - iw, 0, 0 };
+        auto pads = _Const(static_cast<void*>(padvals.data()), {3, 2}, NCHW, halide_type_of<int>());
+ auto image = _Pad(original_image, pads, CONSTANT);
+ image = resize(image, Size(640, 640), 0, 0, INTER_LINEAR, -1, {0., 0., 0.}, {1./255., 1./255., 1./255.});
+ image = cvtColor(image, COLOR_BGR2RGB);
+ auto input = _Unsqueeze(image, {0});
+ input = _Convert(input, NC4HW4);
+ auto outputs = net->onForward({input});
+ auto output = _Convert(outputs[0], NCHW);
+ output = _Squeeze(output);
+ // output shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80]
+ auto cx = _Gather(output, _Scalar(0));
+ auto cy = _Gather(output, _Scalar(1));
+ auto w = _Gather(output, _Scalar(2));
+ auto h = _Gather(output, _Scalar(3));
+        std::vector<int> startvals { 4, 0 };
+        auto start = _Const(static_cast<void*>(startvals.data()), {2}, NCHW, halide_type_of<int>());
+        std::vector<int> sizevals { -1, -1 };
+        auto size = _Const(static_cast<void*>(sizevals.data()), {2}, NCHW, halide_type_of<int>());
+        auto probs = _Slice(output, start, size);
+        // [cx, cy, w, h] -> [x0, y0, x1, y1]
+ auto x0 = cx - w * _Const(0.5);
+ auto y0 = cy - h * _Const(0.5);
+ auto x1 = cx + w * _Const(0.5);
+ auto y1 = cy + h * _Const(0.5);
+ auto boxes = _Stack({x0, y0, x1, y1}, 1);
+ auto scores = _ReduceMax(probs, {0});
+ auto ids = _ArgMax(probs, 0);
+ auto result_ids = _Nms(boxes, scores, 100, 0.45, 0.25);
+        auto result_ptr = result_ids->readMap<int>();
+        auto box_ptr = boxes->readMap<float>();
+        auto ids_ptr = ids->readMap<int>();
+        auto score_ptr = scores->readMap<float>();
+ for (int i = 0; i < 100; i++) {
+ auto idx = result_ptr[i];
+ if (idx < 0) break;
+ auto x0 = box_ptr[idx * 4 + 0] * scale;
+ auto y0 = box_ptr[idx * 4 + 1] * scale;
+ auto x1 = box_ptr[idx * 4 + 2] * scale;
+ auto y1 = box_ptr[idx * 4 + 3] * scale;
+ auto class_idx = ids_ptr[idx];
+ auto score = score_ptr[idx];
+ rectangle(original_image, {x0, y0}, {x1, y1}, {0, 0, 255}, 2);
+ }
+ if (imwrite("res.jpg", original_image)) {
+ MNN_PRINT("result image write to `res.jpg`.\n");
+ }
+ rtmgr->updateCache();
+ return 0;
+ }
+ ```
+
+## Summary
+
+In this guide, we introduced how to export Ultralytics YOLO11 models to the MNN format and run inference with MNN.
+
+For more usage details, please refer to the [MNN documentation](https://mnn-docs.readthedocs.io/en/latest).
+
+## FAQ
+
+### How do I export Ultralytics YOLO11 models to MNN format?
+
+To export your Ultralytics YOLO11 model to MNN format, follow these steps:
+
+!!! example "Export"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
+
+ # Export to MNN format
+    model.export(format="mnn")  # creates 'yolo11n.mnn' with fp32 weights
+    model.export(format="mnn", half=True)  # creates 'yolo11n.mnn' with fp16 weights
+    model.export(format="mnn", int8=True)  # creates 'yolo11n.mnn' with int8 weights
+ ```
+
+ === "CLI"
+
+ ```bash
+    yolo export model=yolo11n.pt format=mnn  # creates 'yolo11n.mnn' with fp32 weights
+    yolo export model=yolo11n.pt format=mnn half=True  # creates 'yolo11n.mnn' with fp16 weights
+    yolo export model=yolo11n.pt format=mnn int8=True  # creates 'yolo11n.mnn' with int8 weights
+ ```
+
+For detailed export options, check the [Export](../modes/export.md) page in the documentation.
+
+### How do I predict with an exported YOLO11 MNN model?
+
+To predict with an exported YOLO11 MNN model, use the `predict` function from the YOLO class.
+
+!!! example "Predict"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the YOLO11 MNN model
+ model = YOLO("yolo11n.mnn")
+
+    # Run inference
+    results = model("https://ultralytics.com/images/bus.jpg")  # predict with `fp32`
+    results = model("https://ultralytics.com/images/bus.jpg", half=True)  # predict with `fp16` if the device supports it
+
+ for result in results:
+ result.show() # display to screen
+ result.save(filename="result.jpg") # save to disk
+ ```
+
+ === "CLI"
+
+ ```bash
+ yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' # predict with `fp32`
+    yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' half=True  # predict with `fp16` if the device supports it
+ ```
+
+### What platforms are supported for MNN?
+
+MNN is versatile and supports various platforms:
+
+- **Mobile**: Android, iOS, Harmony.
+- **Embedded Systems and IoT Devices**: Devices like Raspberry Pi and NVIDIA Jetson.
+- **Desktop and Servers**: Linux, Windows, and macOS.
+
+### How can I deploy Ultralytics YOLO11 MNN models on Mobile Devices?
+
+To deploy your YOLO11 models on mobile devices:
+
+1. **Build for Android**: Follow the [MNN Android](https://github.com/alibaba/MNN/tree/master/project/android) guide.
+2. **Build for iOS**: Follow the [MNN iOS](https://github.com/alibaba/MNN/tree/master/project/ios) guide.
+3. **Build for Harmony**: Follow the [MNN Harmony](https://github.com/alibaba/MNN/tree/master/project/harmony) guide.
diff --git a/docs/en/integrations/ncnn.md b/docs/en/integrations/ncnn.md
index 42d04198e14..9dc13f966f7 100644
--- a/docs/en/integrations/ncnn.md
+++ b/docs/en/integrations/ncnn.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Optimize YOLOv8 models for mobile and embedded devices by exporting to NCNN format. Enhance performance in resource-constrained environments.
-keywords: Ultralytics, YOLOv8, NCNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models
+description: Optimize YOLO11 models for mobile and embedded devices by exporting to NCNN format. Enhance performance in resource-constrained environments.
+keywords: Ultralytics, YOLO11, NCNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models
---
-# How to Export to NCNN from YOLOv8 for Smooth Deployment
+# How to Export to NCNN from YOLO11 for Smooth Deployment
Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models on devices with limited computational power, such as mobile or embedded systems, can be tricky. You need to make sure you use a format optimized for optimal performance. This makes sure that even devices with limited processing power can handle advanced computer vision tasks well.
-The export to NCNN format feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for lightweight device-based applications. In this guide, we'll walk you through how to convert your models to the NCNN format, making it easier for your models to perform well on various mobile and embedded devices.
+The export to NCNN format feature allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for lightweight device-based applications. In this guide, we'll walk you through how to convert your models to the NCNN format, making it easier for your models to perform well on various mobile and embedded devices.
## Why should you export to NCNN?
@@ -34,7 +34,7 @@ NCNN models offer a wide range of key features that enable on-device [machine le
## Deployment Options with NCNN
-Before we look at the code for exporting YOLOv8 models to the NCNN format, let's understand how NCNN models are normally used.
+Before we look at the code for exporting YOLO11 models to the NCNN format, let's understand how NCNN models are normally used.
NCNN models, designed for efficiency and performance, are compatible with a variety of deployment platforms:
@@ -44,9 +44,9 @@ NCNN models, designed for efficiency and performance, are compatible with a vari
- **Desktop and Server Deployment**: Capable of being deployed in desktop and server environments across Linux, Windows, and macOS, supporting development, training, and evaluation with higher computational capacities.
-## Export to NCNN: Converting Your YOLOv8 Model
+## Export to NCNN: Converting Your YOLO11 Model
-You can expand model compatibility and deployment flexibility by converting YOLOv8 models to NCNN format.
+You can expand model compatibility and deployment flexibility by converting YOLO11 models to NCNN format.
### Installation
@@ -57,15 +57,15 @@ To install the required packages, run:
=== "CLI"
```bash
- # Install the required package for YOLOv8
+ # Install the required package for YOLO11
pip install ultralytics
```
-For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
### Usage
-Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md).
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md).
!!! example "Usage"
@@ -74,14 +74,14 @@ Before diving into the usage instructions, it's important to note that while all
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to NCNN format
- model.export(format="ncnn") # creates '/yolov8n_ncnn_model'
+ model.export(format="ncnn") # creates '/yolo11n_ncnn_model'
# Load the exported NCNN model
- ncnn_model = YOLO("./yolov8n_ncnn_model")
+ ncnn_model = YOLO("./yolo11n_ncnn_model")
# Run inference
results = ncnn_model("https://ultralytics.com/images/bus.jpg")
@@ -90,18 +90,18 @@ Before diving into the usage instructions, it's important to note that while all
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to NCNN format
- yolo export model=yolov8n.pt format=ncnn # creates '/yolov8n_ncnn_model'
+ # Export a YOLO11n PyTorch model to NCNN format
+ yolo export model=yolo11n.pt format=ncnn # creates '/yolo11n_ncnn_model'
# Run inference with the exported model
- yolo predict model='./yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model='./yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg'
```
For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md).
-## Deploying Exported YOLOv8 NCNN Models
+## Deploying Exported YOLO11 NCNN Models
-After successfully exporting your Ultralytics YOLOv8 models to NCNN format, you can now deploy them. The primary and recommended first step for running a NCNN model is to utilize the YOLO("./model_ncnn_model") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your NCNN models in various other settings, take a look at the following resources:
+After successfully exporting your Ultralytics YOLO11 models to NCNN format, you can now deploy them. The primary and recommended first step for running an NCNN model is to use the `YOLO("yolo11n_ncnn_model/")` method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your NCNN models in various other settings, take a look at the following resources:
- **[Android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-android)**: This blog explains how to use NCNN models for performing tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) through Android applications.
@@ -113,40 +113,40 @@ After successfully exporting your Ultralytics YOLOv8 models to NCNN format, you
## Summary
-In this guide, we've gone over exporting Ultralytics YOLOv8 models to the NCNN format. This conversion step is crucial for improving the efficiency and speed of YOLOv8 models, making them more effective and suitable for limited-resource computing environments.
+In this guide, we've gone over exporting Ultralytics YOLO11 models to the NCNN format. This conversion step is crucial for improving the efficiency and speed of YOLO11 models, making them more effective and suitable for limited-resource computing environments.
For detailed instructions on usage, please refer to the [official NCNN documentation](https://ncnn.readthedocs.io/en/latest/index.html).
-Also, if you're interested in exploring other integration options for Ultralytics YOLOv8, be sure to visit our [integration guide page](index.md) for further insights and information.
+Also, if you're interested in exploring other integration options for Ultralytics YOLO11, be sure to visit our [integration guide page](index.md) for further insights and information.
## FAQ
-### How do I export Ultralytics YOLOv8 models to NCNN format?
+### How do I export Ultralytics YOLO11 models to NCNN format?
-To export your Ultralytics YOLOv8 model to NCNN format, follow these steps:
+To export your Ultralytics YOLO11 model to NCNN format, follow these steps:
- **Python**: Use the `export` function from the YOLO class.
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export to NCNN format
- model.export(format="ncnn") # creates '/yolov8n_ncnn_model'
+ model.export(format="ncnn") # creates '/yolo11n_ncnn_model'
```
- **CLI**: Use the `yolo` command with the `export` argument.
```bash
- yolo export model=yolov8n.pt format=ncnn # creates '/yolov8n_ncnn_model'
+ yolo export model=yolo11n.pt format=ncnn # creates '/yolo11n_ncnn_model'
```
For detailed export options, check the [Export](../modes/export.md) page in the documentation.
-### What are the advantages of exporting YOLOv8 models to NCNN?
+### What are the advantages of exporting YOLO11 models to NCNN?
-Exporting your Ultralytics YOLOv8 models to NCNN offers several benefits:
+Exporting your Ultralytics YOLO11 models to NCNN offers several benefits:
- **Efficiency**: NCNN models are optimized for mobile and embedded devices, ensuring high performance even with limited computational resources.
- **Quantization**: NCNN supports techniques like quantization that improve model speed and reduce memory usage.
@@ -174,13 +174,13 @@ NCNN is versatile and supports various platforms:
If running models on a Raspberry Pi isn't fast enough, converting to the NCNN format could speed things up as detailed in our [Raspberry Pi Guide](../guides/raspberry-pi.md).
-### How can I deploy Ultralytics YOLOv8 NCNN models on Android?
+### How can I deploy Ultralytics YOLO11 NCNN models on Android?
-To deploy your YOLOv8 models on Android:
+To deploy your YOLO11 models on Android:
1. **Build for Android**: Follow the [NCNN Build for Android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-android) guide.
2. **Integrate with Your App**: Use the NCNN Android SDK to integrate the exported model into your application for efficient on-device inference.
-For step-by-step instructions, refer to our guide on [Deploying YOLOv8 NCNN Models](#deploying-exported-yolov8-ncnn-models).
+For step-by-step instructions, refer to our guide on [Deploying YOLO11 NCNN Models](#deploying-exported-yolo11-ncnn-models).
For more advanced guides and use cases, visit the [Ultralytics documentation page](../guides/model-deployment-options.md).
diff --git a/docs/en/integrations/neural-magic.md b/docs/en/integrations/neural-magic.md
index d05cf98fa7f..72837ccdce4 100644
--- a/docs/en/integrations/neural-magic.md
+++ b/docs/en/integrations/neural-magic.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Enhance YOLOv8 performance using Neural Magic's DeepSparse Engine. Learn how to deploy and benchmark YOLOv8 models on CPUs for efficient object detection.
-keywords: YOLOv8, DeepSparse, Neural Magic, model optimization, object detection, inference speed, CPU performance, sparsity, pruning, quantization
+description: Enhance YOLO11 performance using Neural Magic's DeepSparse Engine. Learn how to deploy and benchmark YOLO11 models on CPUs for efficient object detection.
+keywords: YOLO11, DeepSparse, Neural Magic, model optimization, object detection, inference speed, CPU performance, sparsity, pruning, quantization
---
-# Optimizing YOLOv8 Inferences with Neural Magic's DeepSparse Engine
+# Optimizing YOLO11 Inferences with Neural Magic's DeepSparse Engine
-When deploying [object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLOv8](https://www.ultralytics.com/) on various hardware, you can bump into unique issues like optimization. This is where YOLOv8's integration with Neural Magic's DeepSparse Engine steps in. It transforms the way YOLOv8 models are executed and enables GPU-level performance directly on CPUs.
+When deploying [object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLO11](https://www.ultralytics.com/) on various hardware, you can run into unique challenges, such as optimizing inference for the hardware at hand. This is where YOLO11's integration with Neural Magic's DeepSparse Engine steps in. It transforms the way YOLO11 models are executed and enables GPU-level performance directly on CPUs.
-This guide shows you how to deploy YOLOv8 using Neural Magic's DeepSparse, how to run inferences, and also how to benchmark performance to ensure it is optimized.
+This guide shows you how to deploy YOLO11 using Neural Magic's DeepSparse, how to run inferences, and also how to benchmark performance to ensure it is optimized.
## Neural Magic's DeepSparse
@@ -18,17 +18,17 @@ This guide shows you how to deploy YOLOv8 using Neural Magic's DeepSparse, how t
[Neural Magic's DeepSparse](https://neuralmagic.com/deepsparse/) is an inference run-time designed to optimize the execution of neural networks on CPUs. It applies advanced techniques like sparsity, pruning, and quantization to dramatically reduce computational demands while maintaining accuracy. DeepSparse offers an agile solution for efficient and scalable [neural network](https://www.ultralytics.com/glossary/neural-network-nn) execution across various devices.
-## Benefits of Integrating Neural Magic's DeepSparse with YOLOv8
+## Benefits of Integrating Neural Magic's DeepSparse with YOLO11
Before diving into how to deploy YOLOV8 using DeepSparse, let's understand the benefits of using DeepSparse. Some key advantages include:
-- **Enhanced Inference Speed**: Achieves up to 525 FPS (on YOLOv8n), significantly speeding up YOLOv8's inference capabilities compared to traditional methods.
+- **Enhanced Inference Speed**: Achieves up to 525 FPS (on YOLO11n), significantly speeding up YOLO11's inference capabilities compared to traditional methods.
-- **Optimized Model Efficiency**: Uses pruning and quantization to enhance YOLOv8's efficiency, reducing model size and computational requirements while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy).
+- **Optimized Model Efficiency**: Uses pruning and quantization to enhance YOLO11's efficiency, reducing model size and computational requirements while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy).
@@ -36,9 +36,9 @@ Before diving into how to deploy YOLOV8 using DeepSparse, let's understand the b
- **High Performance on Standard CPUs**: Delivers GPU-like performance on CPUs, providing a more accessible and cost-effective option for various applications.
-- **Streamlined Integration and Deployment**: Offers user-friendly tools for easy integration of YOLOv8 into applications, including image and video annotation features.
+- **Streamlined Integration and Deployment**: Offers user-friendly tools for easy integration of YOLO11 into applications, including image and video annotation features.
-- **Support for Various Model Types**: Compatible with both standard and sparsity-optimized YOLOv8 models, adding deployment flexibility.
+- **Support for Various Model Types**: Compatible with both standard and sparsity-optimized YOLO11 models, adding deployment flexibility.
- **Cost-Effective and Scalable Solution**: Reduces operational expenses and offers scalable deployment of advanced object detection models.
@@ -56,15 +56,15 @@ Neural Magic's Deep Sparse technology is inspired by the human brain's efficienc
For more details on how Neural Magic's DeepSparse technology work, check out [their blog post](https://neuralmagic.com/blog/how-neural-magics-deep-sparse-technology-works/).
-## Creating A Sparse Version of YOLOv8 Trained on a Custom Dataset
+## Creating A Sparse Version of YOLO11 Trained on a Custom Dataset
-SparseZoo, an open-source model repository by Neural Magic, offers [a collection of pre-sparsified YOLOv8 model checkpoints](https://sparsezoo.neuralmagic.com/?modelSet=computer_vision&searchModels=yolo). With SparseML, seamlessly integrated with Ultralytics, users can effortlessly fine-tune these sparse checkpoints on their specific datasets using a straightforward command-line interface.
+SparseZoo, an open-source model repository by Neural Magic, offers [a collection of pre-sparsified YOLO11 model checkpoints](https://sparsezoo.neuralmagic.com/?modelSet=computer_vision&searchModels=yolo). With SparseML, seamlessly integrated with Ultralytics, users can effortlessly fine-tune these sparse checkpoints on their specific datasets using a straightforward command-line interface.
-Checkout [Neural Magic's SparseML YOLOv8 documentation](https://github.com/neuralmagic/sparseml/tree/main/integrations/ultralytics-yolov8) for more details.
+Check out [Neural Magic's SparseML YOLO11 documentation](https://github.com/neuralmagic/sparseml/tree/main/integrations/ultralytics-yolov8) for more details.
## Usage: Deploying YOLOV8 using DeepSparse
-Deploying YOLOv8 with Neural Magic's DeepSparse involves a few straightforward steps. Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. Here's how you can get started.
+Deploying YOLO11 with Neural Magic's DeepSparse involves a few straightforward steps. Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. Here's how you can get started.
### Step 1: Installation
@@ -79,24 +79,24 @@ To install the required packages, run:
pip install deepsparse[yolov8]
```
-### Step 2: Exporting YOLOv8 to ONNX Format
+### Step 2: Exporting YOLO11 to ONNX Format
-DeepSparse Engine requires YOLOv8 models in ONNX format. Exporting your model to this format is essential for compatibility with DeepSparse. Use the following command to export YOLOv8 models:
+DeepSparse Engine requires YOLO11 models in ONNX format. Exporting your model to this format is essential for compatibility with DeepSparse. Use the following command to export YOLO11 models:
!!! tip "Model Export"
=== "CLI"
```bash
- # Export YOLOv8 model to ONNX format
- yolo task=detect mode=export model=yolov8n.pt format=onnx opset=13
+ # Export YOLO11 model to ONNX format
+ yolo task=detect mode=export model=yolo11n.pt format=onnx opset=13
```
-This command will save the `yolov8n.onnx` model to your disk.
+This command will save the `yolo11n.onnx` model to your disk.
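+If you prefer working in Python rather than the CLI, the same export can be done with the Ultralytics API; here's a minimal sketch mirroring the `opset=13` setting above:
+
+```python
+from ultralytics import YOLO
+
+# Export the YOLO11n model to ONNX with the same opset as the CLI command above
+model = YOLO("yolo11n.pt")
+model.export(format="onnx", opset=13)  # saves 'yolo11n.onnx' to disk
+```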
### Step 3: Deploying and Running Inferences
-With your YOLOv8 model in ONNX format, you can deploy and run inferences using DeepSparse. This can be done easily with their intuitive Python API:
+With your YOLO11 model in ONNX format, you can deploy and run inferences using DeepSparse. This can be done easily with their intuitive Python API:
!!! tip "Deploying and Running Inferences"
@@ -105,8 +105,8 @@ With your YOLOv8 model in ONNX format, you can deploy and run inferences using D
```python
from deepsparse import Pipeline
- # Specify the path to your YOLOv8 ONNX model
- model_path = "path/to/yolov8n.onnx"
+ # Specify the path to your YOLO11 ONNX model
+ model_path = "path/to/yolo11n.onnx"
# Set up the DeepSparse Pipeline
yolo_pipeline = Pipeline.create(task="yolov8", model_path=model_path)
@@ -118,7 +118,7 @@ With your YOLOv8 model in ONNX format, you can deploy and run inferences using D
### Step 4: Benchmarking Performance
-It's important to check that your YOLOv8 model is performing optimally on DeepSparse. You can benchmark your model's performance to analyze throughput and latency:
+It's important to check that your YOLO11 model is performing optimally on DeepSparse. You can benchmark your model's performance to analyze throughput and latency:
!!! tip "Benchmarking"
@@ -126,12 +126,12 @@ It's important to check that your YOLOv8 model is performing optimally on DeepSp
```bash
# Benchmark performance
- deepsparse.benchmark model_path="path/to/yolov8n.onnx" --scenario=sync --input_shapes="[1,3,640,640]"
+ deepsparse.benchmark model_path="path/to/yolo11n.onnx" --scenario=sync --input_shapes="[1,3,640,640]"
```
### Step 5: Additional Features
-DeepSparse provides additional features for practical integration of YOLOv8 in applications, such as image annotation and dataset evaluation.
+DeepSparse provides additional features for practical integration of YOLO11 in applications, such as image annotation and dataset evaluation.
!!! tip "Additional Features"
@@ -139,10 +139,10 @@ DeepSparse provides additional features for practical integration of YOLOv8 in a
```bash
# For image annotation
- deepsparse.yolov8.annotate --source "path/to/image.jpg" --model_filepath "path/to/yolov8n.onnx"
+ deepsparse.yolov8.annotate --source "path/to/image.jpg" --model_filepath "path/to/yolo11n.onnx"
# For evaluating model performance on a dataset
- deepsparse.yolov8.eval --model_path "path/to/yolov8n.onnx"
+ deepsparse.yolov8.eval --model_path "path/to/yolo11n.onnx"
```
Running the annotate command processes your specified image, detecting objects, and saving the annotated image with bounding boxes and classifications. The annotated image will be stored in an annotation-results folder. This helps provide a visual representation of the model's detection capabilities.
@@ -151,61 +151,61 @@ Running the annotate command processes your specified image, detecting objects,
-After running the eval command, you will receive detailed output metrics such as [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP (mean Average Precision). This provides a comprehensive view of your model's performance on the dataset. This functionality is particularly useful for fine-tuning and optimizing your YOLOv8 models for specific use cases, ensuring high accuracy and efficiency.
+After running the eval command, you will receive detailed output metrics such as [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP (mean Average Precision). This provides a comprehensive view of your model's performance on the dataset. This functionality is particularly useful for fine-tuning and optimizing your YOLO11 models for specific use cases, ensuring high accuracy and efficiency.
## Summary
-This guide explored integrating Ultralytics' YOLOv8 with Neural Magic's DeepSparse Engine. It highlighted how this integration enhances YOLOv8's performance on CPU platforms, offering GPU-level efficiency and advanced neural network sparsity techniques.
+This guide explored integrating Ultralytics' YOLO11 with Neural Magic's DeepSparse Engine. It highlighted how this integration enhances YOLO11's performance on CPU platforms, offering GPU-level efficiency and advanced neural network sparsity techniques.
-For more detailed information and advanced usage, visit [Neural Magic's DeepSparse documentation](https://docs.neuralmagic.com/products/deepsparse/). Also, check out Neural Magic's documentation on the integration with YOLOv8 [here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/yolov8#yolov8-inference-pipelines) and watch a great session on it [here](https://www.youtube.com/watch?v=qtJ7bdt52x8).
+For more detailed information and advanced usage, visit [Neural Magic's DeepSparse documentation](https://docs.neuralmagic.com/products/deepsparse/). Also, check out Neural Magic's documentation on the integration with YOLO11 [here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/yolov8#yolov8-inference-pipelines) and watch a great session on it [here](https://www.youtube.com/watch?v=qtJ7bdt52x8).
-Additionally, for a broader understanding of various YOLOv8 integrations, visit the [Ultralytics integration guide page](../integrations/index.md), where you can discover a range of other exciting integration possibilities.
+Additionally, for a broader understanding of various YOLO11 integrations, visit the [Ultralytics integration guide page](../integrations/index.md), where you can discover a range of other exciting integration possibilities.
## FAQ
-### What is Neural Magic's DeepSparse Engine and how does it optimize YOLOv8 performance?
+### What is Neural Magic's DeepSparse Engine and how does it optimize YOLO11 performance?
-Neural Magic's DeepSparse Engine is an inference runtime designed to optimize the execution of neural networks on CPUs through advanced techniques such as sparsity, pruning, and quantization. By integrating DeepSparse with YOLOv8, you can achieve GPU-like performance on standard CPUs, significantly enhancing inference speed, model efficiency, and overall performance while maintaining accuracy. For more details, check out the [Neural Magic's DeepSparse section](#neural-magics-deepsparse).
+Neural Magic's DeepSparse Engine is an inference runtime designed to optimize the execution of neural networks on CPUs through advanced techniques such as sparsity, pruning, and quantization. By integrating DeepSparse with YOLO11, you can achieve GPU-like performance on standard CPUs, significantly enhancing inference speed, model efficiency, and overall performance while maintaining accuracy. For more details, check out the [Neural Magic's DeepSparse section](#neural-magics-deepsparse).
-### How can I install the needed packages to deploy YOLOv8 using Neural Magic's DeepSparse?
+### How can I install the needed packages to deploy YOLO11 using Neural Magic's DeepSparse?
-Installing the required packages for deploying YOLOv8 with Neural Magic's DeepSparse is straightforward. You can easily install them using the CLI. Here's the command you need to run:
+Installing the required packages for deploying YOLO11 with Neural Magic's DeepSparse is straightforward. You can easily install them using the CLI. Here's the command you need to run:
```bash
pip install deepsparse[yolov8]
```
-Once installed, follow the steps provided in the [Installation section](#step-1-installation) to set up your environment and start using DeepSparse with YOLOv8.
+Once installed, follow the steps provided in the [Installation section](#step-1-installation) to set up your environment and start using DeepSparse with YOLO11.
-### How do I convert YOLOv8 models to ONNX format for use with DeepSparse?
+### How do I convert YOLO11 models to ONNX format for use with DeepSparse?
-To convert YOLOv8 models to the ONNX format, which is required for compatibility with DeepSparse, you can use the following CLI command:
+To convert YOLO11 models to the ONNX format, which is required for compatibility with DeepSparse, you can use the following CLI command:
```bash
-yolo task=detect mode=export model=yolov8n.pt format=onnx opset=13
+yolo task=detect mode=export model=yolo11n.pt format=onnx opset=13
```
-This command will export your YOLOv8 model (`yolov8n.pt`) to a format (`yolov8n.onnx`) that can be utilized by the DeepSparse Engine. More information about model export can be found in the [Model Export section](#step-2-exporting-yolov8-to-onnx-format).
+This command will export your YOLO11 model (`yolo11n.pt`) to a format (`yolo11n.onnx`) that can be utilized by the DeepSparse Engine. More information about model export can be found in the [Model Export section](#step-2-exporting-yolo11-to-onnx-format).
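If you prefer Python over the CLI, a minimal equivalent sketch (assuming the `ultralytics` package is installed and `yolo11n.pt` is available) looks like this:

```python
from ultralytics import YOLO

# Load the YOLO11n PyTorch model
model = YOLO("yolo11n.pt")

# Export to ONNX with the same opset used in the CLI command above
model.export(format="onnx", opset=13)  # creates 'yolo11n.onnx'
```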
-### How do I benchmark YOLOv8 performance on the DeepSparse Engine?
+### How do I benchmark YOLO11 performance on the DeepSparse Engine?
-Benchmarking YOLOv8 performance on DeepSparse helps you analyze throughput and latency to ensure your model is optimized. You can use the following CLI command to run a benchmark:
+Benchmarking YOLO11 performance on DeepSparse helps you analyze throughput and latency to ensure your model is optimized. You can use the following CLI command to run a benchmark:
```bash
-deepsparse.benchmark model_path="path/to/yolov8n.onnx" --scenario=sync --input_shapes="[1,3,640,640]"
+deepsparse.benchmark model_path="path/to/yolo11n.onnx" --scenario=sync --input_shapes="[1,3,640,640]"
```
This command will provide you with vital performance metrics. For more details, see the [Benchmarking Performance section](#step-4-benchmarking-performance).
-### Why should I use Neural Magic's DeepSparse with YOLOv8 for object detection tasks?
+### Why should I use Neural Magic's DeepSparse with YOLO11 for object detection tasks?
-Integrating Neural Magic's DeepSparse with YOLOv8 offers several benefits:
+Integrating Neural Magic's DeepSparse with YOLO11 offers several benefits:
-- **Enhanced Inference Speed:** Achieves up to 525 FPS, significantly speeding up YOLOv8's capabilities.
+- **Enhanced Inference Speed:** Achieves up to 525 FPS, significantly accelerating YOLO11 inference.
- **Optimized Model Efficiency:** Uses sparsity, pruning, and quantization techniques to reduce model size and computational needs while maintaining accuracy.
- **High Performance on Standard CPUs:** Offers GPU-like performance on cost-effective CPU hardware.
- **Streamlined Integration:** User-friendly tools for easy deployment and integration.
-- **Flexibility:** Supports both standard and sparsity-optimized YOLOv8 models.
+- **Flexibility:** Supports both standard and sparsity-optimized YOLO11 models.
- **Cost-Effective:** Reduces operational expenses through efficient resource utilization.
-For a deeper dive into these advantages, visit the [Benefits of Integrating Neural Magic's DeepSparse with YOLOv8 section](#benefits-of-integrating-neural-magics-deepsparse-with-yolov8).
+For a deeper dive into these advantages, visit the [Benefits of Integrating Neural Magic's DeepSparse with YOLO11 section](#benefits-of-integrating-neural-magics-deepsparse-with-yolo11).
diff --git a/docs/en/integrations/onnx.md b/docs/en/integrations/onnx.md
index 3bb372ac2a7..fbff328d02e 100644
--- a/docs/en/integrations/onnx.md
+++ b/docs/en/integrations/onnx.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Learn how to export YOLOv8 models to ONNX format for flexible deployment across various platforms with enhanced performance.
-keywords: YOLOv8, ONNX, model export, Ultralytics, ONNX Runtime, machine learning, model deployment, computer vision, deep learning
+description: Learn how to export YOLO11 models to ONNX format for flexible deployment across various platforms with enhanced performance.
+keywords: YOLO11, ONNX, model export, Ultralytics, ONNX Runtime, machine learning, model deployment, computer vision, deep learning
---
-# ONNX Export for YOLOv8 Models
+# ONNX Export for YOLO11 Models
Often, when deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models, you'll need a model format that's both flexible and compatible with multiple platforms.
-Exporting [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to ONNX format streamlines deployment and ensures optimal performance across various environments. This guide will show you how to easily convert your YOLOv8 models to ONNX and enhance their scalability and effectiveness in real-world applications.
+Exporting [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to ONNX format streamlines deployment and ensures optimal performance across various environments. This guide will show you how to easily convert your YOLO11 models to ONNX and enhance their scalability and effectiveness in real-world applications.
## ONNX and ONNX Runtime
@@ -44,7 +44,7 @@ The ability of ONNX to handle various formats can be attributed to the following
## Common Usage of ONNX
-Before we jump into how to export YOLOv8 models to the ONNX format, let's take a look at where ONNX models are usually used.
+Before we jump into how to export YOLO11 models to the ONNX format, let's take a look at where ONNX models are usually used.
### CPU Deployment
@@ -60,9 +60,9 @@ While ONNX models are commonly used on CPUs, they can also be deployed on the fo
- **Web Browsers**: ONNX can run directly in web browsers, powering interactive and dynamic web-based AI applications.
-## Exporting YOLOv8 Models to ONNX
+## Exporting YOLO11 Models to ONNX
-You can expand model compatibility and deployment flexibility by converting YOLOv8 models to ONNX format.
+You can expand model compatibility and deployment flexibility by converting YOLO11 models to ONNX format.
### Installation
@@ -73,15 +73,15 @@ To install the required package, run:
=== "CLI"
```bash
- # Install the required package for YOLOv8
+ # Install the required package for YOLO11
pip install ultralytics
```
-For detailed instructions and best practices related to the installation process, check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
### Usage
-Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
+Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements.
!!! example "Usage"
@@ -90,14 +90,14 @@ Before diving into the usage instructions, be sure to check out the range of [YO
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to ONNX format
- model.export(format="onnx") # creates 'yolov8n.onnx'
+ model.export(format="onnx") # creates 'yolo11n.onnx'
# Load the exported ONNX model
- onnx_model = YOLO("yolov8n.onnx")
+ onnx_model = YOLO("yolo11n.onnx")
# Run inference
results = onnx_model("https://ultralytics.com/images/bus.jpg")
@@ -106,18 +106,18 @@ Before diving into the usage instructions, be sure to check out the range of [YO
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to ONNX format
- yolo export model=yolov8n.pt format=onnx # creates 'yolov8n.onnx'
+ # Export a YOLO11n PyTorch model to ONNX format
+ yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx'
# Run inference with the exported model
- yolo predict model=yolov8n.onnx source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.onnx source='https://ultralytics.com/images/bus.jpg'
```
For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
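As a rough sketch of commonly adjusted ONNX export options (the values below are illustrative choices, not required defaults):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Export with a few frequently used ONNX options
model.export(
    format="onnx",
    imgsz=640,  # inference image size
    dynamic=True,  # allow dynamic input shapes
    simplify=True,  # simplify the exported ONNX graph
    opset=13,  # target ONNX opset version
)
```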
-## Deploying Exported YOLOv8 ONNX Models
+## Deploying Exported YOLO11 ONNX Models
-Once you've successfully exported your Ultralytics YOLOv8 models to ONNX format, the next step is deploying these models in various environments. For detailed instructions on deploying your ONNX models, take a look at the following resources:
+Once you've successfully exported your Ultralytics YOLO11 models to ONNX format, the next step is deploying these models in various environments. For detailed instructions on deploying your ONNX models, take a look at the following resources:
- **[ONNX Runtime Python API Documentation](https://onnxruntime.ai/docs/api/python/api_summary.html)**: This guide provides essential information for loading and running ONNX models using ONNX Runtime.
@@ -127,17 +127,17 @@ Once you've successfully exported your Ultralytics YOLOv8 models to ONNX format,
## Summary
-In this guide, you've learned how to export Ultralytics YOLOv8 models to ONNX format to increase their interoperability and performance across various platforms. You were also introduced to the ONNX Runtime and ONNX deployment options.
+In this guide, you've learned how to export Ultralytics YOLO11 models to ONNX format to increase their interoperability and performance across various platforms. You were also introduced to the ONNX Runtime and ONNX deployment options.
For further details on usage, visit the [ONNX official documentation](https://onnx.ai/onnx/intro/).
-Also, if you'd like to know more about other Ultralytics YOLOv8 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there.
+Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there.
## FAQ
-### How do I export YOLOv8 models to ONNX format using Ultralytics?
+### How do I export YOLO11 models to ONNX format using Ultralytics?
-To export your YOLOv8 models to ONNX format using Ultralytics, follow these steps:
+To export your YOLO11 models to ONNX format using Ultralytics, follow these steps:
!!! example "Usage"
@@ -146,14 +146,14 @@ To export your YOLOv8 models to ONNX format using Ultralytics, follow these step
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to ONNX format
- model.export(format="onnx") # creates 'yolov8n.onnx'
+ model.export(format="onnx") # creates 'yolo11n.onnx'
# Load the exported ONNX model
- onnx_model = YOLO("yolov8n.onnx")
+ onnx_model = YOLO("yolo11n.onnx")
# Run inference
results = onnx_model("https://ultralytics.com/images/bus.jpg")
@@ -162,18 +162,18 @@ To export your YOLOv8 models to ONNX format using Ultralytics, follow these step
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to ONNX format
- yolo export model=yolov8n.pt format=onnx # creates 'yolov8n.onnx'
+ # Export a YOLO11n PyTorch model to ONNX format
+ yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx'
# Run inference with the exported model
- yolo predict model=yolov8n.onnx source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model=yolo11n.onnx source='https://ultralytics.com/images/bus.jpg'
```
For more details, visit the [export documentation](../modes/export.md).
-### What are the advantages of using ONNX Runtime for deploying YOLOv8 models?
+### What are the advantages of using ONNX Runtime for deploying YOLO11 models?
-Using ONNX Runtime for deploying YOLOv8 models offers several advantages:
+Using ONNX Runtime for deploying YOLO11 models offers several advantages:
- **Cross-platform compatibility**: ONNX Runtime supports various platforms, such as Windows, macOS, and Linux, ensuring your models run smoothly across different environments.
- **Hardware acceleration**: ONNX Runtime can leverage hardware-specific optimizations for CPUs, GPUs, and dedicated accelerators, providing high-performance inference.
@@ -181,9 +181,9 @@ Using ONNX Runtime for deploying YOLOv8 models offers several advantages:
Learn more by checking the [ONNX Runtime documentation](https://onnxruntime.ai/docs/api/python/api_summary.html).
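As a minimal illustration of consuming the exported model directly with ONNX Runtime (assuming `onnxruntime` and `numpy` are installed and `yolo11n.onnx` was exported with a 1x3x640x640 input):

```python
import numpy as np
import onnxruntime as ort

# Create a CPU inference session for the exported model
session = ort.InferenceSession("yolo11n.onnx", providers=["CPUExecutionProvider"])

# Run the model on a dummy input with the expected shape
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
outputs = session.run(None, {input_name: dummy})

print([o.shape for o in outputs])  # raw prediction tensors; post-processing (e.g. NMS) is not included here
```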
-### What deployment options are available for YOLOv8 models exported to ONNX?
+### What deployment options are available for YOLO11 models exported to ONNX?
-YOLOv8 models exported to ONNX can be deployed on various platforms including:
+YOLO11 models exported to ONNX can be deployed on various platforms including:
- **CPUs**: Utilizing ONNX Runtime for optimized CPU inference.
- **GPUs**: Leveraging NVIDIA CUDA for high-performance GPU acceleration.
@@ -192,19 +192,19 @@ YOLOv8 models exported to ONNX can be deployed on various platforms including:
For more information, explore our guide on [model deployment options](../guides/model-deployment-options.md).
-### Why should I use ONNX format for Ultralytics YOLOv8 models?
+### Why should I use ONNX format for Ultralytics YOLO11 models?
-Using ONNX format for Ultralytics YOLOv8 models provides numerous benefits:
+Using ONNX format for Ultralytics YOLO11 models provides numerous benefits:
- **Interoperability**: ONNX allows models to be transferred between different machine learning frameworks seamlessly.
- **Performance Optimization**: ONNX Runtime can enhance model performance by utilizing hardware-specific optimizations.
- **Flexibility**: ONNX supports various deployment environments, enabling you to use the same model on different platforms without modification.
-Refer to the comprehensive guide on [exporting YOLOv8 models to ONNX](https://www.ultralytics.com/blog/export-and-optimize-a-yolov8-model-for-inference-on-openvino).
+Refer to the comprehensive guide on [exporting YOLO11 models to ONNX](https://www.ultralytics.com/blog/export-and-optimize-a-yolov8-model-for-inference-on-openvino).
-### How can I troubleshoot issues when exporting YOLOv8 models to ONNX?
+### How can I troubleshoot issues when exporting YOLO11 models to ONNX?
-When exporting YOLOv8 models to ONNX, you might encounter common issues such as mismatched dependencies or unsupported operations. To troubleshoot these problems:
+When exporting YOLO11 models to ONNX, you might encounter common issues such as mismatched dependencies or unsupported operations. To troubleshoot these problems:
1. Verify that you have the correct version of required dependencies installed.
2. Check the official [ONNX documentation](https://onnx.ai/onnx/intro/) for supported operators and features.
diff --git a/docs/en/integrations/openvino.md b/docs/en/integrations/openvino.md
index 8395f949f10..b3fd6a8201a 100644
--- a/docs/en/integrations/openvino.md
+++ b/docs/en/integrations/openvino.md
@@ -59,14 +59,19 @@ Export a YOLOv8n model to OpenVINO format and run inference with the exported mo
## Arguments
-| Key | Value | Description |
-| --------- | ------------ | --------------------------------------------------------------------------- |
-| `format` | `'openvino'` | format to export to |
-| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
-| `half` | `False` | FP16 quantization |
-| `int8` | `False` | INT8 quantization |
-| `batch` | `1` | [batch size](https://www.ultralytics.com/glossary/batch-size) for inference |
-| `dynamic` | `False` | allows dynamic input sizes |
+| Key | Value | Description |
+| --------- | ------------ | ------------------------------------------------------------------------------------------- |
+| `format` | `'openvino'` | format to export to |
+| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) |
+| `half` | `False` | FP16 quantization |
+| `int8` | `False` | INT8 quantization |
+| `batch` | `1` | [batch size](https://www.ultralytics.com/glossary/batch-size) for inference |
+| `dynamic` | `False` | allows dynamic input sizes |
+| `data` | `coco8.yaml` | Path to the dataset configuration file (default: `coco8.yaml`), essential for quantization. |
+
+!!! note
+
+    When using the `data` argument for quantization, please check the [Dataset Guide](https://docs.ultralytics.com/datasets/detect) to learn more about the dataset format.
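For example, an INT8 export that uses the `data` argument for calibration might look like the following sketch (here `coco8.yaml` is just the small default dataset; substitute your own dataset YAML):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

# Export to OpenVINO with INT8 quantization, using coco8.yaml as calibration data
model.export(format="openvino", int8=True, data="coco8.yaml")
```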
## Benefits of OpenVINO
@@ -148,7 +153,7 @@ This table represents the benchmark results for five different models (YOLOv8n,
### Intel Arc GPU
-Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and Nvidia, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers.
+Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and NVIDIA, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers.
The Arc™ series is divided into three categories: Arc™ 3, Arc™ 5, and Arc™ 7, with each number indicating the performance level. Each category includes several models, and the 'M' in the GPU model name signifies a mobile, integrated variant.
@@ -352,7 +357,7 @@ To reproduce the Ultralytics benchmarks above on all export [formats](../modes/e
model = YOLO("yolov8n.pt")
# Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all export formats
- results = model.benchmarks(data="coco8.yaml")
+ results = model.benchmark(data="coco8.yaml")
```
=== "CLI"
@@ -466,7 +471,7 @@ Yes, you can benchmark YOLOv8 models in various formats including PyTorch, Torch
model = YOLO("yolov8n.pt")
# Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all export formats
- results = model.benchmarks(data="coco8.yaml")
+ results = model.benchmark(data="coco8.yaml")
```
=== "CLI"
diff --git a/docs/en/integrations/paddlepaddle.md b/docs/en/integrations/paddlepaddle.md
index 62092df9f56..77c6164b0f0 100644
--- a/docs/en/integrations/paddlepaddle.md
+++ b/docs/en/integrations/paddlepaddle.md
@@ -1,12 +1,12 @@
---
comments: true
-description: Learn how to export YOLOv8 models to PaddlePaddle format for enhanced performance, flexibility, and deployment across various platforms and devices.
-keywords: YOLOv8, PaddlePaddle, export models, computer vision, deep learning, model deployment, performance optimization
+description: Learn how to export YOLO11 models to PaddlePaddle format for enhanced performance, flexibility, and deployment across various platforms and devices.
+keywords: YOLO11, PaddlePaddle, export models, computer vision, deep learning, model deployment, performance optimization
---
-# How to Export to PaddlePaddle Format from YOLOv8 Models
+# How to Export to PaddlePaddle Format from YOLO11 Models
-Bridging the gap between developing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models in real-world scenarios with varying conditions can be difficult. PaddlePaddle makes this process easier with its focus on flexibility, performance, and its capability for parallel processing in distributed environments. This means you can use your YOLOv8 computer vision models on a wide variety of devices and platforms, from smartphones to cloud-based servers.
+Bridging the gap between developing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models in real-world scenarios with varying conditions can be difficult. PaddlePaddle makes this process easier with its focus on flexibility, performance, and its capability for parallel processing in distributed environments. This means you can use your YOLO11 computer vision models on a wide variety of devices and platforms, from smartphones to cloud-based servers.
@@ -16,10 +16,10 @@ Bridging the gap between developing and deploying [computer vision](https://www.
allowfullscreen>
- Watch: How to Export Ultralytics YOLOv8 Models to PaddlePaddle Format | Key Features of PaddlePaddle Format
+ Watch: How to Export Ultralytics YOLO11 Models to PaddlePaddle Format | Key Features of PaddlePaddle Format
-The ability to export to PaddlePaddle model format allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for use within the PaddlePaddle framework. PaddlePaddle is known for facilitating industrial deployments and is a good choice for deploying computer vision applications in real-world settings across various domains.
+The ability to export to PaddlePaddle model format allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for use within the PaddlePaddle framework. PaddlePaddle is known for facilitating industrial deployments and is a good choice for deploying computer vision applications in real-world settings across various domains.
## Why should you export to PaddlePaddle?
@@ -31,7 +31,7 @@ Developed by Baidu, [PaddlePaddle](https://www.paddlepaddle.org.cn/en) (**PA**ra
It offers tools and resources similar to popular frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it accessible for developers of all experience levels. From farming and factories to service businesses, PaddlePaddle's large developer community of over 4.77 million is helping create and deploy AI applications.
-By exporting your Ultralytics YOLOv8 models to PaddlePaddle format, you can tap into PaddlePaddle's strengths in performance optimization. PaddlePaddle prioritizes efficient model execution and reduced memory usage. As a result, your YOLOv8 models can potentially achieve even better performance, delivering top-notch results in practical scenarios.
+By exporting your Ultralytics YOLO11 models to PaddlePaddle format, you can tap into PaddlePaddle's strengths in performance optimization. PaddlePaddle prioritizes efficient model execution and reduced memory usage. As a result, your YOLO11 models can potentially achieve even better performance, delivering top-notch results in practical scenarios.
## Key Features of PaddlePaddle Models
@@ -45,7 +45,7 @@ PaddlePaddle models offer a range of key features that contribute to their flexi
## Deployment Options in PaddlePaddle
-Before diving into the code for exporting YOLOv8 models to PaddlePaddle, let's take a look at the different deployment scenarios in which PaddlePaddle models excel.
+Before diving into the code for exporting YOLO11 models to PaddlePaddle, let's take a look at the different deployment scenarios in which PaddlePaddle models excel.
PaddlePaddle provides a range of options, each offering a distinct balance of ease of use, flexibility, and performance:
@@ -57,9 +57,9 @@ PaddlePaddle provides a range of options, each offering a distinct balance of ea
- **Paddle.js**: Paddle.js enables you to deploy PaddlePaddle models directly within web browsers. Paddle.js can either load a pre-trained model or transform a model from [paddle-hub](https://github.com/PaddlePaddle/PaddleHub) with model transforming tools provided by Paddle.js. It can run in browsers that support WebGL/WebGPU/WebAssembly.
-## Export to PaddlePaddle: Converting Your YOLOv8 Model
+## Export to PaddlePaddle: Converting Your YOLO11 Model
-Converting YOLOv8 models to the PaddlePaddle format can improve execution flexibility and optimize performance for various deployment scenarios.
+Converting YOLO11 models to the PaddlePaddle format can improve execution flexibility and optimize performance for various deployment scenarios.
### Installation
@@ -70,15 +70,15 @@ To install the required package, run:
=== "CLI"
```bash
- # Install the required package for YOLOv8
+ # Install the required package for YOLO11
pip install ultralytics
```
-For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
### Usage
-Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md).
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can verify that the model you select supports export functionality [here](../modes/export.md).
!!! example "Usage"
@@ -87,14 +87,14 @@ Before diving into the usage instructions, it's important to note that while all
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to PaddlePaddle format
- model.export(format="paddle") # creates '/yolov8n_paddle_model'
+ model.export(format="paddle") # creates '/yolo11n_paddle_model'
# Load the exported PaddlePaddle model
- paddle_model = YOLO("./yolov8n_paddle_model")
+ paddle_model = YOLO("./yolo11n_paddle_model")
# Run inference
results = paddle_model("https://ultralytics.com/images/bus.jpg")
@@ -103,18 +103,18 @@ Before diving into the usage instructions, it's important to note that while all
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to PaddlePaddle format
- yolo export model=yolov8n.pt format=paddle # creates '/yolov8n_paddle_model'
+ # Export a YOLO11n PyTorch model to PaddlePaddle format
+ yolo export model=yolo11n.pt format=paddle # creates '/yolo11n_paddle_model'
# Run inference with the exported model
- yolo predict model='./yolov8n_paddle_model' source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model='./yolo11n_paddle_model' source='https://ultralytics.com/images/bus.jpg'
```
For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md).
-## Deploying Exported YOLOv8 PaddlePaddle Models
+## Deploying Exported YOLO11 PaddlePaddle Models
-After successfully exporting your Ultralytics YOLOv8 models to PaddlePaddle format, you can now deploy them. The primary and recommended first step for running a PaddlePaddle model is to use the YOLO("./model_paddle_model") method, as outlined in the previous usage code snippet.
+After successfully exporting your Ultralytics YOLO11 models to PaddlePaddle format, you can now deploy them. The primary and recommended first step for running a PaddlePaddle model is to load it with `YOLO("./yolo11n_paddle_model")`, as outlined in the previous usage code snippet.
However, for in-depth instructions on deploying your PaddlePaddle models in various other settings, take a look at the following resources:
@@ -126,17 +126,17 @@ However, for in-depth instructions on deploying your PaddlePaddle models in vari
## Summary
-In this guide, we explored the process of exporting Ultralytics YOLOv8 models to the PaddlePaddle format. By following these steps, you can leverage PaddlePaddle's strengths in diverse deployment scenarios, optimizing your models for different hardware and software environments.
+In this guide, we explored the process of exporting Ultralytics YOLO11 models to the PaddlePaddle format. By following these steps, you can leverage PaddlePaddle's strengths in diverse deployment scenarios, optimizing your models for different hardware and software environments.
For further details on usage, visit the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
-Want to explore more ways to integrate your Ultralytics YOLOv8 models? Our [integration guide page](index.md) explores various options, equipping you with valuable resources and insights.
+Want to explore more ways to integrate your Ultralytics YOLO11 models? Our [integration guide page](index.md) explores various options, equipping you with valuable resources and insights.
## FAQ
-### How do I export Ultralytics YOLOv8 models to PaddlePaddle format?
+### How do I export Ultralytics YOLO11 models to PaddlePaddle format?
-Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. You can use the `export` method of the YOLO class to perform this exportation. Here is an example using Python:
+Exporting Ultralytics YOLO11 models to PaddlePaddle format is straightforward. You can use the `export` method of the YOLO class to perform the export. Here is an example using Python:
!!! example "Usage"
@@ -145,14 +145,14 @@ Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. Y
```python
from ultralytics import YOLO
- # Load the YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
# Export the model to PaddlePaddle format
- model.export(format="paddle") # creates '/yolov8n_paddle_model'
+ model.export(format="paddle") # creates '/yolo11n_paddle_model'
# Load the exported PaddlePaddle model
- paddle_model = YOLO("./yolov8n_paddle_model")
+ paddle_model = YOLO("./yolo11n_paddle_model")
# Run inference
results = paddle_model("https://ultralytics.com/images/bus.jpg")
@@ -161,11 +161,11 @@ Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. Y
=== "CLI"
```bash
- # Export a YOLOv8n PyTorch model to PaddlePaddle format
- yolo export model=yolov8n.pt format=paddle # creates '/yolov8n_paddle_model'
+ # Export a YOLO11n PyTorch model to PaddlePaddle format
+ yolo export model=yolo11n.pt format=paddle # creates '/yolo11n_paddle_model'
# Run inference with the exported model
- yolo predict model='./yolov8n_paddle_model' source='https://ultralytics.com/images/bus.jpg'
+ yolo predict model='./yolo11n_paddle_model' source='https://ultralytics.com/images/bus.jpg'
```
For more detailed setup and troubleshooting, check the [Ultralytics Installation Guide](../quickstart.md) and [Common Issues Guide](../guides/yolo-common-issues.md).
@@ -179,17 +179,17 @@ PaddlePaddle offers several key advantages for model deployment:
- **Operator Fusion**: By merging compatible operations, it reduces computational overhead.
- **Quantization Techniques**: Supports both post-training and quantization-aware training, enabling lower-[precision](https://www.ultralytics.com/glossary/precision) data representations for improved performance.
-You can achieve enhanced results by exporting your Ultralytics YOLOv8 models to PaddlePaddle, ensuring flexibility and high performance across various applications and hardware platforms. Learn more about PaddlePaddle's features [here](https://www.paddlepaddle.org.cn/en).
+You can achieve enhanced results by exporting your Ultralytics YOLO11 models to PaddlePaddle, ensuring flexibility and high performance across various applications and hardware platforms. Learn more about PaddlePaddle's features [here](https://www.paddlepaddle.org.cn/en).
-### Why should I choose PaddlePaddle for deploying my YOLOv8 models?
+### Why should I choose PaddlePaddle for deploying my YOLO11 models?
-PaddlePaddle, developed by Baidu, is optimized for industrial and commercial AI deployments. Its large developer community and robust framework provide extensive tools similar to TensorFlow and PyTorch. By exporting your YOLOv8 models to PaddlePaddle, you leverage:
+PaddlePaddle, developed by Baidu, is optimized for industrial and commercial AI deployments. Its large developer community and robust framework provide extensive tools similar to TensorFlow and PyTorch. By exporting your YOLO11 models to PaddlePaddle, you leverage:
- **Enhanced Performance**: Optimal execution speed and reduced memory footprint.
- **Flexibility**: Wide compatibility with various devices from smartphones to cloud servers.
- **Scalability**: Efficient parallel processing capabilities for distributed environments.
-These features make PaddlePaddle a compelling choice for deploying YOLOv8 models in production settings.
+These features make PaddlePaddle a compelling choice for deploying YOLO11 models in production settings.
### How does PaddlePaddle improve model performance over other frameworks?
@@ -199,9 +199,9 @@ PaddlePaddle employs several advanced techniques to optimize model performance:
- **Operator Fusion**: Combines compatible operations to minimize memory transfer and increase inference speed.
- **Quantization**: Reduces model size and increases efficiency using lower-precision data while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy).
-These techniques prioritize efficient model execution, making PaddlePaddle an excellent option for deploying high-performance YOLOv8 models. For more on optimization, see the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html).
+These techniques prioritize efficient model execution, making PaddlePaddle an excellent option for deploying high-performance YOLO11 models. For more on optimization, see the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html).
-### What deployment options does PaddlePaddle offer for YOLOv8 models?
+### What deployment options does PaddlePaddle offer for YOLO11 models?
PaddlePaddle provides flexible deployment options:
diff --git a/docs/en/integrations/paperspace.md b/docs/en/integrations/paperspace.md
index 7c67d9bbff1..f6f9117a6cf 100644
--- a/docs/en/integrations/paperspace.md
+++ b/docs/en/integrations/paperspace.md
@@ -1,14 +1,14 @@
---
comments: true
-description: Simplify YOLOv8 training with Paperspace Gradient's all-in-one MLOps platform. Access GPUs, automate workflows, and deploy with ease.
-keywords: YOLOv8, Paperspace Gradient, MLOps, machine learning, training, GPUs, Jupyter notebooks, model deployment, AI, cloud platform
+description: Simplify YOLO11 training with Paperspace Gradient's all-in-one MLOps platform. Access GPUs, automate workflows, and deploy with ease.
+keywords: YOLO11, Paperspace Gradient, MLOps, machine learning, training, GPUs, Jupyter notebooks, model deployment, AI, cloud platform
---
-# YOLOv8 Model Training Made Simple with Paperspace Gradient
+# YOLO11 Model Training Made Simple with Paperspace Gradient
-Training computer vision models like [YOLOv8](https://github.com/ultralytics/ultralytics) can be complicated. It involves managing large datasets, using different types of computer hardware like GPUs, TPUs, and CPUs, and making sure data flows smoothly during the training process. Typically, developers end up spending a lot of time managing their computer systems and environments. It can be frustrating when you just want to focus on building the best model.
+Training computer vision models like [YOLO11](https://github.com/ultralytics/ultralytics) can be complicated. It involves managing large datasets, using different types of computer hardware like GPUs, TPUs, and CPUs, and making sure data flows smoothly during the training process. Typically, developers end up spending a lot of time managing their computer systems and environments. It can be frustrating when you just want to focus on building the best model.
-This is where a platform like Paperspace Gradient can make things simpler. Paperspace Gradient is a MLOps platform that lets you build, train, and deploy [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models all in one place. With Gradient, developers can focus on training their YOLOv8 models without the hassle of managing infrastructure and environments.
+This is where a platform like Paperspace Gradient can make things simpler. Paperspace Gradient is an MLOps platform that lets you build, train, and deploy [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models all in one place. With Gradient, developers can focus on training their YOLO11 models without the hassle of managing infrastructure and environments.
## Paperspace
@@ -28,15 +28,15 @@ Paperspace Gradient is a suite of tools designed to make working with AI and mac
Within its toolkit, it includes support for Google's TPUs via a job runner, comprehensive support for Jupyter notebooks and containers, and new programming language integrations. Its focus on language integration particularly stands out, allowing users to easily adapt their existing Python projects to use the most advanced GPU infrastructure available.
-## Training YOLOv8 Using Paperspace Gradient
+## Training YOLO11 Using Paperspace Gradient
-Paperspace Gradient makes training a YOLOv8 model possible with a few clicks. Thanks to the integration, you can access the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Paperspace Gradient makes training a YOLO11 model possible with a few clicks. Thanks to the integration, you can access the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md).
Sign in and then click on the “Start Machine” button shown in the image below. In a few seconds, a managed GPU environment will start up, and then you can run the notebook's cells.
-
+
-Explore more capabilities of YOLOv8 and Paperspace Gradient in a discussion with Glenn Jocher, Ultralytics founder, and James Skelton from Paperspace. Watch the discussion below.
+Explore more capabilities of YOLO11 and Paperspace Gradient in a discussion with Glenn Jocher, Ultralytics founder, and James Skelton from Paperspace. Watch the discussion below.
@@ -46,14 +46,14 @@ Explore more capabilities of YOLOv8 and Paperspace Gradient in a discussion with
allowfullscreen>
- Watch: Ultralytics Live Session 7: It's All About the Environment: Optimizing YOLOv8 Training With Gradient
+ Watch: Ultralytics Live Session 7: It's All About the Environment: Optimizing YOLO11 Training With Gradient
## Key Features of Paperspace Gradient
As you explore the Paperspace console, you'll see how each step of the machine-learning workflow is supported and enhanced. Here are some things to look out for:
-- **One-Click Notebooks:** Gradient provides pre-configured Jupyter Notebooks specifically tailored for YOLOv8, eliminating the need for environment setup and dependency management. Simply choose the desired notebook and start experimenting immediately.
+- **One-Click Notebooks:** Gradient provides pre-configured Jupyter Notebooks specifically tailored for YOLO11, eliminating the need for environment setup and dependency management. Simply choose the desired notebook and start experimenting immediately.
- **Hardware Flexibility:** Choose from a range of machine types with varying CPU, GPU, and TPU configurations to suit your training needs and budget. Gradient handles all the backend setup, allowing you to focus on model development.
@@ -61,13 +61,13 @@ As you explore the Paperspace console, you'll see how each step of the machine-l
- **Dataset Management:** Efficiently manage your datasets directly within Gradient. Upload, version, and pre-process data with ease, streamlining the data preparation phase of your project.
-- **Model Serving:** Deploy your trained YOLOv8 models as REST APIs with just a few clicks. Gradient handles the infrastructure, allowing you to easily integrate your [object detection](https://www.ultralytics.com/glossary/object-detection) models into your applications.
+- **Model Serving:** Deploy your trained YOLO11 models as REST APIs with just a few clicks. Gradient handles the infrastructure, allowing you to easily integrate your [object detection](https://www.ultralytics.com/glossary/object-detection) models into your applications.
- **Real-time Monitoring:** Monitor the performance and health of your deployed models through Gradient's intuitive dashboard. Gain insights into inference speed, resource utilization, and potential errors.
-## Why Should You Use Gradient for Your YOLOv8 Projects?
+## Why Should You Use Gradient for Your YOLO11 Projects?
-While many options are available for training, deploying, and evaluating YOLOv8 models, the integration with Paperspace Gradient offers a unique set of advantages that separates it from other solutions. Let's explore what makes this integration unique:
+While many options are available for training, deploying, and evaluating YOLO11 models, the integration with Paperspace Gradient offers a unique set of advantages that sets it apart from other solutions. Let's explore what makes this integration stand out:
- **Enhanced Collaboration:** Shared workspaces and version control facilitate seamless teamwork and ensure reproducibility, allowing your team to work together effectively and maintain a clear history of your project.
@@ -79,37 +79,37 @@ While many options are available for training, deploying, and evaluating YOLOv8
## Summary
-This guide explored the Paperspace Gradient integration for training YOLOv8 models. Gradient provides the tools and infrastructure to accelerate your AI development journey from effortless model training and evaluation to streamlined deployment options.
+This guide explored the Paperspace Gradient integration for training YOLO11 models. Gradient provides the tools and infrastructure to accelerate your AI development journey from effortless model training and evaluation to streamlined deployment options.
For further exploration, visit [PaperSpace's official documentation](https://docs.digitalocean.com/products/paperspace/).
-Also, visit the [Ultralytics integration guide page](index.md) to learn more about different YOLOv8 integrations. It's full of insights and tips to take your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects to the next level.
+Also, visit the [Ultralytics integration guide page](index.md) to learn more about different YOLO11 integrations. It's full of insights and tips to take your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects to the next level.
## FAQ
-### How do I train a YOLOv8 model using Paperspace Gradient?
+### How do I train a YOLO11 model using Paperspace Gradient?
-Training a YOLOv8 model with Paperspace Gradient is straightforward and efficient. First, sign in to the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics). Next, click the “Start Machine” button to initiate a managed GPU environment. Once the environment is ready, you can run the notebook's cells to start training your YOLOv8 model. For detailed instructions, refer to our [YOLOv8 Model Training guide](../modes/train.md).
+Training a YOLO11 model with Paperspace Gradient is straightforward and efficient. First, sign in to the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics). Next, click the “Start Machine” button to initiate a managed GPU environment. Once the environment is ready, you can run the notebook's cells to start training your YOLO11 model. For detailed instructions, refer to our [YOLO11 Model Training guide](../modes/train.md).
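Once the managed environment is running, a minimal training sketch (assuming the `ultralytics` package is available in the Gradient notebook) is enough to kick off a run:

```python
from ultralytics import YOLO

# Load a pretrained YOLO11n model
model = YOLO("yolo11n.pt")

# Train on the small COCO8 sample dataset; swap in your own dataset YAML for real projects
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
```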
-### What are the advantages of using Paperspace Gradient for YOLOv8 projects?
+### What are the advantages of using Paperspace Gradient for YOLO11 projects?
-Paperspace Gradient offers several unique advantages for training and deploying YOLOv8 models:
+Paperspace Gradient offers several unique advantages for training and deploying YOLO11 models:
- **Hardware Flexibility:** Choose from various CPU, GPU, and TPU configurations.
-- **One-Click Notebooks:** Use pre-configured Jupyter Notebooks for YOLOv8 without worrying about environment setup.
+- **One-Click Notebooks:** Use pre-configured Jupyter Notebooks for YOLO11 without worrying about environment setup.
- **Experiment Tracking:** Automatic tracking of hyperparameters, metrics, and code changes.
- **Dataset Management:** Efficiently manage your datasets within Gradient.
- **Model Serving:** Deploy models as REST APIs easily.
- **Real-time Monitoring:** Monitor model performance and resource utilization through a dashboard.
-### Why should I choose Ultralytics YOLOv8 over other object detection models?
+### Why should I choose Ultralytics YOLO11 over other object detection models?
-Ultralytics YOLOv8 stands out for its real-time object detection capabilities and high [accuracy](https://www.ultralytics.com/glossary/accuracy). Its seamless integration with platforms like Paperspace Gradient enhances productivity by simplifying the training and deployment process. YOLOv8 supports various use cases, from security systems to retail inventory management. Explore more about YOLOv8's advantages [here](https://www.ultralytics.com/yolo).
+Ultralytics YOLO11 stands out for its real-time object detection capabilities and high [accuracy](https://www.ultralytics.com/glossary/accuracy). Its seamless integration with platforms like Paperspace Gradient enhances productivity by simplifying the training and deployment process. YOLO11 supports various use cases, from security systems to retail inventory management. Explore more about YOLO11's advantages [here](https://www.ultralytics.com/yolo).
-### Can I deploy my YOLOv8 model on edge devices using Paperspace Gradient?
+### Can I deploy my YOLO11 model on edge devices using Paperspace Gradient?
-Yes, you can deploy YOLOv8 models on edge devices using Paperspace Gradient. The platform supports various deployment formats like TFLite and Edge TPU, which are optimized for edge devices. After training your model on Gradient, refer to our [export guide](../modes/export.md) for instructions on converting your model to the desired format.
+Yes, you can deploy YOLO11 models on edge devices using Paperspace Gradient. The platform supports various deployment formats like TFLite and Edge TPU, which are optimized for edge devices. After training your model on Gradient, refer to our [export guide](../modes/export.md) for instructions on converting your model to the desired format.
-### How does experiment tracking in Paperspace Gradient help improve YOLOv8 training?
+### How does experiment tracking in Paperspace Gradient help improve YOLO11 training?
Experiment tracking in Paperspace Gradient streamlines the model development process by automatically logging hyperparameters, metrics, and code changes. This allows you to easily compare different training runs, identify optimal configurations, and reproduce successful experiments.
diff --git a/docs/en/integrations/ray-tune.md b/docs/en/integrations/ray-tune.md
index 3dec5efeb67..29eb3a5173f 100644
--- a/docs/en/integrations/ray-tune.md
+++ b/docs/en/integrations/ray-tune.md
@@ -1,16 +1,16 @@
---
comments: true
-description: Optimize YOLOv8 model performance with Ray Tune. Learn efficient hyperparameter tuning using advanced search strategies, parallelism, and early stopping.
-keywords: YOLOv8, Ray Tune, hyperparameter tuning, model optimization, machine learning, deep learning, AI, Ultralytics, Weights & Biases
+description: Optimize YOLO11 model performance with Ray Tune. Learn efficient hyperparameter tuning using advanced search strategies, parallelism, and early stopping.
+keywords: YOLO11, Ray Tune, hyperparameter tuning, model optimization, machine learning, deep learning, AI, Ultralytics, Weights & Biases
---
-# Efficient [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) with Ray Tune and YOLOv8
+# Efficient [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) with Ray Tune and YOLO11
Hyperparameter tuning is vital in achieving peak model performance by discovering the optimal set of hyperparameters. This involves running trials with different hyperparameters and evaluating each trial's performance.
-## Accelerate Tuning with Ultralytics YOLOv8 and Ray Tune
+## Accelerate Tuning with Ultralytics YOLO11 and Ray Tune
-[Ultralytics YOLOv8](https://www.ultralytics.com/) incorporates Ray Tune for hyperparameter tuning, streamlining the optimization of YOLOv8 model hyperparameters. With Ray Tune, you can utilize advanced search strategies, parallelism, and early stopping to expedite the tuning process.
+[Ultralytics YOLO11](https://www.ultralytics.com/) incorporates Ray Tune for hyperparameter tuning, streamlining the optimization of YOLO11 model hyperparameters. With Ray Tune, you can utilize advanced search strategies, parallelism, and early stopping to expedite the tuning process.
### Ray Tune
@@ -18,11 +18,11 @@ Hyperparameter tuning is vital in achieving peak model performance by discoverin
-[Ray Tune](https://docs.ray.io/en/latest/tune/index.html) is a hyperparameter tuning library designed for efficiency and flexibility. It supports various search strategies, parallelism, and early stopping strategies, and seamlessly integrates with popular [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) frameworks, including Ultralytics YOLOv8.
+[Ray Tune](https://docs.ray.io/en/latest/tune/index.html) is a hyperparameter tuning library designed for efficiency and flexibility. It supports various search strategies, parallelism, and early stopping strategies, and seamlessly integrates with popular [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) frameworks, including Ultralytics YOLO11.
### Integration with Weights & Biases
-YOLOv8 also allows optional integration with [Weights & Biases](https://wandb.ai/site) for monitoring the tuning process.
+YOLO11 also allows optional integration with [Weights & Biases](https://wandb.ai/site) for monitoring the tuning process.
## Installation
@@ -49,21 +49,21 @@ To install the required packages, run:
```python
from ultralytics import YOLO
- # Load a YOLOv8n model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n model
+ model = YOLO("yolo11n.pt")
- # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset
+ # Start tuning hyperparameters for YOLO11n training on the COCO8 dataset
result_grid = model.tune(data="coco8.yaml", use_ray=True)
```
## `tune()` Method Parameters
-The `tune()` method in YOLOv8 provides an easy-to-use interface for hyperparameter tuning with Ray Tune. It accepts several arguments that allow you to customize the tuning process. Below is a detailed explanation of each parameter:
+The `tune()` method in YOLO11 provides an easy-to-use interface for hyperparameter tuning with Ray Tune. It accepts several arguments that allow you to customize the tuning process. Below is a detailed explanation of each parameter:
| Parameter | Type | Description | Default Value |
| --------------- | ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- |
| `data` | `str` | The dataset configuration file (in YAML format) to run the tuner on. This file should specify the training and [validation data](https://www.ultralytics.com/glossary/validation-data) paths, as well as other dataset-specific settings. | |
-| `space` | `dict, optional` | A dictionary defining the hyperparameter search space for Ray Tune. Each key corresponds to a hyperparameter name, and the value specifies the range of values to explore during tuning. If not provided, YOLOv8 uses a default search space with various hyperparameters. | |
+| `space` | `dict, optional` | A dictionary defining the hyperparameter search space for Ray Tune. Each key corresponds to a hyperparameter name, and the value specifies the range of values to explore during tuning. If not provided, YOLO11 uses a default search space with various hyperparameters. | |
| `grace_period` | `int, optional` | The grace period in [epochs](https://www.ultralytics.com/glossary/epoch) for the [ASHA scheduler](https://docs.ray.io/en/latest/tune/api/schedulers.html) in Ray Tune. The scheduler will not terminate any trial before this number of epochs, allowing the model to have some minimum training before making a decision on early stopping. | 10 |
| `gpu_per_trial` | `int, optional` | The number of GPUs to allocate per trial during tuning. This helps manage GPU usage, particularly in multi-GPU environments. If not provided, the tuner will use all available GPUs. | None |
| `iterations` | `int, optional` | The maximum number of trials to run during tuning. This parameter helps control the total number of hyperparameter combinations tested, ensuring the tuning process does not run indefinitely. | 10 |
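+
+As a quick illustration, these arguments can be combined in a single `tune()` call. The sketch below uses arbitrary example values rather than recommended settings:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Illustrative values only: longer grace period, one GPU per trial, 20 trials
+result_grid = model.tune(
+    data="coco8.yaml",
+    use_ray=True,
+    grace_period=15,
+    gpu_per_trial=1,
+    iterations=20,
+)
+```
+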
@@ -73,7 +73,7 @@ By customizing these parameters, you can fine-tune the hyperparameter optimizati
## Default Search Space Description
-The following table lists the default search space parameters for hyperparameter tuning in YOLOv8 with Ray Tune. Each parameter has a specific value range defined by `tune.uniform()`.
+The following table lists the default search space parameters for hyperparameter tuning in YOLO11 with Ray Tune. Each parameter has a specific value range defined by `tune.uniform()`.
| Parameter | Value Range | Description |
| ----------------- | -------------------------- | --------------------------------------------------------------------------- |
@@ -101,15 +101,17 @@ The following table lists the default search space parameters for hyperparameter
## Custom Search Space Example
-In this example, we demonstrate how to use a custom search space for hyperparameter tuning with Ray Tune and YOLOv8. By providing a custom search space, you can focus the tuning process on specific hyperparameters of interest.
+In this example, we demonstrate how to use a custom search space for hyperparameter tuning with Ray Tune and YOLO11. By providing a custom search space, you can focus the tuning process on specific hyperparameters of interest.
!!! example "Usage"
```python
+ from ray import tune
+
from ultralytics import YOLO
# Define a YOLO model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Run Ray Tune on the model
result_grid = model.tune(
@@ -120,7 +122,7 @@ In this example, we demonstrate how to use a custom search space for hyperparame
)
```
-In the code snippet above, we create a YOLO model with the "yolov8n.pt" pretrained weights. Then, we call the `tune()` method, specifying the dataset configuration with "coco8.yaml". We provide a custom search space for the initial learning rate `lr0` using a dictionary with the key "lr0" and the value `tune.uniform(1e-5, 1e-1)`. Finally, we pass additional training arguments, such as the number of epochs directly to the tune method as `epochs=50`.
+In the code snippet above, we create a YOLO model with the "yolo11n.pt" pretrained weights. Then, we call the `tune()` method, specifying the dataset configuration with "coco8.yaml". We provide a custom search space for the initial learning rate `lr0` using a dictionary with the key "lr0" and the value `tune.uniform(1e-5, 1e-1)`. Finally, we pass additional training arguments, such as the number of epochs, directly to the `tune()` method as `epochs=50`.
## Processing Ray Tune Results
@@ -186,9 +188,9 @@ Explore further by looking into Ray Tune's [Analyze Results](https://docs.ray.io
## FAQ
-### How do I tune the hyperparameters of my YOLOv8 model using Ray Tune?
+### How do I tune the hyperparameters of my YOLO11 model using Ray Tune?
-To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, follow these steps:
+To tune the hyperparameters of your Ultralytics YOLO11 model using Ray Tune, follow these steps:
1. **Install the required packages:**
@@ -197,13 +199,13 @@ To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, fol
pip install wandb # optional for logging
```
-2. **Load your YOLOv8 model and start tuning:**
+2. **Load your YOLO11 model and start tuning:**
```python
from ultralytics import YOLO
- # Load a YOLOv8 model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11 model
+ model = YOLO("yolo11n.pt")
# Start tuning with the COCO8 dataset
result_grid = model.tune(data="coco8.yaml", use_ray=True)
@@ -211,9 +213,9 @@ To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, fol
This utilizes Ray Tune's advanced search strategies and parallelism to efficiently optimize your model's hyperparameters. For more information, check out the [Ray Tune documentation](https://docs.ray.io/en/latest/tune/index.html).
-### What are the default hyperparameters for YOLOv8 tuning with Ray Tune?
+### What are the default hyperparameters for YOLO11 tuning with Ray Tune?
-Ultralytics YOLOv8 uses the following default hyperparameters for tuning with Ray Tune:
+Ultralytics YOLO11 uses the following default hyperparameters for tuning with Ray Tune:
| Parameter | Value Range | Description |
| --------------- | -------------------------- | ------------------------------ |
@@ -229,9 +231,9 @@ Ultralytics YOLOv8 uses the following default hyperparameters for tuning with Ra
These hyperparameters can be customized to suit your specific needs. For a complete list and more details, refer to the [Hyperparameter Tuning](../guides/hyperparameter-tuning.md) guide.
-### How can I integrate Weights & Biases with my YOLOv8 model tuning?
+### How can I integrate Weights & Biases with my YOLO11 model tuning?
-To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process:
+To integrate Weights & Biases (W&B) with your Ultralytics YOLO11 tuning process:
1. **Install W&B:**
@@ -249,7 +251,7 @@ To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process:
wandb.init(project="YOLO-Tuning", entity="your-entity")
# Load YOLO model
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
# Tune hyperparameters
result_grid = model.tune(data="coco8.yaml", use_ray=True)
@@ -257,7 +259,7 @@ To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process:
This setup will allow you to monitor the tuning process, track hyperparameter configurations, and visualize results in W&B.
-### Why should I use Ray Tune for hyperparameter optimization with YOLOv8?
+### Why should I use Ray Tune for hyperparameter optimization with YOLO11?
Ray Tune offers numerous advantages for hyperparameter optimization:
@@ -265,18 +267,18 @@ Ray Tune offers numerous advantages for hyperparameter optimization:
- **Parallelism:** Supports parallel execution of multiple trials, significantly speeding up the tuning process.
- **Early Stopping:** Employs strategies like ASHA to terminate under-performing trials early, saving computational resources.
-Ray Tune seamlessly integrates with Ultralytics YOLOv8, providing an easy-to-use interface for tuning hyperparameters effectively. To get started, check out the [Efficient Hyperparameter Tuning with Ray Tune and YOLOv8](../guides/hyperparameter-tuning.md) guide.
+Ray Tune seamlessly integrates with Ultralytics YOLO11, providing an easy-to-use interface for tuning hyperparameters effectively. To get started, check out the [Efficient Hyperparameter Tuning with Ray Tune and YOLO11](../guides/hyperparameter-tuning.md) guide.
-### How can I define a custom search space for YOLOv8 hyperparameter tuning?
+### How can I define a custom search space for YOLO11 hyperparameter tuning?
-To define a custom search space for your YOLOv8 hyperparameter tuning with Ray Tune:
+To define a custom search space for your YOLO11 hyperparameter tuning with Ray Tune:
```python
from ray import tune
from ultralytics import YOLO
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
search_space = {"lr0": tune.uniform(1e-5, 1e-1), "momentum": tune.uniform(0.6, 0.98)}
result_grid = model.tune(data="coco8.yaml", space=search_space, use_ray=True)
```
diff --git a/docs/en/integrations/roboflow.md b/docs/en/integrations/roboflow.md
index 321e5601644..5a9d5e3180b 100644
--- a/docs/en/integrations/roboflow.md
+++ b/docs/en/integrations/roboflow.md
@@ -1,7 +1,7 @@
---
comments: true
-description: Learn how to gather, label, and deploy data for custom YOLOv8 models using Roboflow's powerful tools. Optimize your computer vision pipeline effortlessly.
-keywords: Roboflow, YOLOv8, data labeling, computer vision, model training, model deployment, dataset management, automated image annotation, AI tools
+description: Learn how to gather, label, and deploy data for custom YOLO11 models using Roboflow's powerful tools. Optimize your computer vision pipeline effortlessly.
+keywords: Roboflow, YOLO11, data labeling, computer vision, model training, model deployment, dataset management, automated image annotation, AI tools
---
# Roboflow
@@ -17,17 +17,17 @@ keywords: Roboflow, YOLOv8, data labeling, computer vision, model training, mode
For more details see [Ultralytics Licensing](https://www.ultralytics.com/license).
-In this guide, we are going to showcase how to find, label, and organize data for use in training a custom Ultralytics YOLOv8 model. Use the table of contents below to jump directly to a specific section:
+In this guide, we are going to showcase how to find, label, and organize data for use in training a custom Ultralytics YOLO11 model. Use the table of contents below to jump directly to a specific section:
-- Gather data for training a custom YOLOv8 model
-- Upload, convert and label data for YOLOv8 format
+- Gather data for training a custom YOLO11 model
+- Upload, convert and label data for YOLO11 format
- Pre-process and augment data for model robustness
-- Dataset management for [YOLOv8](../models/yolov8.md)
+- Dataset management for [YOLO11](../models/yolo11.md)
- Export data in 40+ formats for model training
-- Upload custom YOLOv8 model weights for testing and deployment
-- Gather Data for Training a Custom YOLOv8 Model
+- Upload custom YOLO11 model weights for testing and deployment
+- Gather Data for Training a Custom YOLO11 Model
-Roboflow provides two services that can help you collect data for YOLOv8 models: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics).
+Roboflow provides two services that can help you collect data for YOLO11 models: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics).
Universe is an online repository with over 250,000 vision datasets totalling over 100 million images.
@@ -41,21 +41,21 @@ With a [free Roboflow account](https://app.roboflow.com/?ref=ultralytics), you c
-For YOLOv8, select "YOLOv8" as the export format:
+For YOLO11, select "YOLO11" as the export format:
-Universe also has a page that aggregates all [public fine-tuned YOLOv8 models uploaded to Roboflow](https://universe.roboflow.com/search?q=model%3Ayolov8&ref=ultralytics). You can use this page to explore pre-trained models you can use for testing or [for automated data labeling](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics) or to prototype with [Roboflow inference](https://github.com/roboflow/inference?ref=ultralytics).
+Universe also has a page that aggregates all [public fine-tuned YOLO11 models uploaded to Roboflow](https://universe.roboflow.com/search?q=model%3Ayolov8&ref=ultralytics). You can use this page to explore pre-trained models you can use for testing or [for automated data labeling](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics) or to prototype with [Roboflow inference](https://github.com/roboflow/inference?ref=ultralytics).
If you want to gather images yourself, try [Collect](https://github.com/roboflow/roboflow-collect), an open source project that allows you to automatically gather images using a webcam on the edge. You can use text or image prompts with Collect to instruct what data should be collected, allowing you to capture only the useful data you need to build your vision model.
-## Upload, Convert and Label Data for YOLOv8 Format
+## Upload, Convert and Label Data for YOLO11 Format
[Roboflow Annotate](https://docs.roboflow.com/annotate/use-roboflow-annotate?ref=ultralytics) is an online annotation tool for use in labeling images for [object detection](https://www.ultralytics.com/glossary/object-detection), classification, and segmentation.
-To label data for a YOLOv8 object detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), or classification model, first create a project in Roboflow.
+To label data for a YOLO11 object detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), or classification model, first create a project in Roboflow.
@@ -95,7 +95,7 @@ You can also add tags to images from the Tags panel in the sidebar. You can appl
-Models hosted on Roboflow can be used with Label Assist, an automated annotation tool that uses your YOLOv8 model to recommend annotations. To use Label Assist, first upload a YOLOv8 model to Roboflow (see instructions later in the guide). Then, click the magic wand icon in the left sidebar and select your model for use in Label Assist.
+Models hosted on Roboflow can be used with Label Assist, an automated annotation tool that uses your YOLO11 model to recommend annotations. To use Label Assist, first upload a YOLO11 model to Roboflow (see instructions later in the guide). Then, click the magic wand icon in the left sidebar and select your model for use in Label Assist.
Choose a model, then click "Continue" to enable Label Assist:
@@ -109,7 +109,7 @@ When you open new images for annotation, Label Assist will trigger and recommend
-## Dataset Management for YOLOv8
+## Dataset Management for YOLO11
Roboflow provides a suite of tools for understanding computer vision datasets.
@@ -157,13 +157,13 @@ When your dataset version has been generated, you can export your data into a ra
-You are now ready to train YOLOv8 on a custom dataset. Follow this [written guide](https://blog.roboflow.com/how-to-train-yolov8-on-a-custom-dataset/?ref=ultralytics) and [YouTube video](https://www.youtube.com/watch?v=wuZtUMEiKWY) for step-by-step instructions or refer to the [Ultralytics documentation](../modes/train.md).
+You are now ready to train YOLO11 on a custom dataset. Follow this [written guide](https://blog.roboflow.com/how-to-train-yolov8-on-a-custom-dataset/?ref=ultralytics) and [YouTube video](https://www.youtube.com/watch?v=wuZtUMEiKWY) for step-by-step instructions or refer to the [Ultralytics documentation](../modes/train.md).
-## Upload Custom YOLOv8 Model Weights for Testing and Deployment
+## Upload Custom YOLO11 Model Weights for Testing and Deployment
-Roboflow offers an infinitely scalable API for deployed models and SDKs for use with NVIDIA Jetsons, Luxonis OAKs, Raspberry Pis, GPU-based devices, and more.
+Roboflow offers a scalable API for deployed models and SDKs for use with NVIDIA Jetson, Luxonis OAK, Raspberry Pi, GPU-based devices, and more.
-You can deploy YOLOv8 models by uploading YOLOv8 weights to Roboflow. You can do this in a few lines of Python code. Create a new Python file and add the following code:
+You can deploy YOLO11 models by uploading YOLO11 weights to Roboflow. You can do this in a few lines of Python code. Create a new Python file and add the following code:
```python
import roboflow # install with 'pip install roboflow'
@@ -190,7 +190,7 @@ To test your model and find deployment instructions for supported SDKs, go to th
You can also use your uploaded model as a [labeling assistant](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics). This feature uses your trained model to recommend annotations on images uploaded to Roboflow.
-## How to Evaluate YOLOv8 Models
+## How to Evaluate YOLO11 Models
Roboflow provides a range of features for use in evaluating models.
@@ -224,17 +224,17 @@ You can use Vector Analysis to:
## Learning Resources
-Want to learn more about using Roboflow for creating YOLOv8 models? The following resources may be helpful in your work.
+Want to learn more about using Roboflow for creating YOLO11 models? The following resources may be helpful in your work.
-- [Train YOLOv8 on a Custom Dataset](https://github.com/roboflow/notebooks/blob/main/notebooks/train-yolov8-object-detection-on-custom-dataset.ipynb): Follow our interactive notebook that shows you how to train a YOLOv8 model on a custom dataset.
-- [Autodistill](https://docs.autodistill.com/): Use large foundation vision models to label data for specific models. You can label images for use in training YOLOv8 classification, detection, and segmentation models with Autodistill.
+- [Train YOLO11 on a Custom Dataset](https://github.com/roboflow/notebooks/blob/main/notebooks/train-yolov8-object-detection-on-custom-dataset.ipynb): Follow our interactive notebook that shows you how to train a YOLO11 model on a custom dataset.
+- [Autodistill](https://docs.autodistill.com/): Use large foundation vision models to label data for specific models. You can label images for use in training YOLO11 classification, detection, and segmentation models with Autodistill.
- [Supervision](https://supervision.roboflow.com/?ref=ultralytics): A Python package with helpful utilities for use in working with computer vision models. You can use supervision to filter detections, compute confusion matrices, and more, all in a few lines of Python code.
-- [Roboflow Blog](https://blog.roboflow.com/?ref=ultralytics): The Roboflow Blog features over 500 articles on computer vision, covering topics from how to train a YOLOv8 model to annotation best practices.
-- [Roboflow YouTube channel](https://www.youtube.com/@Roboflow): Browse dozens of in-depth computer vision guides on our YouTube channel, covering topics from training YOLOv8 models to automated image labeling.
+- [Roboflow Blog](https://blog.roboflow.com/?ref=ultralytics): The Roboflow Blog features over 500 articles on computer vision, covering topics from how to train a YOLO11 model to annotation best practices.
+- [Roboflow YouTube channel](https://www.youtube.com/@Roboflow): Browse dozens of in-depth computer vision guides on our YouTube channel, covering topics from training YOLO11 models to automated image labeling.
## Project Showcase
-Below are a few of the many pieces of feedback we have received for using YOLOv8 and Roboflow together to create computer vision models.
+Below are a few of the many pieces of feedback we have received for using YOLO11 and Roboflow together to create computer vision models.
@@ -244,26 +244,26 @@ Below are a few of the many pieces of feedback we have received for using YOLOv8
## FAQ
-### How do I label data for YOLOv8 models using Roboflow?
+### How do I label data for YOLO11 models using Roboflow?
-Labeling data for YOLOv8 models using Roboflow is straightforward with Roboflow Annotate. First, create a project on Roboflow and upload your images. After uploading, select the batch of images and click "Start Annotating." You can use the `B` key for bounding boxes or the `P` key for polygons. For faster annotation, use the SAM-based label assistant by clicking the cursor icon in the sidebar. Detailed steps can be found [here](#upload-convert-and-label-data-for-yolov8-format).
+Labeling data for YOLO11 models using Roboflow is straightforward with Roboflow Annotate. First, create a project on Roboflow and upload your images. After uploading, select the batch of images and click "Start Annotating." You can use the `B` key for bounding boxes or the `P` key for polygons. For faster annotation, use the SAM-based label assistant by clicking the cursor icon in the sidebar. Detailed steps can be found [here](#upload-convert-and-label-data-for-yolo11-format).
-### What services does Roboflow offer for collecting YOLOv8 [training data](https://www.ultralytics.com/glossary/training-data)?
+### What services does Roboflow offer for collecting YOLO11 [training data](https://www.ultralytics.com/glossary/training-data)?
-Roboflow provides two key services for collecting YOLOv8 training data: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe offers access to over 250,000 vision datasets, while Collect helps you gather images using a webcam and automated prompts.
+Roboflow provides two key services for collecting YOLO11 training data: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe offers access to over 250,000 vision datasets, while Collect helps you gather images using a webcam and automated prompts.
-### How can I manage and analyze my YOLOv8 dataset using Roboflow?
+### How can I manage and analyze my YOLO11 dataset using Roboflow?
-Roboflow offers robust dataset management tools, including dataset search, tagging, and Health Check. Use the search feature to find images based on text descriptions or tags. Health Check provides insights into dataset quality, showing class balance, image sizes, and annotation heatmaps. This helps optimize dataset performance before training YOLOv8 models. Detailed information can be found [here](#dataset-management-for-yolov8).
+Roboflow offers robust dataset management tools, including dataset search, tagging, and Health Check. Use the search feature to find images based on text descriptions or tags. Health Check provides insights into dataset quality, showing class balance, image sizes, and annotation heatmaps. This helps optimize dataset performance before training YOLO11 models. Detailed information can be found [here](#dataset-management-for-yolo11).
-### How do I export my YOLOv8 dataset from Roboflow?
+### How do I export my YOLO11 dataset from Roboflow?
-To export your YOLOv8 dataset from Roboflow, you need to create a dataset version. Click "Versions" in the sidebar, then "Create New Version" and apply any desired augmentations. Once the version is generated, click "Export Dataset" and choose the YOLOv8 format. Follow this process [here](#export-data-in-40-formats-for-model-training).
+To export your YOLO11 dataset from Roboflow, you need to create a dataset version. Click "Versions" in the sidebar, then "Create New Version" and apply any desired augmentations. Once the version is generated, click "Export Dataset" and choose the YOLO11 format. Follow this process [here](#export-data-in-40-formats-for-model-training).
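+
+If you prefer to pull a dataset version programmatically, the Roboflow Python package can also download it directly. The snippet below is a minimal sketch: the workspace, project, and version identifiers are placeholders, and the exact export-format string for YOLO11 may vary with your `roboflow` package version:
+
+```python
+from roboflow import Roboflow  # install with 'pip install roboflow'
+
+rf = Roboflow(api_key="YOUR_API_KEY")
+project = rf.workspace("your-workspace").project("your-project")
+
+# Download dataset version 1 in a YOLO-style format (placeholder identifiers)
+dataset = project.version(1).download("yolov11")
+```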
-### How can I integrate and deploy YOLOv8 models with Roboflow?
+### How can I integrate and deploy YOLO11 models with Roboflow?
-Integrate and deploy YOLOv8 models on Roboflow by uploading your YOLOv8 weights through a few lines of Python code. Use the provided script to authenticate and upload your model, which will create an API for deployment. For details on the script and further instructions, see [this section](#upload-custom-yolov8-model-weights-for-testing-and-deployment).
+Integrate and deploy YOLO11 models on Roboflow by uploading your YOLO11 weights through a few lines of Python code. Use the provided script to authenticate and upload your model, which will create an API for deployment. For details on the script and further instructions, see [this section](#upload-custom-yolo11-model-weights-for-testing-and-deployment).
-### What tools does Roboflow provide for evaluating YOLOv8 models?
+### What tools does Roboflow provide for evaluating YOLO11 models?
-Roboflow offers model evaluation tools, including a confusion matrix and vector analysis plots. Access these tools from the "View Detailed Evaluation" button on your model page. These features help identify model performance issues and find areas for improvement. For more information, refer to [this section](#how-to-evaluate-yolov8-models).
+Roboflow offers model evaluation tools, including a confusion matrix and vector analysis plots. Access these tools from the "View Detailed Evaluation" button on your model page. These features help identify model performance issues and find areas for improvement. For more information, refer to [this section](#how-to-evaluate-yolo11-models).
diff --git a/docs/en/integrations/rockchip-rknn.md b/docs/en/integrations/rockchip-rknn.md
new file mode 100644
index 00000000000..087f942e089
--- /dev/null
+++ b/docs/en/integrations/rockchip-rknn.md
@@ -0,0 +1,206 @@
+---
+comments: true
+description: Learn how to export YOLO11 models to RKNN format for efficient deployment on Rockchip platforms with enhanced performance.
+keywords: YOLO11, RKNN, model export, Ultralytics, Rockchip, machine learning, model deployment, computer vision, deep learning
+---
+
+# Rockchip RKNN Export for Ultralytics YOLO11 Models
+
+When deploying computer vision models on embedded devices, especially those powered by Rockchip processors, having a compatible model format is essential. Exporting [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to RKNN format ensures optimized performance and compatibility with Rockchip's hardware. This guide will walk you through converting your YOLO11 models to RKNN format, enabling efficient deployment on Rockchip platforms.
+
+!!! note
+
+    This guide has been tested with the [Radxa Rock 5B](https://radxa.com/products/rock5/5b), which is based on Rockchip RK3588, and the [Radxa Zero 3W](https://radxa.com/products/zeros/zero3w), which is based on Rockchip RK3566. It is expected to work across other Rockchip-based devices that support [rknn-toolkit2](https://github.com/airockchip/rknn-toolkit2), such as RK3576, RK3568, RK3562, RV1103, RV1106, RV1103B, RV1106B and RK2118.
+
+## What is Rockchip?
+
+Renowned for delivering versatile and power-efficient solutions, Rockchip designs advanced System-on-Chips (SoCs) that power a wide range of consumer electronics, industrial applications, and AI technologies. With ARM-based architecture, built-in Neural Processing Units (NPUs), and high-resolution multimedia support, Rockchip SoCs enable cutting-edge performance for devices like tablets, smart TVs, IoT systems, and edge AI applications. Companies like Radxa, ASUS, Pine64, Orange Pi, Odroid, Khadas, and Banana Pi offer a variety of products based on Rockchip SoCs, further extending their reach and impact across diverse markets.
+
+## RKNN Toolkit
+
+The [RKNN Toolkit](https://github.com/airockchip/rknn-toolkit2) is a set of tools and libraries provided by Rockchip to facilitate the deployment of deep learning models on their hardware platforms. RKNN, or Rockchip Neural Network, is the proprietary format used by these tools. RKNN models are designed to take full advantage of the hardware acceleration provided by Rockchip's NPU (Neural Processing Unit), ensuring high performance in AI tasks on devices like RK3588, RK3566, RV1103, RV1106, and other Rockchip-powered systems.
+
+## Key Features of RKNN Models
+
+RKNN models offer several advantages for deployment on Rockchip platforms:
+
+- **Optimized for NPU**: RKNN models are specifically optimized to run on Rockchip's NPUs, ensuring maximum performance and efficiency.
+- **Low Latency**: The RKNN format minimizes inference latency, which is critical for real-time applications on edge devices.
+- **Platform-Specific Customization**: RKNN models can be tailored to specific Rockchip platforms, enabling better utilization of hardware resources.
+
+## Flash OS to Rockchip hardware
+
+The first step after getting your hands on a Rockchip-based device is to flash an OS so that the hardware can boot into a working environment. In this guide, we point to the getting started guides of the two devices we tested: the Radxa Rock 5B and the Radxa Zero 3W.
+
+- [Radxa Rock 5B Getting Started Guide](https://docs.radxa.com/en/rock5/rock5b)
+- [Radxa Zero 3W Getting Started Guide](https://docs.radxa.com/en/zero/zero3)
+
+## Export to RKNN: Converting Your YOLO11 Model
+
+Export an Ultralytics YOLO11 model to RKNN format and run inference with the exported model.
+
+!!! note
+
+    Make sure to use an x86-based Linux PC to export the model to RKNN, because exporting on Rockchip-based devices (ARM64) is not supported.
+
+### Installation
+
+To install the required packages, run:
+
+!!! Tip "Installation"
+
+ === "CLI"
+
+ ```bash
+ # Install the required package for YOLO11
+ pip install ultralytics
+ ```
+
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+
+### Usage
+
+!!! note
+
+ Export is currently only supported for detection models. More model support will be coming in the future.
+
+!!! Example "Usage"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
+
+ # Export the model to RKNN format
+ # 'name' can be one of rk3588, rk3576, rk3566, rk3568, rk3562, rv1103, rv1106, rv1103b, rv1106b, rk2118
+ model.export(format="rknn", name="rk3588") # creates '/yolo11n_rknn_model'
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Export a YOLO11n PyTorch model to RKNN format
+ # 'name' can be one of rk3588, rk3576, rk3566, rk3568, rk3562, rv1103, rv1106, rv1103b, rv1106b, rk2118
+ yolo export model=yolo11n.pt format=rknn name=rk3588 # creates '/yolo11n_rknn_model'
+ ```
+
+For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
+
+## Deploying Exported YOLO11 RKNN Models
+
+Once you've successfully exported your Ultralytics YOLO11 models to RKNN format, the next step is deploying these models on Rockchip-based devices.
+
+### Installation
+
+To install the required packages, run:
+
+!!! Tip "Installation"
+
+ === "CLI"
+
+ ```bash
+ # Install the required package for YOLO11
+ pip install ultralytics
+ ```
+
+### Usage
+
+!!! Example "Usage"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the exported RKNN model
+ rknn_model = YOLO("./yolo11n_rknn_model")
+
+ # Run inference
+ results = rknn_model("https://ultralytics.com/images/bus.jpg")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Run inference with the exported model
+ yolo predict model='./yolo11n_rknn_model' source='https://ultralytics.com/images/bus.jpg'
+ ```
+
+!!! note
+
+ If you encounter a log message indicating that the RKNN runtime version does not match the RKNN Toolkit version and the inference fails, please replace `/usr/lib/librknnrt.so` with official [librknnrt.so file](https://github.com/airockchip/rknn-toolkit2/blob/master/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so).
+
+## Benchmarks
+
+YOLO11 benchmarks below were run by the Ultralytics team on the Radxa Rock 5B, based on Rockchip RK3588, using the `rknn` model format, measuring speed and accuracy.
+
+| Model | Format | Status | Size (MB) | mAP50-95(B) | Inference time (ms/im) |
+| ------- | ------ | ------ | --------- | ----------- | ---------------------- |
+| YOLO11n | `rknn` | ✅     | 7.4       | 0.61        | 99.5                   |
+| YOLO11s | `rknn` | ✅     | 20.7      | 0.741       | 122.3                  |
+| YOLO11m | `rknn` | ✅     | 41.9      | 0.764       | 298.0                  |
+| YOLO11l | `rknn` | ✅     | 53.3      | 0.72        | 319.6                  |
+| YOLO11x | `rknn` | ✅     | 114.6     | 0.828       | 632.1                  |
+
+!!! note
+
+    Validation for the above benchmarks was done using the coco8 dataset.
+
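+To sanity-check accuracy on your own device, you can also run validation on the exported model through the standard Ultralytics API. This is a minimal sketch, assuming the `yolo11n_rknn_model` directory produced by the export step above:
+
+```python
+from ultralytics import YOLO
+
+# Load the exported RKNN model and validate it on the coco8 dataset
+rknn_model = YOLO("./yolo11n_rknn_model")
+metrics = rknn_model.val(data="coco8.yaml")
+print(metrics.box.map)  # mAP50-95
+```
+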
+## Summary
+
+In this guide, you've learned how to export Ultralytics YOLO11 models to RKNN format to enhance their deployment on Rockchip platforms. You were also introduced to the RKNN Toolkit and the specific advantages of using RKNN models for edge AI applications.
+
+For further details on usage, visit the [RKNN official documentation](https://github.com/airockchip/rknn-toolkit2).
+
+Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there.
+
+## FAQ
+
+### How do I export my Ultralytics YOLO model to RKNN format?
+
+You can easily export your Ultralytics YOLO model to RKNN format using the `export()` method in the Ultralytics Python package or via the command-line interface (CLI). Ensure you are using an x86-based Linux PC for the export process, as ARM64 devices like Rockchip are not supported for this operation. You can specify the target Rockchip platform using the `name` argument, such as `rk3588`, `rk3566`, or others. This process generates an optimized RKNN model ready for deployment on your Rockchip device, taking advantage of its Neural Processing Unit (NPU) for accelerated inference.
+
+!!! Example
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load your YOLO model
+ model = YOLO("yolo11n.pt")
+
+ # Export to RKNN format for a specific Rockchip platform
+ model.export(format="rknn", name="rk3588")
+ ```
+
+ === "CLI"
+
+ ```bash
+ yolo export model=yolo11n.pt format=rknn name=rk3588
+ ```
+
+### What are the benefits of using RKNN models on Rockchip devices?
+
+RKNN models are specifically designed to leverage the hardware acceleration capabilities of Rockchip's Neural Processing Units (NPUs). This optimization results in significantly faster inference speeds and reduced latency compared to running generic model formats like ONNX or TensorFlow Lite on the same hardware. Using RKNN models allows for more efficient use of the device's resources, leading to lower power consumption and better overall performance, especially critical for real-time applications on edge devices. By converting your Ultralytics YOLO models to RKNN, you can achieve optimal performance on devices powered by Rockchip SoCs like the RK3588, RK3566, and others.
+
+### Can I deploy RKNN models on devices from other manufacturers like NVIDIA or Google?
+
+RKNN models are specifically optimized for Rockchip platforms and their integrated NPUs. While you can technically run an RKNN model on other platforms using software emulation, you will not benefit from the hardware acceleration provided by Rockchip devices. For optimal performance on other platforms, it's recommended to export your Ultralytics YOLO models to formats specifically designed for those platforms, such as TensorRT for NVIDIA GPUs or [TensorFlow Lite](https://docs.ultralytics.com/integrations/tflite/) for Google's Edge TPU. Ultralytics supports exporting to a wide range of formats, ensuring compatibility with various hardware accelerators.
+
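+As an illustration, switching targets is a one-line change of the `format` argument, assuming the corresponding toolchains are installed on the export machine:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolo11n.pt")
+
+# Same model, different accelerators
+model.export(format="engine")  # TensorRT for NVIDIA GPUs
+model.export(format="tflite")  # TensorFlow Lite for mobile and Edge TPU workflows
+```
+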
+### What Rockchip platforms are supported for RKNN model deployment?
+
+The Ultralytics YOLO export to RKNN format supports a wide range of Rockchip platforms, including the popular RK3588, RK3576, RK3566, RK3568, RK3562, RV1103, RV1106, RV1103B, RV1106B, and RK2118. These platforms are commonly found in devices from manufacturers like Radxa, ASUS, Pine64, Orange Pi, Odroid, Khadas, and Banana Pi. This broad support ensures that you can deploy your optimized RKNN models on various Rockchip-powered devices, from single-board computers to industrial systems, taking full advantage of their AI acceleration capabilities for enhanced performance in your computer vision applications.
+
+### How does the performance of RKNN models compare to other formats on Rockchip devices?
+
+RKNN models generally outperform other formats like ONNX or TensorFlow Lite on Rockchip devices due to their optimization for Rockchip's NPUs. For instance, benchmarks on the Radxa Rock 5B (RK3588) show that [YOLO11n](https://www.ultralytics.com/blog/all-you-need-to-know-about-ultralytics-yolo11-and-its-applications) in RKNN format achieves an inference time of 99.5 ms/image, significantly faster than other formats. This performance advantage is consistent across various YOLO11 model sizes, as demonstrated in the [benchmarks section](#benchmarks). By leveraging the dedicated NPU hardware, RKNN models minimize latency and maximize throughput, making them ideal for real-time applications on Rockchip-based edge devices.
diff --git a/docs/en/integrations/seeedstudio-recamera.md b/docs/en/integrations/seeedstudio-recamera.md
new file mode 100644
index 00000000000..dcad49351d6
--- /dev/null
+++ b/docs/en/integrations/seeedstudio-recamera.md
@@ -0,0 +1,110 @@
+---
+comments: true
+description: Discover how to get started with Seeed Studio reCamera for edge AI applications using Ultralytics YOLO11. Learn about its powerful features, real-world applications, and how to export YOLO11 models to ONNX format for seamless integration.
+keywords: Seeed Studio reCamera, YOLO11, ONNX export, edge AI, computer vision, real-time detection, personal protective equipment detection, fire detection, waste detection, fall detection, modular AI devices, Ultralytics
+---
+
+# Quick Start Guide: Seeed Studio reCamera with Ultralytics YOLO11
+
+[reCamera](https://www.seeedstudio.com/recamera) was introduced for the AI community at [YOLO Vision 2024 (YV24)](https://www.youtube.com/watch?v=rfI5vOo3-_A), the annual hybrid event hosted by [Ultralytics](https://ultralytics.com/). It is mainly designed for edge AI applications, offering powerful processing capabilities and effortless deployment.
+
+With support for diverse hardware configurations and open-source resources, it serves as an ideal platform for prototyping and deploying innovative [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) [solutions](https://docs.ultralytics.com/solutions/#solutions) at the edge.
+
+## Why Choose reCamera?
+
+The reCamera series is purpose-built for edge AI applications and tailored to meet the needs of developers and innovators. Here's why it stands out:
+
+- **RISC-V Powered Performance**: At its core is the SG200X processor, built on the RISC-V architecture, delivering exceptional performance for edge AI tasks while maintaining energy efficiency. With the ability to execute 1 trillion operations per second (1 TOPS), it handles demanding tasks like real-time object detection easily.
+
+- **Optimized Video Technologies**: Supports advanced video compression standards, including H.264 and H.265, to reduce storage and bandwidth requirements without sacrificing quality. Features like HDR imaging, 3D noise reduction, and lens correction ensure professional visuals, even in challenging environments.
+
+- **Energy-Efficient Dual Processing**: While the SG200X handles complex AI tasks, a smaller 8-bit microcontroller manages simpler operations to conserve power, making the reCamera ideal for battery-operated or low-power setups.
+
+- **Modular and Upgradable Design**: The reCamera is built with a modular structure, consisting of three main components: the core board, sensor board, and baseboard. This design allows developers to easily swap or upgrade components, ensuring flexibility and future-proofing for evolving projects.
+
+## Quick Hardware Setup of reCamera
+
+Please follow the [reCamera Quick Start Guide](https://wiki.seeedstudio.com/recamera_getting_started) for initial onboarding of the device, such as connecting it to a WiFi network and accessing the [Node-RED](https://nodered.org) web UI to quickly preview detection results with the pre-installed Ultralytics YOLO models.
+
+## Export to cvimodel: Converting Your YOLO11 Model
+
+Here we will first convert the `PyTorch` model to `ONNX`, then convert `ONNX` to the `MLIR` model format, and finally convert `MLIR` to `cvimodel` for on-device inference.
+
+### Export to ONNX
+
+Export an Ultralytics YOLO11 model to ONNX model format.
+
+#### Installation
+
+To install the required packages, run:
+
+!!! Tip "Installation"
+
+ === "CLI"
+
+ ```bash
+ pip install ultralytics
+ ```
+
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips.
+
+#### Usage
+
+!!! Example "Usage"
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load the YOLO11 model
+ model = YOLO("yolo11n.pt")
+
+ # Export the model to ONNX format
+ model.export(format="onnx") # creates 'yolo11n.onnx'
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Export a YOLO11n PyTorch model to ONNX format
+ yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx'
+ ```
+
+For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md).
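+
+Before moving on to the MLIR conversion, you can optionally sanity-check the exported ONNX model by running inference with it through the same Ultralytics API. This is a minimal sketch, assuming `yolo11n.onnx` was created by the export step above:
+
+```python
+from ultralytics import YOLO
+
+# Load the exported ONNX model and run a quick test inference
+onnx_model = YOLO("yolo11n.onnx")
+results = onnx_model("https://ultralytics.com/images/bus.jpg")
+```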
+
+### Export ONNX to MLIR and cvimodel
+
+After obtaining an ONNX model, refer to [Convert and Quantize AI Models](https://wiki.seeedstudio.com/recamera_model_conversion) page to convert the ONNX model to MLIR and then to cvimodel.
+
+!!! note
+
+ We're actively working on adding reCamera support directly into the Ultralytics package, and it will be available soon. In the meantime, check out our blog on [Integrating Ultralytics YOLO Models with Seeed Studio's reCamera](https://www.ultralytics.com/blog/integrating-ultralytics-yolo-models-on-seeed-studios-recamera) for more insights.
+
+## Benchmarks
+
+Coming soon.
+
+## Real-World Applications of reCamera
+
+The reCamera's advanced computer vision capabilities and modular design make it suitable for a wide range of real-world scenarios, helping developers and businesses tackle unique challenges with ease.
+
+- **Fall Detection**: Designed for safety and healthcare applications, the reCamera can detect falls in real-time, making it ideal for elderly care, hospitals, and industrial settings where rapid response is critical.
+
+- **Personal Protective Equipment Detection**: The reCamera can be used to ensure workplace safety by detecting PPE compliance in real-time. It helps identify whether workers are wearing helmets, gloves, or other safety gear, reducing risks in industrial environments.
+
+- **Fire Detection**: The reCamera's real-time processing capabilities make it an excellent choice for fire detection in industrial and residential areas, providing early warnings to prevent potential disasters.
+
+- **Waste Detection**: It can also be utilized for waste detection applications, making it an excellent tool for environmental monitoring and waste management.
+
+- **Car Parts Detection**: In manufacturing and automotive industries, it aids in detecting and analyzing car parts for quality control, assembly line monitoring, and inventory management.
+
+
diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md
new file mode 100644
index 00000000000..ea0d70de986
--- /dev/null
+++ b/docs/en/integrations/sony-imx500.md
@@ -0,0 +1,330 @@
+---
+comments: true
+description: Learn to export Ultralytics YOLOv8 models to Sony's IMX500 format to optimize your models for efficient deployment.
+keywords: Sony, IMX500, IMX 500, Atrios, MCT, model export, quantization, pruning, deep learning optimization, Raspberry Pi AI Camera, edge AI, PyTorch, IMX
+---
+
+# Sony IMX500 Export for Ultralytics YOLOv8
+
+This guide covers exporting and deploying Ultralytics YOLOv8 models to Raspberry Pi AI Cameras that feature the Sony IMX500 sensor.
+
+Deploying computer vision models on devices with limited computational power, such as [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/), can be tricky. Using a model format optimized for faster performance makes a huge difference.
+
+The IMX500 model format is designed to use minimal power while delivering fast performance for neural networks. It allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through exporting and deploying your models to the IMX500 format while making it easier for your models to perform well on the [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/).
+
+
+
+
+
+## Why Should You Export to IMX500
+
+Sony's [IMX500 Intelligent Vision Sensor](https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera) is a game-changing piece of hardware in edge AI processing. It's the world's first intelligent vision sensor with on-chip AI capabilities. This sensor helps overcome many challenges in edge AI, including data processing bottlenecks, privacy concerns, and performance limitations.
+While other sensors merely pass along images and frames, the IMX500 tells a whole story. It processes data directly on the sensor, allowing devices to generate insights in real-time.
+
+## Sony's IMX500 Export for YOLOv8 Models
+
+The IMX500 is designed to transform how devices handle data directly on the sensor, without needing to send it off to the cloud for processing.
+
+The IMX500 works with quantized models. Quantization makes models smaller and faster without losing much [accuracy](https://www.ultralytics.com/glossary/accuracy). It is ideal for the limited resources of edge computing, allowing applications to respond quickly by reducing latency and allowing for quick data processing locally, without cloud dependency. Local processing also keeps user data private and secure since it's not sent to a remote server.
+
+**IMX500 Key Features:**
+
+- **Metadata Output:** Instead of transmitting images only, the IMX500 can output both the image and metadata (inference results), or metadata only, minimizing data size, reducing bandwidth, and lowering costs.
+- **Addresses Privacy Concerns:** By processing data on the device, the IMX500 addresses privacy concerns, ideal for human-centric applications like person counting and occupancy tracking.
+- **Real-time Processing:** Fast, on-sensor processing supports real-time decisions, perfect for edge AI applications such as autonomous systems.
+
+**Before You Begin:** For best results, ensure your YOLOv8 model is well-prepared for export by following our [Model Training Guide](https://docs.ultralytics.com/modes/train/), [Data Preparation Guide](https://docs.ultralytics.com/datasets/), and [Hyperparameter Tuning Guide](https://docs.ultralytics.com/guides/hyperparameter-tuning/).
+
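+For example, a minimal fine-tuning sketch before export; the dataset and epoch count are placeholders you would replace with your own:
+
+```python
+from ultralytics import YOLO
+
+# Fine-tune on your own data before exporting (placeholder dataset and epochs)
+model = YOLO("yolov8n.pt")
+model.train(data="coco8.yaml", epochs=100, imgsz=640)
+```
+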
+## Usage Examples
+
+Export an Ultralytics YOLOv8 model to IMX500 format and run inference with the exported model.
+
+!!! note
+
+ IMX export is currently only supported for the YOLOv8n model. Here we perform inference just to make sure the model works as expected. However, for deployment and inference on the Raspberry Pi AI Camera, please jump to [Using IMX500 Export in Deployment](#using-imx500-export-in-deployment) section.
+
+!!! example
+
+ === "Python"
+
+ ```python
+ from ultralytics import YOLO
+
+ # Load a YOLOv8n PyTorch model
+ model = YOLO("yolov8n.pt")
+
+ # Export the model
+ model.export(format="imx") # exports with PTQ quantization by default
+
+ # Load the exported model
+ imx_model = YOLO("yolov8n_imx_model")
+
+ # Run inference
+ results = imx_model("https://ultralytics.com/images/bus.jpg")
+ ```
+
+ === "CLI"
+
+ ```bash
+ # Export a YOLOv8n PyTorch model to imx format with Post-Training Quantization (PTQ)
+ yolo export model=yolov8n.pt format=imx
+
+ # Run inference with the exported model
+ yolo predict model=yolov8n_imx_model source='https://ultralytics.com/images/bus.jpg'
+ ```
+
+The export process will create an ONNX model for quantization validation, along with a directory named `<model-name>_imx_model`. This directory will include the `packerOut.zip` file, which is essential for packaging the model for deployment on the IMX500 hardware. Additionally, the `<model-name>_imx_model` folder will contain a text file (`labels.txt`) listing all the labels associated with the model.
+
+```bash
+yolov8n_imx_model
+├── dnnParams.xml
+├── labels.txt
+├── packerOut.zip
+├── yolov8n_imx.onnx
+├── yolov8n_imx500_model_MemoryReport.json
+└── yolov8n_imx500_model.pbtxt
+```
+
+## Arguments
+
+When exporting a model to IMX500 format, you can specify various arguments:
+
+| Key | Value | Description |
+| -------- | ------------ | -------------------------------------------------------------- |
+| `format` | `imx` | Format to export to (imx) |
+| `int8` | `True` | Enable INT8 quantization for the model (default: `True`) |
+| `imgsz` | `640` | Image size for the model input (default: `640`) |
+| `data` | `coco8.yaml` | Path to the dataset configuration file (default: `coco8.yaml`) |
+
+!!! note
+
+ When using `data` argument for quantization, please check [Dataset Guide](https://docs.ultralytics.com/datasets/detect) to learn more about the dataset format.
+
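+Put together, an export call that sets these arguments explicitly might look like the sketch below; the values shown are simply the documented defaults:
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("yolov8n.pt")
+
+# Export with the documented arguments spelled out (default values shown)
+model.export(format="imx", int8=True, imgsz=640, data="coco8.yaml")
+```
+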
+## Using IMX500 Export in Deployment
+
+After exporting the Ultralytics YOLOv8n model to IMX500 format, it can be deployed to the Raspberry Pi AI Camera for inference.
+
+### Hardware Prerequisites
+
+Make sure you have the below hardware:
+
+1. Raspberry Pi 5 or Raspberry Pi 4 Model B
+2. Raspberry Pi AI Camera
+
+Connect the Raspberry Pi AI Camera to the 15-pin MIPI CSI connector on the Raspberry Pi and power on the Raspberry Pi.
+
+### Software Prerequisites
+
+!!! note
+
+ This guide has been tested with Raspberry Pi OS Bookworm running on a Raspberry Pi 5
+
+Step 1: Open a terminal window and execute the following commands to update the Raspberry Pi software to the latest version.
+
+```bash
+sudo apt update && sudo apt full-upgrade
+```
+
+Step 2: Install IMX500 firmware which is required to operate the IMX500 sensor along with a packager tool.
+
+```bash
+sudo apt install imx500-all imx500-tools
+```
+
+Step 3: Install prerequisites to run `picamera2` application. We will use this application later for the deployment process.
+
+```bash
+sudo apt install python3-opencv python3-munkres
+```
+
+Step 4: Reboot the Raspberry Pi for the changes to take effect.
+
+```bash
+sudo reboot
+```
+
+### Package Model and Deploy to AI Camera
+
+After obtaining `packerOut.zip` from the IMX500 conversion process, you can pass this file into the packager tool to obtain an RPK file. This file can then be deployed directly to the AI Camera using `picamera2`.
+
+Step 1: Package the model into RPK file
+
+```bash
+imx500-package -i <path to packerOut.zip> -o <output folder>
## Connect and Contribute
Your journey with YOLOv5 doesn't have to be a solitary one. Join our vibrant community on [GitHub](https://github.com/ultralytics/yolov5), connect with professionals on [LinkedIn](https://www.linkedin.com/company/ultralytics/), share your results on [Twitter](https://twitter.com/ultralytics), and find educational resources on [YouTube](https://www.youtube.com/ultralytics?sub_confirmation=1). Follow us on [TikTok](https://www.tiktok.com/@ultralytics) and [BiliBili](https://ultralytics.com/bilibili) for more engaging content.
-Interested in contributing? We welcome contributions of all forms; from code improvements and bug reports to documentation updates. Check out our [contributing guidelines](../help/contributing.md/) for more information.
+Interested in contributing? We welcome contributions of all forms; from code improvements and bug reports to documentation updates. Check out our [contributing guidelines](../help/contributing.md) for more information.
We're excited to see the innovative ways you'll use YOLOv5. Dive in, experiment, and revolutionize your computer vision projects! 🚀
diff --git a/docs/en/yolov5/tutorials/clearml_logging_integration.md b/docs/en/yolov5/tutorials/clearml_logging_integration.md
index f8ee5c348e5..17984f0f078 100644
--- a/docs/en/yolov5/tutorials/clearml_logging_integration.md
+++ b/docs/en/yolov5/tutorials/clearml_logging_integration.md
@@ -10,7 +10,7 @@ keywords: ClearML, YOLOv5, machine learning, experiment tracking, data versionin
## About ClearML
-[ClearML](https://clear.ml/) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time โฑ๏ธ.
+[ClearML](https://clear.ml/) is an [open-source](https://github.com/clearml/clearml) toolbox designed to save you time ⏱️.
🔨 Track every YOLOv5 training run in the experiment manager
@@ -102,7 +102,7 @@ Versioning your data separately from your code is generally a good idea and make
### Prepare Your Dataset
-The YOLOv5 repository supports a number of different datasets by using YAML files containing their information. By default datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the YAML or with the scripts provided by yolov5, you get this folder structure:
+The YOLOv5 repository supports a number of different datasets by using YAML files containing their information. By default, datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the YAML or with the scripts provided by yolov5, you get this folder structure:
```
..
diff --git a/docs/en/yolov5/tutorials/comet_logging_integration.md b/docs/en/yolov5/tutorials/comet_logging_integration.md
index c5f20dda93d..88e3e5a5770 100644
--- a/docs/en/yolov5/tutorials/comet_logging_integration.md
+++ b/docs/en/yolov5/tutorials/comet_logging_integration.md
@@ -138,7 +138,7 @@ python train.py \
### Controlling the number of Prediction Images logged to Comet
-When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
+When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default, a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable.
```shell
env COMET_MAX_IMAGE_UPLOADS=200 python train.py \
diff --git a/docs/en/yolov5/tutorials/hyperparameter_evolution.md b/docs/en/yolov5/tutorials/hyperparameter_evolution.md
index 80c0d39b524..e9830e14df9 100644
--- a/docs/en/yolov5/tutorials/hyperparameter_evolution.md
+++ b/docs/en/yolov5/tutorials/hyperparameter_evolution.md
@@ -153,7 +153,7 @@ We recommend a minimum of 300 generations of evolution for best results. Note th
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/model_ensembling.md b/docs/en/yolov5/tutorials/model_ensembling.md
index 814c8969218..cc76cc0cdab 100644
--- a/docs/en/yolov5/tutorials/model_ensembling.md
+++ b/docs/en/yolov5/tutorials/model_ensembling.md
@@ -134,7 +134,7 @@ Done. (0.223s)
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md
index e5f0c73007c..4869efb0177 100644
--- a/docs/en/yolov5/tutorials/model_export.md
+++ b/docs/en/yolov5/tutorials/model_export.md
@@ -26,20 +26,20 @@ YOLOv5 inference is officially supported in 11 formats:
💡 ProTip: Export to ONNX or OpenVINO for up to 3x CPU speedup. See [CPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6613). 💡 ProTip: Export to TensorRT for up to 5x GPU speedup. See [GPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6963).
-| Format | `export.py --include` | Model |
-| :------------------------------------------------------------------------- | :-------------------- | :------------------------ |
-| [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` |
-| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov5s.torchscript` |
-| [ONNX](https://onnx.ai/) | `onnx` | `yolov5s.onnx` |
-| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov5s_openvino_model/` |
-| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov5s.engine` |
-| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov5s.mlmodel` |
-| [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov5s_saved_model/` |
-| [TensorFlow GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov5s.pb` |
-| [TensorFlow Lite](https://ai.google.dev/edge/litert) | `tflite` | `yolov5s.tflite` |
-| [TensorFlow Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov5s_edgetpu.tflite` |
-| [TensorFlow.js](https://www.tensorflow.org/js) | `tfjs` | `yolov5s_web_model/` |
-| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov5s_paddle_model/` |
+| Format | `export.py --include` | Model |
+| :----------------------------------------------------------- | :-------------------- | :------------------------ |
+| [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` |
+| [TorchScript](../../integrations/torchscript.md) | `torchscript` | `yolov5s.torchscript` |
+| [ONNX](../../integrations/onnx.md) | `onnx` | `yolov5s.onnx` |
+| [OpenVINO](../../integrations/openvino.md) | `openvino` | `yolov5s_openvino_model/` |
+| [TensorRT](../../integrations/tensorrt.md) | `engine` | `yolov5s.engine` |
+| [CoreML](../../integrations/coreml.md) | `coreml` | `yolov5s.mlmodel` |
+| [TensorFlow SavedModel](../../integrations/tf-savedmodel.md) | `saved_model` | `yolov5s_saved_model/` |
+| [TensorFlow GraphDef](../../integrations/tf-graphdef.md) | `pb` | `yolov5s.pb` |
+| [TensorFlow Lite](../../integrations/tflite.md) | `tflite` | `yolov5s.tflite` |
+| [TensorFlow Edge TPU](../../integrations/edge-tpu.md) | `edgetpu` | `yolov5s_edgetpu.tflite` |
+| [TensorFlow.js](../../integrations/tfjs.md) | `tfjs` | `yolov5s_web_model/` |
+| [PaddlePaddle](../../integrations/paddlepaddle.md) | `paddle` | `yolov5s_paddle_model/` |
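As a brief aside on consuming one of these exports (an editor's sketch, not part of the original page: it assumes `yolov5s.onnx` was produced with `export.py --include onnx` and that the `onnxruntime` package is installed), the resulting model can be run directly with ONNX Runtime:

```python
import numpy as np
import onnxruntime as ort

# Load the model produced by `python export.py --weights yolov5s.pt --include onnx`.
session = ort.InferenceSession("yolov5s.onnx", providers=["CPUExecutionProvider"])

# YOLOv5 expects an NCHW float32 tensor scaled to [0, 1]; a random image stands in here.
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
outputs = session.run(None, {session.get_inputs()[0].name: dummy})
print(outputs[0].shape)  # raw predictions, e.g. (1, 25200, 85) for a COCO-trained yolov5s
```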
## Benchmarks
@@ -234,7 +234,7 @@ YOLOv5 OpenVINO C++ inference examples:
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md
index 8bda8772e1c..0adfb32c8ab 100644
--- a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md
+++ b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md
@@ -97,7 +97,7 @@ In the results we can observe that we have achieved a **sparsity of 30%** in our
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/multi_gpu_training.md b/docs/en/yolov5/tutorials/multi_gpu_training.md
index 53f3a1c1fd3..d61fab83278 100644
--- a/docs/en/yolov5/tutorials/multi_gpu_training.md
+++ b/docs/en/yolov5/tutorials/multi_gpu_training.md
@@ -173,7 +173,7 @@ If you went through all the above, feel free to raise an Issue by giving as much
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
index 27e26f144ff..0f464adf890 100644
--- a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
+++ b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md
@@ -361,7 +361,7 @@ model = torch.hub.load("ultralytics/yolov5", "custom", path="yolov5s_paddle_mode
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
index 55728f21e74..53f29d6f626 100644
--- a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
+++ b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md
@@ -29,7 +29,7 @@ After uploading data to Roboflow, you can label your data and review previous la
## Versioning
-You can make versions of your dataset with different preprocessing and offline augmentation options. YOLOv5 does online augmentations natively, so be intentional when layering Roboflow's offline augmentations on top.
+You can make versions of your dataset with different preprocessing and offline augmentation options. YOLOv5 does online augmentations natively, so be intentional when layering Roboflow offline augmentations on top.

@@ -60,7 +60,7 @@ The real world is messy and your model will invariably encounter situations your
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
@@ -102,4 +102,4 @@ Active learning is a machine learning strategy that iteratively improves a model
### How can I use Ultralytics environments for training YOLOv5 models on different platforms?
-Ultralytics provides ready-to-use environments with pre-installed dependencies like CUDA, CUDNN, Python, and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it easier to kickstart your training projects. These environments are available on various platforms such as Google Cloud, AWS, Azure, and Docker. You can also access free GPU notebooks via [Paperspace](https://bit.ly/yolov5-paperspace-notebook), [Google Colab](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb), and [Kaggle](https://www.kaggle.com/ultralytics/yolov5). For specific setup instructions, visit the [Supported Environments](#supported-environments) section of the documentation.
+Ultralytics provides ready-to-use environments with pre-installed dependencies like CUDA, CUDNN, Python, and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it easier to kickstart your training projects. These environments are available on various platforms such as Google Cloud, AWS, Azure, and Docker. You can also access free GPU notebooks via [Paperspace](https://bit.ly/yolov5-paperspace-notebook), [Google Colab](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb), and [Kaggle](https://www.kaggle.com/models/ultralytics/yolov5). For specific setup instructions, visit the [Supported Environments](#supported-environments) section of the documentation.
diff --git a/docs/en/yolov5/tutorials/test_time_augmentation.md b/docs/en/yolov5/tutorials/test_time_augmentation.md
index 336ad3f79bc..30c53b72301 100644
--- a/docs/en/yolov5/tutorials/test_time_augmentation.md
+++ b/docs/en/yolov5/tutorials/test_time_augmentation.md
@@ -151,7 +151,7 @@ You can customize the TTA ops applied in the YOLOv5 `forward_augment()` method [
Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects.
-- **Free GPU Notebooks**:
+- **Free GPU Notebooks**:
- **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md)
- **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md)
- **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md)
diff --git a/docs/en/yolov5/tutorials/tips_for_best_training_results.md b/docs/en/yolov5/tutorials/tips_for_best_training_results.md
index 634297203ea..5e7f55eb0ea 100644
--- a/docs/en/yolov5/tutorials/tips_for_best_training_results.md
+++ b/docs/en/yolov5/tutorials/tips_for_best_training_results.md
@@ -18,7 +18,7 @@ We've put together a full guide for users looking to get the best results on the
- **Instances per class.** ≥ 10000 instances (labeled objects) per class recommended
- **Image variety.** Must be representative of deployed environment. For real-world use cases we recommend images from different times of day, different seasons, different weather, different lighting, different angles, different sources (scraped online, collected locally, different cameras) etc.
- **Label consistency.** All instances of all classes in all images must be labelled. Partial labelling will not work.
-- **Label [accuracy](https://www.ultralytics.com/glossary/accuracy).** Labels must closely enclose each object. No space should exist between an object and it's [bounding box](https://www.ultralytics.com/glossary/bounding-box). No objects should be missing a label.
+- **Label [accuracy](https://www.ultralytics.com/glossary/accuracy).** Labels must closely enclose each object. No space should exist between an object and its [bounding box](https://www.ultralytics.com/glossary/bounding-box). No objects should be missing a label.
- **Label verification.** View `train_batch*.jpg` on train start to verify your labels appear correct, i.e. see [example](./train_custom_data.md#local-logging) mosaic.
- **Background images.** Background images are images with no objects that are added to a dataset to reduce False Positives (FP). We recommend about 0-10% background images to help reduce FPs (COCO has 1000 background images for reference, 1% of the total). No labels are required for background images.
diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md
index aa093e4b81c..c6f9d6f2692 100644
--- a/docs/en/yolov5/tutorials/train_custom_data.md
+++ b/docs/en/yolov5/tutorials/train_custom_data.md
@@ -18,7 +18,7 @@ pip install -r requirements.txt # install
## Train On Custom Data
-
+
diff --git a/docs/overrides/stylesheets/style.css b/docs/overrides/stylesheets/style.css
index a9a89d9013e..5c9f3c22df2 100644
--- a/docs/overrides/stylesheets/style.css
+++ b/docs/overrides/stylesheets/style.css
@@ -76,7 +76,6 @@ div.highlight {
.banner-wrapper {
justify-content: space-between;
gap: 16px;
-
padding: 16px;
}
@@ -121,7 +120,6 @@ div.highlight {
.banner-wrapper > .banner-button-wrapper,
.banner-wrapper > .banner-button-wrapper > .banner-button-wrapper {
padding: 2px;
-
background-color: rgba(222, 255, 56, 0.2);
}
@@ -131,13 +129,10 @@ div.highlight {
.banner-wrapper > .banner-button-wrapper > .banner-button-wrapper > button {
cursor: pointer;
-
min-width: 132px;
padding: 10px;
-
font-weight: 500;
color: #111f68;
-
background-color: rgb(222, 255, 56);
}
@@ -156,13 +151,11 @@ div.highlight {
.banner-wrapper {
gap: 32px;
-
padding: 12px;
}
.banner-wrapper > .banner-content-wrapper {
gap: 24px;
-
margin: 0 auto;
}
}
@@ -217,6 +210,13 @@ div.highlight {
height: 50px;
border-radius: 50%;
margin-right: 3px;
+ background-color: #f0f0f0; /* Placeholder color */
+ opacity: 0; /* Start fully transparent */
+ transition: opacity 0.3s ease-in-out;
+}
+
+.author-link .hover-item[src] {
+ opacity: 1; /* Fade in when src is set (image loaded) */
}
.hover-item:hover {
@@ -264,3 +264,16 @@ div.highlight {
}
}
/* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */
+
+/* Inkeep ----------------------------------------------------------------------------------------------------------- */
+.ikp-floating-button {
+ color: #111f68;
+}
+#inkeepSearchBar {
+ transition: all 0.2s ease-in-out;
+}
+#inkeepSearchBar:hover {
+ transform: scale(1.1);
+ filter: brightness(1.2);
+}
+/* Inkeep ----------------------------------------------------------------------------------------------------------- */
diff --git a/examples/README.md b/examples/README.md
index 931bdc634c7..ee06d337b62 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,6 +1,6 @@
-## Ultralytics YOLOv8 Example Applications
+## Ultralytics Examples
-This repository features a collection of real-world applications and walkthroughs, provided as either Python files or notebooks. Explore the examples below to see how YOLOv8 can be integrated into various applications.
+This directory features a collection of real-world applications and walkthroughs, provided as either Python files or notebooks. Explore the examples below to see how YOLO can be integrated into various applications.
### Ultralytics YOLO Example Applications
@@ -8,18 +8,21 @@ This repository features a collection of real-world applications and walkthrough
| ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | ----------------------------------------------------------------------------------------- |
| [YOLO ONNX Detection Inference with C++](./YOLOv8-CPP-Inference) | C++/ONNX | [Justas Bartnykas](https://github.com/JustasBart) |
| [YOLO OpenCV ONNX Detection Python](./YOLOv8-OpenCV-ONNX-Python) | OpenCV/Python/ONNX | [Farid Inawan](https://github.com/frdteknikelektro) |
-| [YOLOv8 .NET ONNX ImageSharp](https://github.com/dme-compunet/YOLOv8) | C#/ONNX/ImageSharp | [Compunet](https://github.com/dme-compunet) |
+| [YOLO C# ONNX-Runtime](https://github.com/dme-compunet/YoloSharp) | .NET/ONNX-Runtime | [Compunet](https://github.com/dme-compunet) |
| [YOLO .Net ONNX Detection C#](https://www.nuget.org/packages/Yolov8.Net) | C# .Net | [Samuel Stainback](https://github.com/sstainba) |
| [YOLOv8 on NVIDIA Jetson(TensorRT and DeepStream)](https://wiki.seeedstudio.com/YOLOv8-DeepStream-TRT-Jetson/) | Python | [Lakshantha](https://github.com/lakshanthad) |
| [YOLOv8 ONNXRuntime Python](./YOLOv8-ONNXRuntime) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) |
+| [RTDETR ONNXRuntime Python](./RTDETR-ONNXRuntime-Python) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) |
| [YOLOv8 ONNXRuntime CPP](./YOLOv8-ONNXRuntime-CPP) | C++/ONNXRuntime | [DennisJcy](https://github.com/DennisJcy), [Onuralp Sezer](https://github.com/onuralpszr) |
| [RTDETR ONNXRuntime C#](https://github.com/Kayzwer/yolo-cs/blob/master/RTDETR.cs) | C#/ONNX | [Kayzwer](https://github.com/Kayzwer) |
| [YOLOv8 SAHI Video Inference](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) |
| [YOLOv8 Region Counter](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) |
| [YOLOv8 Segmentation ONNXRuntime Python](./YOLOv8-Segmentation-ONNXRuntime-Python) | Python/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) |
| [YOLOv8 LibTorch CPP](./YOLOv8-LibTorch-CPP-Inference) | C++/LibTorch | [Myyura](https://github.com/Myyura) |
-| [YOLOv8 OpenCV INT8 TFLite Python](./YOLOv8-OpenCV-int8-tflite-Python) | Python | [Wamiq Raza](https://github.com/wamiqraza) |
+| [YOLOv8 OpenCV INT8 TFLite Python](./YOLOv8-TFLite-Python) | Python | [Wamiq Raza](https://github.com/wamiqraza) |
| [YOLOv8 All Tasks ONNXRuntime Rust](./YOLOv8-ONNXRuntime-Rust) | Rust/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) |
+| [YOLOv8 OpenVINO CPP](./YOLOv8-OpenVINO-CPP-Inference) | C++/OpenVINO | [Erlangga Yudi Pradana](https://github.com/rlggyp) |
+| [YOLOv5-YOLO11 ONNXRuntime Rust](./YOLO-Series-ONNXRuntime-Rust) | Rust/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) |
### How to Contribute
diff --git a/examples/RTDETR-ONNXRuntime-Python/README.md b/examples/RTDETR-ONNXRuntime-Python/README.md
new file mode 100644
index 00000000000..1861da8295d
--- /dev/null
+++ b/examples/RTDETR-ONNXRuntime-Python/README.md
@@ -0,0 +1,43 @@
+# RTDETR - ONNX Runtime
+
+This project implements RTDETR using ONNX Runtime.
+
+## Installation
+
+To run this project, you need to install the required dependencies. The following instructions will guide you through the installation process.
+
+### Installing Required Dependencies
+
+You can install the required dependencies by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+### Installing `onnxruntime-gpu`
+
+If you have an NVIDIA GPU and want to leverage GPU acceleration, you can install the onnxruntime-gpu package using the following command:
+
+```bash
+pip install onnxruntime-gpu
+```
+
+Note: Make sure you have the appropriate GPU drivers installed on your system.
+
+### Installing `onnxruntime` (CPU version)
+
+If you don't have an NVIDIA GPU or prefer to use the CPU version of onnxruntime, you can install the onnxruntime package using the following command:
+
+```bash
+pip install onnxruntime
+```
+
+### Usage
+
+After successfully installing the required packages, you can run the RTDETR implementation using the following command:
+
+```bash
+python main.py --model rtdetr-l.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5
+```
+
+Make sure to replace `rtdetr-l.onnx` with the path to your RTDETR ONNX model file and `image.jpg` with the path to your input image, and adjust the confidence threshold (`--conf-thres`) and IoU threshold (`--iou-thres`) values as needed.
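Besides the command line shown above, the `RTDETR` helper class defined in `main.py` (added in the next file of this patch) can be driven from your own script. A minimal sketch, assuming `main.py` is on the import path and that `rtdetr-l.onnx` and `image.jpg` exist locally:

```python
import cv2

from main import RTDETR  # class defined in examples/RTDETR-ONNXRuntime-Python/main.py

# Build the detector and run inference; main() returns the annotated image as a NumPy array.
detector = RTDETR("rtdetr-l.onnx", "image.jpg", conf_thres=0.5, iou_thres=0.5)
annotated = detector.main()

# Save the result instead of opening a display window.
cv2.imwrite("output.jpg", annotated)
```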
diff --git a/examples/RTDETR-ONNXRuntime-Python/main.py b/examples/RTDETR-ONNXRuntime-Python/main.py
new file mode 100644
index 00000000000..d794a7d648b
--- /dev/null
+++ b/examples/RTDETR-ONNXRuntime-Python/main.py
@@ -0,0 +1,222 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import argparse
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+import torch
+
+from ultralytics.utils import ASSETS, yaml_load
+from ultralytics.utils.checks import check_requirements, check_yaml
+
+
+class RTDETR:
+ """RTDETR object detection model class for handling inference and visualization."""
+
+ def __init__(self, model_path, img_path, conf_thres=0.5, iou_thres=0.5):
+ """
+ Initializes the RTDETR object with the specified parameters.
+
+ Args:
+ model_path: Path to the ONNX model file.
+ img_path: Path to the input image.
+ conf_thres: Confidence threshold for object detection.
+ iou_thres: IoU threshold for non-maximum suppression
+ """
+ self.model_path = model_path
+ self.img_path = img_path
+ self.conf_thres = conf_thres
+ self.iou_thres = iou_thres
+
+ # Set up the ONNX runtime session with CUDA and CPU execution providers
+ self.session = ort.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+ self.model_input = self.session.get_inputs()
+        self.input_height = self.model_input[0].shape[2]
+        self.input_width = self.model_input[0].shape[3]
+
+ # Load class names from the COCO dataset YAML file
+ self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]
+
+ # Generate a color palette for drawing bounding boxes
+ self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
+
+ def draw_detections(self, box, score, class_id):
+ """
+ Draws bounding boxes and labels on the input image based on the detected objects.
+
+ Args:
+ box: Detected bounding box.
+ score: Corresponding detection score.
+ class_id: Class ID for the detected object.
+
+ Returns:
+ None
+ """
+ # Extract the coordinates of the bounding box
+ x1, y1, x2, y2 = box
+
+ # Retrieve the color for the class ID
+ color = self.color_palette[class_id]
+
+ # Draw the bounding box on the image
+ cv2.rectangle(self.img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
+
+ # Create the label text with class name and score
+ label = f"{self.classes[class_id]}: {score:.2f}"
+
+ # Calculate the dimensions of the label text
+ (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+ # Calculate the position of the label text
+ label_x = x1
+ label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
+
+ # Draw a filled rectangle as the background for the label text
+ cv2.rectangle(
+ self.img,
+ (int(label_x), int(label_y - label_height)),
+ (int(label_x + label_width), int(label_y + label_height)),
+ color,
+ cv2.FILLED,
+ )
+
+ # Draw the label text on the image
+ cv2.putText(
+ self.img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA
+ )
+
+ def preprocess(self):
+ """
+ Preprocesses the input image before performing inference.
+
+ Returns:
+ image_data: Preprocessed image data ready for inference.
+ """
+ # Read the input image using OpenCV
+ self.img = cv2.imread(self.img_path)
+
+ # Get the height and width of the input image
+ self.img_height, self.img_width = self.img.shape[:2]
+
+ # Convert the image color space from BGR to RGB
+ img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
+
+ # Resize the image to match the input shape
+ img = cv2.resize(img, (self.input_width, self.input_height))
+
+ # Normalize the image data by dividing it by 255.0
+ image_data = np.array(img) / 255.0
+
+ # Transpose the image to have the channel dimension as the first dimension
+ image_data = np.transpose(image_data, (2, 0, 1)) # Channel first
+
+ # Expand the dimensions of the image data to match the expected input shape
+ image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
+
+ # Return the preprocessed image data
+ return image_data
+
+ def bbox_cxcywh_to_xyxy(self, boxes):
+ """
+ Converts bounding boxes from (center x, center y, width, height) format to (x_min, y_min, x_max, y_max) format.
+
+ Args:
+ boxes (numpy.ndarray): An array of shape (N, 4) where each row represents
+ a bounding box in (cx, cy, w, h) format.
+
+ Returns:
+ numpy.ndarray: An array of shape (N, 4) where each row represents
+ a bounding box in (x_min, y_min, x_max, y_max) format.
+ """
+ # Calculate half width and half height of the bounding boxes
+ half_width = boxes[:, 2] / 2
+ half_height = boxes[:, 3] / 2
+
+ # Calculate the coordinates of the bounding boxes
+ x_min = boxes[:, 0] - half_width
+ y_min = boxes[:, 1] - half_height
+ x_max = boxes[:, 0] + half_width
+ y_max = boxes[:, 1] + half_height
+
+ # Return the bounding boxes in (x_min, y_min, x_max, y_max) format
+ return np.column_stack((x_min, y_min, x_max, y_max))
+
+ def postprocess(self, model_output):
+ """
+ Postprocesses the model output to extract detections and draw them on the input image.
+
+ Args:
+ model_output: Output of the model inference.
+
+ Returns:
+ np.array: Annotated image with detections.
+ """
+ # Squeeze the model output to remove unnecessary dimensions
+ outputs = np.squeeze(model_output[0])
+
+ # Extract bounding boxes and scores from the model output
+ boxes = outputs[:, :4]
+ scores = outputs[:, 4:]
+
+ # Get the class labels and scores for each detection
+ labels = np.argmax(scores, axis=1)
+ scores = np.max(scores, axis=1)
+
+ # Apply confidence threshold to filter out low-confidence detections
+ mask = scores > self.conf_thres
+ boxes, scores, labels = boxes[mask], scores[mask], labels[mask]
+
+ # Convert bounding boxes to (x_min, y_min, x_max, y_max) format
+ boxes = self.bbox_cxcywh_to_xyxy(boxes)
+
+ # Scale bounding boxes to match the original image dimensions
+ boxes[:, 0::2] *= self.img_width
+ boxes[:, 1::2] *= self.img_height
+
+ # Draw detections on the image
+ for box, score, label in zip(boxes, scores, labels):
+ self.draw_detections(box, score, label)
+
+ # Return the annotated image
+ return self.img
+
+ def main(self):
+ """
+ Executes the detection on the input image using the ONNX model.
+
+ Returns:
+ np.array: Output image with annotations.
+ """
+ # Preprocess the image for model input
+ image_data = self.preprocess()
+
+ # Run the model inference
+ model_output = self.session.run(None, {self.model_input[0].name: image_data})
+
+ # Process and return the model output
+ return self.postprocess(model_output)
+
+
+if __name__ == "__main__":
+ # Set up argument parser for command-line arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str, default="rtdetr-l.onnx", help="Path to the ONNX model file.")
+ parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to the input image.")
+ parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold for object detection.")
+ parser.add_argument("--iou-thres", type=float, default=0.5, help="IoU threshold for non-maximum suppression.")
+ args = parser.parse_args()
+
+ # Check for dependencies and set up ONNX runtime
+ check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")
+
+ # Create the detector instance with specified parameters
+ detection = RTDETR(args.model, args.img, args.conf_thres, args.iou_thres)
+
+ # Perform detection and get the output image
+ output_image = detection.main()
+
+ # Display the annotated output image
+ cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
+ cv2.imshow("Output", output_image)
+ cv2.waitKey(0)
diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml b/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml
new file mode 100644
index 00000000000..048ece887df
--- /dev/null
+++ b/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml
@@ -0,0 +1,14 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+[package]
+name = "YOLO-ONNXRuntime-Rust"
+version = "0.1.0"
+edition = "2021"
+authors = ["Jamjamjon "]
+
+[dependencies]
+anyhow = "1.0.92"
+clap = "4.5.20"
+tracing = "0.1.40"
+tracing-subscriber = "0.3.18"
+usls = { version = "0.0.19", features = ["auto"] }
diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/README.md b/examples/YOLO-Series-ONNXRuntime-Rust/README.md
new file mode 100644
index 00000000000..0b6fabe20d9
--- /dev/null
+++ b/examples/YOLO-Series-ONNXRuntime-Rust/README.md
@@ -0,0 +1,94 @@
+# YOLO-Series ONNXRuntime Rust Demo for Core YOLO Tasks
+
+This repository provides a Rust demo for key YOLO-Series tasks such as `Classification`, `Segmentation`, `Detection`, `Pose Detection`, and `OBB` using ONNXRuntime. It supports various YOLO models (v5 through v11) across multiple vision tasks.
+
+## Introduction
+
+- This example leverages the latest versions of both ONNXRuntime and YOLO models.
+- We utilize the [usls](https://github.com/jamjamjon/usls/tree/main) crate to streamline YOLO model inference, providing efficient data loading, visualization, and optimized inference performance.
+
+## Features
+
+- **Extensive Model Compatibility**: Supports `YOLOv5`, `YOLOv6`, `YOLOv7`, `YOLOv8`, `YOLOv9`, `YOLOv10`, `YOLO11`, `YOLO-world`, `RTDETR`, and others, covering a wide range of YOLO versions.
+- **Versatile Task Coverage**: Includes `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`.
+- **Precision Flexibility**: Works with `FP16` and `FP32` ONNX models.
+- **Execution Providers**: Accelerated support for `CPU`, `CUDA`, `CoreML`, and `TensorRT`.
+- **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input.
+- **Flexible Data Loading**: The `DataLoader` handles images, folders, videos, and video streams.
+- **Real-Time Display and Video Export**: `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV's `imshow()` and `imwrite()`.
+- **Enhanced Annotation and Visualization**: The `Annotator` facilitates comprehensive result rendering, with support for bounding boxes (HBB), oriented bounding boxes (OBB), polygons, masks, keypoints, and text labels.
+
+## Setup Instructions
+
+### 1. ONNXRuntime Linking
+
+
+You have two options to link the ONNXRuntime library:
+
+- **Option 1: Manual Linking**
+
+ - For detailed setup, consult the [ONNX Runtime linking documentation](https://ort.pyke.io/setup/linking).
+ - **Linux or macOS**:
+ 1. Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
+ 2. Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
+ ```shell
+ export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
+ ```
+
+- **Option 2: Automatic Download**
+ - Use the `--features auto` flag to handle downloading automatically:
+ ```shell
+ cargo run -r --example yolo --features auto
+ ```
+
+
+
+### 2. \[Optional\] Install CUDA, CuDNN, and TensorRT
+
+- The CUDA execution provider requires CUDA version `12.x`.
+- The TensorRT execution provider requires both CUDA `12.x` and TensorRT `10.x`.
+
+### 3. \[Optional\] Install ffmpeg
+
+To view video frames and save video inferences, install `rust-ffmpeg`. For instructions, see:
+[https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies](https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies)
+
+## Get Started
+
+```Shell
+# customized
+cargo run -r -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8
+
+# Classify
+cargo run -r -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5
+cargo run -r -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8
+cargo run -r -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLO11
+
+# Detect
+cargo run -r -- --task detect --ver v5 --scale n # YOLOv5
+cargo run -r -- --task detect --ver v6 --scale n # YOLOv6
+cargo run -r -- --task detect --ver v7 --scale t # YOLOv7
+cargo run -r -- --task detect --ver v8 --scale n # YOLOv8
+cargo run -r -- --task detect --ver v9 --scale t # YOLOv9
+cargo run -r -- --task detect --ver v10 --scale n # YOLOv10
+cargo run -r -- --task detect --ver v11 --scale n # YOLO11
+cargo run -r -- --task detect --ver rtdetr --scale l # RTDETR
+
+# Pose
+cargo run -r -- --task pose --ver v8 --scale n # YOLOv8-Pose
+cargo run -r -- --task pose --ver v11 --scale n # YOLO11-Pose
+
+# Segment
+cargo run -r -- --task segment --ver v5 --scale n # YOLOv5-Segment
+cargo run -r -- --task segment --ver v8 --scale n # YOLOv8-Segment
+cargo run -r -- --task segment --ver v11 --scale n   # YOLO11-Segment
+cargo run -r -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # FastSAM
+
+# OBB
+cargo run -r -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb
+cargo run -r -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLO11-Obb
+```
+
+**`cargo run -- --help` for more options**
+
+For more details, please refer to [usls-yolo](https://github.com/jamjamjon/usls/tree/main/examples/yolo).
diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs b/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs
new file mode 100644
index 00000000000..3c71a253108
--- /dev/null
+++ b/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs
@@ -0,0 +1,236 @@
+use anyhow::Result;
+use clap::Parser;
+
+use usls::{
+ models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOScale, YOLOTask,
+ YOLOVersion, COCO_SKELETONS_16,
+};
+
+#[derive(Parser, Clone)]
+#[command(author, version, about, long_about = None)]
+pub struct Args {
+ /// Path to the ONNX model
+ #[arg(long)]
+    pub model: Option<String>,
+
+ /// Input source path
+ #[arg(long, default_value_t = String::from("../../ultralytics/assets/bus.jpg"))]
+ pub source: String,
+
+ /// YOLO Task
+ #[arg(long, value_enum, default_value_t = YOLOTask::Detect)]
+ pub task: YOLOTask,
+
+ /// YOLO Version
+ #[arg(long, value_enum, default_value_t = YOLOVersion::V8)]
+ pub ver: YOLOVersion,
+
+ /// YOLO Scale
+ #[arg(long, value_enum, default_value_t = YOLOScale::N)]
+ pub scale: YOLOScale,
+
+ /// Batch size
+ #[arg(long, default_value_t = 1)]
+ pub batch_size: usize,
+
+ /// Minimum input width
+ #[arg(long, default_value_t = 224)]
+ pub width_min: isize,
+
+ /// Input width
+ #[arg(long, default_value_t = 640)]
+ pub width: isize,
+
+ /// Maximum input width
+ #[arg(long, default_value_t = 1024)]
+ pub width_max: isize,
+
+ /// Minimum input height
+ #[arg(long, default_value_t = 224)]
+ pub height_min: isize,
+
+ /// Input height
+ #[arg(long, default_value_t = 640)]
+ pub height: isize,
+
+ /// Maximum input height
+ #[arg(long, default_value_t = 1024)]
+ pub height_max: isize,
+
+ /// Number of classes
+ #[arg(long, default_value_t = 80)]
+ pub nc: usize,
+
+ /// Class confidence
+ #[arg(long)]
+    pub confs: Vec<f32>,
+
+ /// Enable TensorRT support
+ #[arg(long)]
+ pub trt: bool,
+
+ /// Enable CUDA support
+ #[arg(long)]
+ pub cuda: bool,
+
+ /// Enable CoreML support
+ #[arg(long)]
+ pub coreml: bool,
+
+ /// Use TensorRT half precision
+ #[arg(long)]
+ pub half: bool,
+
+ /// Device ID to use
+ #[arg(long, default_value_t = 0)]
+ pub device_id: usize,
+
+ /// Enable performance profiling
+ #[arg(long)]
+ pub profile: bool,
+
+ /// Disable contour drawing, for saving time
+ #[arg(long)]
+ pub no_contours: bool,
+
+ /// Show result
+ #[arg(long)]
+ pub view: bool,
+
+ /// Do not save output
+ #[arg(long)]
+ pub nosave: bool,
+}
+
+fn main() -> Result<()> {
+ let args = Args::parse();
+
+ // logger
+ if args.profile {
+ tracing_subscriber::fmt()
+ .with_max_level(tracing::Level::INFO)
+ .init();
+ }
+
+ // model path
+ let path = match &args.model {
+ None => format!(
+ "yolo/{}-{}-{}.onnx",
+ args.ver.name(),
+ args.scale.name(),
+ args.task.name()
+ ),
+ Some(x) => x.to_string(),
+ };
+
+ // saveout
+ let saveout = match &args.model {
+ None => format!(
+ "{}-{}-{}",
+ args.ver.name(),
+ args.scale.name(),
+ args.task.name()
+ ),
+ Some(x) => {
+ let p = std::path::PathBuf::from(&x);
+ p.file_stem().unwrap().to_str().unwrap().to_string()
+ }
+ };
+
+ // device
+ let device = if args.cuda {
+ Device::Cuda(args.device_id)
+ } else if args.trt {
+ Device::Trt(args.device_id)
+ } else if args.coreml {
+ Device::CoreML(args.device_id)
+ } else {
+ Device::Cpu(args.device_id)
+ };
+
+ // build options
+ let options = Options::new()
+ .with_model(&path)?
+ .with_yolo_version(args.ver)
+ .with_yolo_task(args.task)
+ .with_device(device)
+ .with_trt_fp16(args.half)
+ .with_ixx(0, 0, (1, args.batch_size as _, 4).into())
+ .with_ixx(0, 2, (args.height_min, args.height, args.height_max).into())
+ .with_ixx(0, 3, (args.width_min, args.width, args.width_max).into())
+ .with_confs(if args.confs.is_empty() {
+ &[0.2, 0.15]
+ } else {
+ &args.confs
+ })
+ .with_nc(args.nc)
+ .with_find_contours(!args.no_contours) // find contours or not
+ // .with_names(&COCO_CLASS_NAMES_80) // detection class names
+ // .with_names2(&COCO_KEYPOINTS_17) // keypoints class names
+ // .exclude_classes(&[0])
+ // .retain_classes(&[0, 5])
+ .with_profile(args.profile);
+
+ // build model
+ let mut model = YOLO::new(options)?;
+
+ // build dataloader
+ let dl = DataLoader::new(&args.source)?
+ .with_batch(model.batch() as _)
+ .build()?;
+
+ // build annotator
+ let annotator = Annotator::default()
+ .with_skeletons(&COCO_SKELETONS_16)
+ .without_masks(true) // no masks plotting when doing segment task
+ .with_bboxes_thickness(3)
+        .with_keypoints_name(false) // do not draw keypoint names
+ .with_saveout_subs(&["YOLO"])
+ .with_saveout(&saveout);
+
+ // build viewer
+ let mut viewer = if args.view {
+ Some(Viewer::new().with_delay(5).with_scale(1.).resizable(true))
+ } else {
+ None
+ };
+
+ // run & annotate
+ for (xs, _paths) in dl {
+ let ys = model.forward(&xs, args.profile)?;
+ let images_plotted = annotator.plot(&xs, &ys, !args.nosave)?;
+
+ // show image
+ match &mut viewer {
+ Some(viewer) => viewer.imshow(&images_plotted)?,
+ None => continue,
+ }
+
+ // check out window and key event
+ match &mut viewer {
+ Some(viewer) => {
+ if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) {
+ break;
+ }
+ }
+ None => continue,
+ }
+
+ // write video
+ if !args.nosave {
+ match &mut viewer {
+ Some(viewer) => viewer.write_batch(&images_plotted)?,
+ None => continue,
+ }
+ }
+ }
+
+ // finish video write
+ if !args.nosave {
+ if let Some(viewer) = &mut viewer {
+ viewer.finish_write()?;
+ }
+ }
+
+ Ok(())
+}
diff --git a/examples/YOLOv8-Action-Recognition/action_recognition.py b/examples/YOLOv8-Action-Recognition/action_recognition.py
index aad74375a57..38b6a252693 100644
--- a/examples/YOLOv8-Action-Recognition/action_recognition.py
+++ b/examples/YOLOv8-Action-Recognition/action_recognition.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
import time
@@ -263,7 +263,7 @@ def crop_and_pad(frame, box, margin_percent):
def run(
- weights: str = "yolov8n.pt",
+ weights: str = "yolo11n.pt",
device: str = "",
source: str = "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
output_path: Optional[str] = None,
@@ -279,7 +279,7 @@ def run(
Run action recognition on a video source using YOLO for object detection and a video classifier.
Args:
- weights (str): Path to the YOLO model weights. Defaults to "yolov8n.pt".
+ weights (str): Path to the YOLO model weights. Defaults to "yolo11n.pt".
device (str): Device to run the model on. Use 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon, or 'cpu'. Defaults to auto-detection.
source (str): Path to mp4 video file or YouTube URL. Defaults to a sample YouTube video.
output_path (Optional[str], optional): Path to save the output video. Defaults to None.
@@ -421,7 +421,7 @@ def run(
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
- parser.add_argument("--weights", type=str, default="yolov8n.pt", help="ultralytics detector model path")
+ parser.add_argument("--weights", type=str, default="yolo11n.pt", help="ultralytics detector model path")
parser.add_argument("--device", default="", help='cuda device, i.e. 0 or 0,1,2,3 or cpu/mps, "" for auto-detection')
parser.add_argument(
"--source",
diff --git a/examples/YOLOv8-CPP-Inference/README.md b/examples/YOLOv8-CPP-Inference/README.md
index 5bb2586dd63..243d448e366 100644
--- a/examples/YOLOv8-CPP-Inference/README.md
+++ b/examples/YOLOv8-CPP-Inference/README.md
@@ -1,6 +1,6 @@
# YOLOv8/YOLOv5 Inference C++
-This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV's DNN API.
+This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with the OpenCV DNN API.
## Usage
@@ -27,13 +27,13 @@ make
To export YOLOv8 models:
-```commandline
+```bash
yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
```
To export YOLOv5 models:
-```commandline
+```bash
python3 export.py --weights yolov5s.pt --img 480 640 --include onnx --opset 12
```
@@ -45,6 +45,6 @@ yolov5s.onnx:

-This repository utilizes OpenCV's DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case.
+This repository utilizes the OpenCV DNN API to run ONNX-exported YOLOv5 and YOLOv8 models. In theory, it should also work for YOLOv6 and YOLOv7, but these have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case (see the sketch below).
The **main** branch version uses Qt as a GUI wrapper. The primary focus here is the **Inference** class file, which demonstrates how to transpose YOLOv8 models to work as YOLOv5 models.
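For readers unfamiliar with the letterbox approach mentioned above, the idea is to resize the image while preserving its aspect ratio and pad the remainder with a constant color so the network sees its expected input size. A minimal Python/OpenCV sketch of the idea (an illustration only, not the code used by this C++ example):

```python
import cv2

def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize img to fit inside new_shape (h, w) keeping aspect ratio, then pad the rest."""
    h, w = img.shape[:2]
    r = min(new_shape[0] / h, new_shape[1] / w)          # scale ratio
    new_w, new_h = int(round(w * r)), int(round(h * r))  # size after aspect-preserving resize
    dw, dh = new_shape[1] - new_w, new_shape[0] - new_h  # total padding in each dimension
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    top, bottom = dh // 2, dh - dh // 2
    left, right = dw // 2, dw - dw // 2
    return cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
```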
diff --git a/examples/YOLOv8-LibTorch-CPP-Inference/README.md b/examples/YOLOv8-LibTorch-CPP-Inference/README.md
index 930c3cd2225..1380071ee4a 100644
--- a/examples/YOLOv8-LibTorch-CPP-Inference/README.md
+++ b/examples/YOLOv8-LibTorch-CPP-Inference/README.md
@@ -30,6 +30,6 @@ make
To export YOLOv8 models:
-```commandline
+```bash
yolo export model=yolov8s.pt imgsz=640 format=torchscript
```
diff --git a/examples/YOLOv8-LibTorch-CPP-Inference/main.cc b/examples/YOLOv8-LibTorch-CPP-Inference/main.cc
index b68b7f7e4bf..0937b56828e 100644
--- a/examples/YOLOv8-LibTorch-CPP-Inference/main.cc
+++ b/examples/YOLOv8-LibTorch-CPP-Inference/main.cc
@@ -226,6 +226,7 @@ int main() {
cv::Mat image = cv::imread("/path/to/bus.jpg");
cv::Mat input_image;
letterbox(image, input_image, {640, 640});
+ cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);
torch::Tensor image_tensor = torch::from_blob(input_image.data, {input_image.rows, input_image.cols, 3}, torch::kByte).to(device);
image_tensor = image_tensor.toType(torch::kFloat32).div(255);
diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp
index a65391f5d7d..168df490c27 100644
--- a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp
+++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp
@@ -107,11 +107,11 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
iouThreshold = iParams.iouThreshold;
imgSize = iParams.imgSize;
modelType = iParams.modelType;
+ cudaEnable = iParams.cudaEnable;
env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
Ort::SessionOptions sessionOption;
if (iParams.cudaEnable)
{
- cudaEnable = iParams.cudaEnable;
OrtCUDAProviderOptions cudaOption;
cudaOption.device_id = 0;
sessionOption.AppendExecutionProvider_CUDA(cudaOption);
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml
index 8ac747e7e34..8eb421a86a1 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml
+++ b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
[package]
name = "yolov8-rs"
@@ -9,11 +9,11 @@ edition = "2021"
[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
-image = { version = "0.24.7", default-features = false, features = ["jpeg", "png", "webp-encoder"] }
-imageproc = { version = "0.23.0", default-features = false }
-ndarray = { version = "0.15.6" }
-ort = { version = "1.16.3", default-features = false, features = ["load-dynamic", "copy-dylibs", "half"] }
-rusttype = { version = "0.9", default-features = false }
+image = { version = "0.25.2"}
+imageproc = { version = "0.25.0"}
+ndarray = { version = "0.16" }
+ort = { version = "2.0.0-rc.5", features = ["cuda", "tensorrt", "load-dynamic", "copy-dylibs", "half"]}
+rusttype = { version = "0.9.3" }
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
@@ -21,3 +21,4 @@ chrono = { version = "0.4.30" }
half = { version = "2.3.1" }
dirs = { version = "5.0.1" }
ureq = { version = "2.9.1" }
+ab_glyph = "0.2.29"
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/README.md b/examples/YOLOv8-ONNXRuntime-Rust/README.md
index 48a3017ce81..ec09edbf655 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/README.md
+++ b/examples/YOLOv8-ONNXRuntime-Rust/README.md
@@ -5,9 +5,9 @@ This repository provides a Rust demo for performing YOLOv8 tasks like `Classific
## Recently Updated
- Add YOLOv8-OBB demo
-- Update ONNXRuntime to 1.17.x
+- Update ONNXRuntime to 1.19.x
-Newly updated YOLOv8 example code is located in this repository (https://github.com/jamjamjon/usls/tree/main/examples/yolo)
+Newly updated YOLOv8 example code is located in [this repository](https://github.com/jamjamjon/usls/tree/main/examples/yolo)
## Features
@@ -22,25 +22,16 @@ Newly updated YOLOv8 example code is located in this repository (https://github.
Please follow the Rust official installation. (https://www.rust-lang.org/tools/install)
-### 2. Install ONNXRuntime
+### 2. ONNXRuntime Linking
-This repository use `ort` crate, which is ONNXRuntime wrapper for Rust. (https://docs.rs/ort/latest/ort/)
+- #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking).
-You can follow the instruction with `ort` doc or simply do this:
-
-- step1: Download ONNXRuntime(https://github.com/microsoft/onnxruntime/releases)
-- setp2: Set environment variable `PATH` for linking.
-
-On ubuntu, You can do like this:
-
-```bash
-vim ~/.bashrc
-
-# Add the path of ONNXRUntime lib
-export LD_LIBRARY_PATH=/home/qweasd/Documents/onnxruntime-linux-x64-gpu-1.16.3/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
-
-source ~/.bashrc
-```
+- #### For Linux or macOS Users:
+ - Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
+ - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
+ ```shell
+ export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
+ ```
### 3. \[Optional\] Install CUDA & CuDNN & TensorRT
@@ -96,13 +87,13 @@ cargo run --release -- --cuda --device_id 0 --model --source
Set `--batch` to do multi-batch-size inference.
-If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify min/max/opt batch for dynamic batch input.(https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input).(Note that the ONNX model should exported with dynamic shapes)
+If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify the min/max/opt batch sizes for dynamic batch input (see https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input). Note that the ONNX model should be exported with dynamic shapes.
```bash
cargo run --release -- --cuda --batch 2 --model --source
```
-Set `--height` and `--width` to do dynamic image size inference. (Note that the ONNX model should exported with dynamic shapes)
+Set `--height` and `--width` to do dynamic image size inference. (Note that the ONNX model should be exported with dynamic shapes.)
```bash
cargo run --release -- --cuda --width 480 --height 640 --model --source
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs
index 2ba0dd49ec1..b5bc05a585a 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs
+++ b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs
@@ -15,7 +15,7 @@ pub struct Args {
/// device id
#[arg(long, default_value_t = 0)]
- pub device_id: u32,
+ pub device_id: i32,
/// using TensorRT EP
#[arg(long)]
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs
index 1af7f7c5e12..0084535ee57 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs
+++ b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs
@@ -117,3 +117,44 @@ pub fn check_font(font: &str) -> rusttype::Font<'static> {
let buffer = std::fs::read(font_path).unwrap();
rusttype::Font::try_from_vec(buffer).unwrap()
}
+
+use ab_glyph::FontArc;
+pub fn load_font() -> FontArc {
+ use std::path::Path;
+ let font_path = Path::new("./font/Arial.ttf");
+ match font_path.try_exists() {
+ Ok(true) => {
+ let buffer = std::fs::read(font_path).unwrap();
+ FontArc::try_from_vec(buffer).unwrap()
+ }
+ Ok(false) => {
+ std::fs::create_dir_all("./font").unwrap();
+ println!("Downloading font...");
+ let source_url = "https://ultralytics.com/assets/Arial.ttf";
+ let resp = ureq::get(source_url)
+ .timeout(std::time::Duration::from_secs(500))
+ .call()
+ .unwrap_or_else(|err| panic!("> Failed to download font: {source_url}: {err:?}"));
+
+ // read to buffer
+ let mut buffer = vec![];
+ let total_size = resp
+ .header("Content-Length")
+                .and_then(|s| s.parse::<u64>().ok())
+ .unwrap();
+ let _reader = resp
+ .into_reader()
+ .take(total_size)
+ .read_to_end(&mut buffer)
+ .unwrap();
+ // save
+ let mut fd = std::fs::File::create(font_path).unwrap();
+ fd.write_all(&buffer).unwrap();
+ println!("Font saved at: {:?}", font_path.display());
+ FontArc::try_from_vec(buffer).unwrap()
+ }
+ Err(e) => {
+ panic!("Failed to load font {}", e);
+ }
+ }
+}
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs
index 8dd1567990c..fd3845ced08 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs
+++ b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs
@@ -6,7 +6,7 @@ fn main() -> Result<(), Box> {
let args = Args::parse();
// 1. load image
- let x = image::io::Reader::open(&args.source)?
+ let x = image::ImageReader::open(&args.source)?
.with_guessed_format()?
.decode()?;
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs
index 1c0e5e494d8..95b2bdfffaa 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs
+++ b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs
@@ -1,5 +1,6 @@
#![allow(clippy::type_complexity)]
+use ab_glyph::FontArc;
use anyhow::Result;
use image::{DynamicImage, GenericImageView, ImageBuffer};
use ndarray::{s, Array, Axis, IxDyn};
@@ -7,7 +8,7 @@ use rand::{thread_rng, Rng};
use std::path::PathBuf;
use crate::{
- check_font, gen_time_string, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend,
+ gen_time_string, load_font, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend,
OrtConfig, OrtEP, Point2, YOLOResult, YOLOTask, SKELETON,
};
@@ -36,9 +37,9 @@ impl YOLOv8 {
let ep = if config.trt {
OrtEP::Trt(config.device_id)
} else if config.cuda {
- OrtEP::Cuda(config.device_id)
+ OrtEP::CUDA(config.device_id)
} else {
- OrtEP::Cpu
+ OrtEP::CPU
};
// batch
@@ -330,12 +331,19 @@ impl YOLOv8 {
// coefs * proto -> mask
let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm)
- let proto = proto.to_owned().into_shape((nm, nh * nw))?; // (nm, nh*nw)
- let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n)
+
+ let proto = proto.to_owned();
+ let proto = proto.to_shape((nm, nh * nw))?; // (nm, nh*nw)
+ let mask = coefs.dot(&proto); // (nh, nw, n)
+ let mask = mask.to_shape((nh, nw, 1))?;
// build image from ndarray
let mask_im: ImageBuffer, Vec> =
- match ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec()) {
+ match ImageBuffer::from_raw(
+ nw as u32,
+ nh as u32,
+ mask.to_owned().into_raw_vec_and_offset().0,
+ ) {
Some(image) => image,
None => panic!("can not create image from ndarray"),
};
@@ -410,7 +418,7 @@ impl YOLOv8 {
skeletons: Option<&[(usize, usize)]>,
) {
// check font then load
- let font = check_font("Arial.ttf");
+ let font: FontArc = load_font();
for (_idb, (img0, y)) in xs0.iter().zip(ys.iter()).enumerate() {
let mut img = img0.to_rgb8();
@@ -422,12 +430,13 @@ impl YOLOv8 {
let legend_size = img.width().max(img.height()) / scale;
let x = img.width() / 20;
let y = img.height() / 20 + i as u32 * legend_size;
+
imageproc::drawing::draw_text_mut(
&mut img,
image::Rgb([0, 255, 0]),
x as i32,
y as i32,
- rusttype::Scale::uniform(legend_size as f32 - 1.),
+ legend_size as f32,
&font,
&legend,
);
@@ -454,7 +463,7 @@ impl YOLOv8 {
image::Rgb(self.color_palette[bbox.id()].into()),
bbox.xmin() as i32,
(bbox.ymin() - legend_size as f32) as i32,
- rusttype::Scale::uniform(legend_size as f32 - 1.),
+ legend_size as f32,
&font,
&legend,
);
@@ -551,7 +560,7 @@ impl YOLOv8 {
None => String::from(""),
},
self.engine.ep(),
- if let OrtEP::Cpu = self.engine.ep() {
+ if let OrtEP::CPU = self.engine.ep() {
""
} else {
"(May still fall back to CPU)"
diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs
index 857baaebae0..d88208dead3 100644
--- a/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs
+++ b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs
@@ -2,11 +2,13 @@ use anyhow::Result;
use clap::ValueEnum;
use half::f16;
use ndarray::{Array, CowArray, IxDyn};
-use ort::execution_providers::{CUDAExecutionProviderOptions, TensorRTExecutionProviderOptions};
-use ort::tensor::TensorElementDataType;
-use ort::{Environment, ExecutionProvider, Session, SessionBuilder, Value};
+use ort::{
+ CPUExecutionProvider, CUDAExecutionProvider, ExecutionProvider, ExecutionProviderDispatch,
+ TensorRTExecutionProvider,
+};
+use ort::{Session, SessionBuilder};
+use ort::{TensorElementType, ValueType};
use regex::Regex;
-
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum YOLOTask {
// YOLO tasks
@@ -19,9 +21,9 @@ pub enum YOLOTask {
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum OrtEP {
// ONNXRuntime execution provider
- Cpu,
- Cuda(u32),
- Trt(u32),
+ CPU,
+ CUDA(i32),
+ Trt(i32),
}
#[derive(Debug)]
@@ -44,8 +46,9 @@ impl Default for Batch {
#[derive(Debug, Default)]
pub struct OrtInputs {
// ONNX model inputs attrs
- pub shapes: Vec>,
- pub dtypes: Vec,
+ pub shapes: Vec>,
+ //pub dtypes: Vec,
+ pub dtypes: Vec,
pub names: Vec,
pub sizes: Vec>,
}
@@ -56,12 +59,19 @@ impl OrtInputs {
let mut dtypes = Vec::new();
let mut names = Vec::new();
for i in session.inputs.iter() {
- let shape: Vec = i
+ /* let shape: Vec = i
.dimensions()
.map(|x| if let Some(x) = x { x as i32 } else { -1i32 })
.collect();
- shapes.push(shape);
- dtypes.push(i.input_type);
+ shapes.push(shape); */
+ if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type {
+ dtypes.push(ty.clone());
+ let shape = dimensions.clone();
+ shapes.push(shape);
+ } else {
+                panic!("Unsupported data format, {} - {}", file!(), line!());
+ }
+ //dtypes.push(i.input_type);
names.push(i.name.clone());
}
Self {
@@ -97,12 +107,14 @@ pub struct OrtBackend {
impl OrtBackend {
pub fn build(args: OrtConfig) -> Result {
// build env & session
- let env = Environment::builder()
- .with_name("YOLOv8")
- .with_log_level(ort::LoggingLevel::Verbose)
- .build()?
- .into_arc();
- let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?;
+ // in version 2.x environment is removed
+ /* let env = ort::EnvironmentBuilder
+ ::with_name("YOLOv8")
+ .build()?
+ .into_arc(); */
+ let sessionbuilder = SessionBuilder::new()?;
+ let session = sessionbuilder.commit_from_file(&args.f)?;
+ //let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?;
// get inputs
let mut inputs = OrtInputs::new(&session);
@@ -142,16 +154,19 @@ impl OrtBackend {
// build provider
let (ep, provider) = match args.ep {
- OrtEP::Cuda(device_id) => Self::set_ep_cuda(device_id),
+ OrtEP::CUDA(device_id) => Self::set_ep_cuda(device_id),
OrtEP::Trt(device_id) => Self::set_ep_trt(device_id, args.trt_fp16, &batch, &inputs),
- _ => (OrtEP::Cpu, ExecutionProvider::CPU(Default::default())),
+ _ => (
+ OrtEP::CPU,
+ ExecutionProviderDispatch::from(CPUExecutionProvider::default()),
+ ),
};
// build session again with the new provider
- let session = SessionBuilder::new(&env)?
+ let session = SessionBuilder::new()?
// .with_optimization_level(ort::GraphOptimizationLevel::Level3)?
.with_execution_providers([provider])?
- .with_model_from_file(args.f)?;
+ .commit_from_file(args.f)?;
// task: using given one or guessing
let task = match args.task {
@@ -185,57 +200,58 @@ impl OrtBackend {
pub fn fetch_inputs_from_session(
session: &Session,
-    ) -> (Vec<Vec<i32>>, Vec<TensorElementDataType>, Vec<String>) {
+    ) -> (Vec<Vec<i64>>, Vec<TensorElementType>, Vec<String>) {
// get inputs attrs from ONNX model
let mut shapes = Vec::new();
let mut dtypes = Vec::new();
let mut names = Vec::new();
for i in session.inputs.iter() {
-            let shape: Vec<i32> = i
- .dimensions()
- .map(|x| if let Some(x) = x { x as i32 } else { -1i32 })
- .collect();
- shapes.push(shape);
- dtypes.push(i.input_type);
+ if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type {
+ dtypes.push(ty.clone());
+ let shape = dimensions.clone();
+ shapes.push(shape);
+ } else {
+                panic!("Unsupported data format, {} - {}", file!(), line!());
+ }
names.push(i.name.clone());
}
(shapes, dtypes, names)
}
- pub fn set_ep_cuda(device_id: u32) -> (OrtEP, ExecutionProvider) {
- // set CUDA
- if ExecutionProvider::CUDA(Default::default()).is_available() {
+ pub fn set_ep_cuda(device_id: i32) -> (OrtEP, ExecutionProviderDispatch) {
+ let cuda_provider = CUDAExecutionProvider::default().with_device_id(device_id);
+ if let Ok(true) = cuda_provider.is_available() {
(
- OrtEP::Cuda(device_id),
- ExecutionProvider::CUDA(CUDAExecutionProviderOptions {
- device_id,
- ..Default::default()
- }),
+ OrtEP::CUDA(device_id),
+ ExecutionProviderDispatch::from(cuda_provider), //PlantForm::CUDA(cuda_provider)
)
} else {
println!("> CUDA is not available! Using CPU.");
- (OrtEP::Cpu, ExecutionProvider::CPU(Default::default()))
+ (
+ OrtEP::CPU,
+ ExecutionProviderDispatch::from(CPUExecutionProvider::default()), //PlantForm::CPU(CPUExecutionProvider::default())
+ )
}
}
pub fn set_ep_trt(
- device_id: u32,
+ device_id: i32,
fp16: bool,
batch: &Batch,
inputs: &OrtInputs,
- ) -> (OrtEP, ExecutionProvider) {
+ ) -> (OrtEP, ExecutionProviderDispatch) {
// set TensorRT
- if ExecutionProvider::TensorRT(Default::default()).is_available() {
- let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]);
+ let trt_provider = TensorRTExecutionProvider::default().with_device_id(device_id);
- // dtype match checking
- if inputs.dtypes[0] == TensorElementDataType::Float16 && !fp16 {
+ //trt_provider.
+ if let Ok(true) = trt_provider.is_available() {
+ let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]);
+ if inputs.dtypes[0] == TensorElementType::Float16 && !fp16 {
panic!(
"Dtype mismatch! Expected: Float32, got: {:?}. You should use `--fp16`",
inputs.dtypes[0]
);
}
-
// dynamic shape: input_tensor_1:dim_1xdim_2x...,input_tensor_2:dim_3xdim_4x...,...
let mut opt_string = String::new();
let mut min_string = String::new();
@@ -251,17 +267,16 @@ impl OrtBackend {
let _ = opt_string.pop();
let _ = min_string.pop();
let _ = max_string.pop();
+
+ let trt_provider = trt_provider
+ .with_profile_opt_shapes(opt_string)
+ .with_profile_min_shapes(min_string)
+ .with_profile_max_shapes(max_string)
+ .with_fp16(fp16)
+ .with_timing_cache(true);
(
OrtEP::Trt(device_id),
- ExecutionProvider::TensorRT(TensorRTExecutionProviderOptions {
- device_id,
- fp16_enable: fp16,
- timing_cache_enable: true,
- profile_min_shapes: min_string,
- profile_max_shapes: max_string,
- profile_opt_shapes: opt_string,
- ..Default::default()
- }),
+ ExecutionProviderDispatch::from(trt_provider),
)
} else {
println!("> TensorRT is not available! Try using CUDA...");
@@ -283,8 +298,8 @@ impl OrtBackend {
pub fn run(&self, xs: Array<f32, IxDyn>, profile: bool) -> Result<Vec<Array<f32, IxDyn>>> {
// ORT inference
match self.dtype() {
- TensorElementDataType::Float16 => self.run_fp16(xs, profile),
- TensorElementDataType::Float32 => self.run_fp32(xs, profile),
+ TensorElementType::Float16 => self.run_fp16(xs, profile),
+ TensorElementType::Float32 => self.run_fp32(xs, profile),
_ => todo!(),
}
}
@@ -300,14 +315,13 @@ impl OrtBackend {
// h2d
let t = std::time::Instant::now();
let xs = CowArray::from(xs);
- let xs = vec![Value::from_array(self.session.allocator(), &xs)?];
if profile {
println!("[ORT H2D]: {:?}", t.elapsed());
}
// run
let t = std::time::Instant::now();
- let ys = self.session.run(xs)?;
+ let ys = self.session.run(ort::inputs![xs.view()]?)?;
if profile {
println!("[ORT Inference]: {:?}", t.elapsed());
}
@@ -315,21 +329,22 @@ impl OrtBackend {
// d2h
Ok(ys
.iter()
- .map(|x| {
+ .map(|(_k, v)| {
// d2h
let t = std::time::Instant::now();
- let x = x.try_extract::<_>().unwrap().view().clone().into_owned();
+ let v = v.try_extract_tensor().unwrap();
+ //let v = v.try_extract::<_>().unwrap().view().clone().into_owned();
if profile {
println!("[ORT D2H]: {:?}", t.elapsed());
}
// f16->f32
let t_ = std::time::Instant::now();
- let x = x.mapv(f16::to_f32);
+ let v = v.mapv(f16::to_f32);
if profile {
println!("[ORT f16->f32]: {:?}", t_.elapsed());
}
- x
+ v
})
.collect::<Vec<Array<f32, IxDyn>>>())
}
@@ -338,14 +353,13 @@ impl OrtBackend {
// h2d
let t = std::time::Instant::now();
let xs = CowArray::from(xs);
- let xs = vec![Value::from_array(self.session.allocator(), &xs)?];
if profile {
println!("[ORT H2D]: {:?}", t.elapsed());
}
// run
let t = std::time::Instant::now();
- let ys = self.session.run(xs)?;
+ let ys = self.session.run(ort::inputs![xs.view()]?)?;
if profile {
println!("[ORT Inference]: {:?}", t.elapsed());
}
@@ -353,39 +367,44 @@ impl OrtBackend {
// d2h
Ok(ys
.iter()
- .map(|x| {
+ .map(|(_k, v)| {
let t = std::time::Instant::now();
- let x = x.try_extract::<_>().unwrap().view().clone().into_owned();
+                let v = v.try_extract_tensor::<f32>().unwrap().into_owned();
+ //let x = x.try_extract::<_>().unwrap().view().clone().into_owned();
if profile {
println!("[ORT D2H]: {:?}", t.elapsed());
}
- x
+ v
})
.collect::<Vec<Array<f32, IxDyn>>>())
}
-    pub fn output_shapes(&self) -> Vec<Vec<i32>> {
+    pub fn output_shapes(&self) -> Vec<Vec<i64>> {
let mut shapes = Vec::new();
- for o in &self.session.outputs {
- let shape: Vec<_> = o
- .dimensions()
- .map(|x| if let Some(x) = x { x as i32 } else { -1i32 })
- .collect();
- shapes.push(shape);
+ for output in &self.session.outputs {
+ if let ValueType::Tensor { ty: _, dimensions } = &output.output_type {
+ let shape = dimensions.clone();
+ shapes.push(shape);
+ } else {
+                panic!("Unsupported data format, {} - {}", file!(), line!());
+ }
}
shapes
}
-    pub fn output_dtypes(&self) -> Vec<TensorElementDataType> {
+    pub fn output_dtypes(&self) -> Vec<TensorElementType> {
let mut dtypes = Vec::new();
- self.session
- .outputs
- .iter()
- .for_each(|x| dtypes.push(x.output_type));
+ for output in &self.session.outputs {
+ if let ValueType::Tensor { ty, dimensions: _ } = &output.output_type {
+ dtypes.push(ty.clone());
+ } else {
+                panic!("Unsupported data format, {} - {}", file!(), line!());
+ }
+ }
dtypes
}
-    pub fn input_shapes(&self) -> &Vec<Vec<i32>> {
+    pub fn input_shapes(&self) -> &Vec<Vec<i64>> {
&self.inputs.shapes
}
@@ -393,11 +412,11 @@ impl OrtBackend {
&self.inputs.names
}
-    pub fn input_dtypes(&self) -> &Vec<TensorElementDataType> {
+    pub fn input_dtypes(&self) -> &Vec<TensorElementType> {
&self.inputs.dtypes
}
- pub fn dtype(&self) -> TensorElementDataType {
+ pub fn dtype(&self) -> TensorElementType {
self.input_dtypes()[0]
}
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py
index 71b251d37a3..d1e18a404c1 100644
--- a/examples/YOLOv8-ONNXRuntime/main.py
+++ b/examples/YOLOv8-ONNXRuntime/main.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
index c58b9ced5df..e9e095dd462 100644
--- a/examples/YOLOv8-OpenCV-ONNX-Python/main.py
+++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
diff --git a/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md b/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md
deleted file mode 100644
index ea14e4440ec..00000000000
--- a/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# YOLOv8 - Int8-TFLite Runtime
-
-Welcome to the YOLOv8 Int8 TFLite Runtime for efficient and optimized object detection project. This README provides comprehensive instructions for installing and using our YOLOv8 implementation.
-
-## Installation
-
-Ensure a smooth setup by following these steps to install necessary dependencies.
-
-### Installing Required Dependencies
-
-Install all required dependencies with this simple command:
-
-```bash
-pip install -r requirements.txt
-```
-
-### Installing `tflite-runtime`
-
-To load TFLite models, install the `tflite-runtime` package using:
-
-```bash
-pip install tflite-runtime
-```
-
-### Installing `tensorflow-gpu` (For NVIDIA GPU Users)
-
-Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`:
-
-```bash
-pip install tensorflow-gpu
-```
-
-**Note:** Ensure you have compatible GPU drivers installed on your system.
-
-### Installing `tensorflow` (CPU Version)
-
-For CPU usage or non-NVIDIA GPUs, install TensorFlow with:
-
-```bash
-pip install tensorflow
-```
-
-## Usage
-
-Follow these instructions to run YOLOv8 after successful installation.
-
-Convert the YOLOv8 model to Int8 TFLite format:
-
-```bash
-yolo export model=yolov8n.pt imgsz=640 format=tflite int8
-```
-
-Locate the Int8 TFLite model in `yolov8n_saved_model`. Choose `best_full_integer_quant` or verify quantization at [Netron](https://netron.app/). Then, execute the following in your terminal:
-
-```bash
-python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf-thres 0.5 --iou-thres 0.5
-```
-
-Replace `best_full_integer_quant.tflite` with your model file's path, `image.jpg` with your input image, and adjust the confidence (conf-thres) and IoU thresholds (iou-thres) as necessary.
-
-### Output
-
-The output is displayed as annotated images, showcasing the model's detection capabilities:
-
-
diff --git a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py b/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py
deleted file mode 100644
index 70bccfa1865..00000000000
--- a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-import argparse
-
-import cv2
-import numpy as np
-from tflite_runtime import interpreter as tflite
-
-from ultralytics.utils import ASSETS, yaml_load
-from ultralytics.utils.checks import check_yaml
-
-# Declare as global variables, can be updated based trained model image size
-img_width = 640
-img_height = 640
-
-
-class LetterBox:
- """Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models."""
-
- def __init__(
- self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32
- ):
- """Initializes LetterBox with parameters for reshaping and transforming image while maintaining aspect ratio."""
- self.new_shape = new_shape
- self.auto = auto
- self.scaleFill = scaleFill
- self.scaleup = scaleup
- self.stride = stride
- self.center = center # Put the image in the middle or top-left
-
- def __call__(self, labels=None, image=None):
- """Return updated labels and image with added border."""
- if labels is None:
- labels = {}
- img = labels.get("img") if image is None else image
- shape = img.shape[:2] # current shape [height, width]
- new_shape = labels.pop("rect_shape", self.new_shape)
- if isinstance(new_shape, int):
- new_shape = (new_shape, new_shape)
-
- # Scale ratio (new / old)
- r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
- if not self.scaleup: # only scale down, do not scale up (for better val mAP)
- r = min(r, 1.0)
-
- # Compute padding
- ratio = r, r # width, height ratios
- new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
- dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
- if self.auto: # minimum rectangle
- dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding
- elif self.scaleFill: # stretch
- dw, dh = 0.0, 0.0
- new_unpad = (new_shape[1], new_shape[0])
- ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
-
- if self.center:
- dw /= 2 # divide padding into 2 sides
- dh /= 2
-
- if shape[::-1] != new_unpad: # resize
- img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
- top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
- left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
- img = cv2.copyMakeBorder(
- img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
- ) # add border
- if labels.get("ratio_pad"):
- labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation
-
- if len(labels):
- labels = self._update_labels(labels, ratio, dw, dh)
- labels["img"] = img
- labels["resized_shape"] = new_shape
- return labels
- else:
- return img
-
- def _update_labels(self, labels, ratio, padw, padh):
- """Update labels."""
- labels["instances"].convert_bbox(format="xyxy")
- labels["instances"].denormalize(*labels["img"].shape[:2][::-1])
- labels["instances"].scale(*ratio)
- labels["instances"].add_padding(padw, padh)
- return labels
-
-
-class Yolov8TFLite:
- """Class for performing object detection using YOLOv8 model converted to TensorFlow Lite format."""
-
- def __init__(self, tflite_model, input_image, confidence_thres, iou_thres):
- """
- Initializes an instance of the Yolov8TFLite class.
-
- Args:
- tflite_model: Path to the TFLite model.
- input_image: Path to the input image.
- confidence_thres: Confidence threshold for filtering detections.
- iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
- """
- self.tflite_model = tflite_model
- self.input_image = input_image
- self.confidence_thres = confidence_thres
- self.iou_thres = iou_thres
-
- # Load the class names from the COCO dataset
- self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]
-
- # Generate a color palette for the classes
- self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
-
- def draw_detections(self, img, box, score, class_id):
- """
- Draws bounding boxes and labels on the input image based on the detected objects.
-
- Args:
- img: The input image to draw detections on.
- box: Detected bounding box.
- score: Corresponding detection score.
- class_id: Class ID for the detected object.
-
- Returns:
- None
- """
- # Extract the coordinates of the bounding box
- x1, y1, w, h = box
-
- # Retrieve the color for the class ID
- color = self.color_palette[class_id]
-
- # Draw the bounding box on the image
- cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
-
- # Create the label text with class name and score
- label = f"{self.classes[class_id]}: {score:.2f}"
-
- # Calculate the dimensions of the label text
- (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
-
- # Calculate the position of the label text
- label_x = x1
- label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
-
- # Draw a filled rectangle as the background for the label text
- cv2.rectangle(
- img,
- (int(label_x), int(label_y - label_height)),
- (int(label_x + label_width), int(label_y + label_height)),
- color,
- cv2.FILLED,
- )
-
- # Draw the label text on the image
- cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
-
- def preprocess(self):
- """
- Preprocesses the input image before performing inference.
-
- Returns:
- image_data: Preprocessed image data ready for inference.
- """
- # Read the input image using OpenCV
- self.img = cv2.imread(self.input_image)
-
- print("image before", self.img)
- # Get the height and width of the input image
- self.img_height, self.img_width = self.img.shape[:2]
-
- letterbox = LetterBox(new_shape=[img_width, img_height], auto=False, stride=32)
- image = letterbox(image=self.img)
- image = [image]
- image = np.stack(image)
- image = image[..., ::-1].transpose((0, 3, 1, 2))
- img = np.ascontiguousarray(image)
- # n, h, w, c
- image = img.astype(np.float32)
- return image / 255
-
- def postprocess(self, input_image, output):
- """
- Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
-
- Args:
- input_image (numpy.ndarray): The input image.
- output (numpy.ndarray): The output of the model.
-
- Returns:
- numpy.ndarray: The input image with detections drawn on it.
- """
- boxes = []
- scores = []
- class_ids = []
- for pred in output:
- pred = np.transpose(pred)
- for box in pred:
- x, y, w, h = box[:4]
- x1 = x - w / 2
- y1 = y - h / 2
- boxes.append([x1, y1, w, h])
- idx = np.argmax(box[4:])
- scores.append(box[idx + 4])
- class_ids.append(idx)
-
- indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)
-
- for i in indices:
- # Get the box, score, and class ID corresponding to the index
- box = boxes[i]
- gain = min(img_width / self.img_width, img_height / self.img_height)
- pad = (
- round((img_width - self.img_width * gain) / 2 - 0.1),
- round((img_height - self.img_height * gain) / 2 - 0.1),
- )
- box[0] = (box[0] - pad[0]) / gain
- box[1] = (box[1] - pad[1]) / gain
- box[2] = box[2] / gain
- box[3] = box[3] / gain
- score = scores[i]
- class_id = class_ids[i]
- if score > 0.25:
- print(box, score, class_id)
- # Draw the detection on the input image
- self.draw_detections(input_image, box, score, class_id)
-
- return input_image
-
- def main(self):
- """
- Performs inference using a TFLite model and returns the output image with drawn detections.
-
- Returns:
- output_img: The output image with drawn detections.
- """
- # Create an interpreter for the TFLite model
- interpreter = tflite.Interpreter(model_path=self.tflite_model)
- self.model = interpreter
- interpreter.allocate_tensors()
-
- # Get the model inputs
- input_details = interpreter.get_input_details()
- output_details = interpreter.get_output_details()
-
- # Store the shape of the input for later use
- input_shape = input_details[0]["shape"]
- self.input_width = input_shape[1]
- self.input_height = input_shape[2]
-
- # Preprocess the image data
- img_data = self.preprocess()
- img_data = img_data
- # img_data = img_data.cpu().numpy()
- # Set the input tensor to the interpreter
- print(input_details[0]["index"])
- print(img_data.shape)
- img_data = img_data.transpose((0, 2, 3, 1))
-
- scale, zero_point = input_details[0]["quantization"]
- img_data_int8 = (img_data / scale + zero_point).astype(np.int8)
- interpreter.set_tensor(input_details[0]["index"], img_data_int8)
-
- # Run inference
- interpreter.invoke()
-
- # Get the output tensor from the interpreter
- output = interpreter.get_tensor(output_details[0]["index"])
- scale, zero_point = output_details[0]["quantization"]
- output = (output.astype(np.float32) - zero_point) * scale
-
- output[:, [0, 2]] *= img_width
- output[:, [1, 3]] *= img_height
- print(output)
- # Perform post-processing on the outputs to obtain output image.
- return self.postprocess(self.img, output)
-
-
-if __name__ == "__main__":
- # Create an argument parser to handle command-line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument(
- "--model", type=str, default="yolov8n_full_integer_quant.tflite", help="Input your TFLite model."
- )
- parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
- parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
- parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
- args = parser.parse_args()
-
- # Create an instance of the Yolov8TFLite class with the specified arguments
- detection = Yolov8TFLite(args.model, args.img, args.conf_thres, args.iou_thres)
-
- # Perform object detection and obtain the output image
- output_image = detection.main()
-
- # Display the output image in a window
- cv2.imshow("Output", output_image)
-
- # Wait for a key press to exit
- cv2.waitKey(0)
diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/README.md b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md
index e668a0e7e75..6c6c794dea3 100644
--- a/examples/YOLOv8-OpenVINO-CPP-Inference/README.md
+++ b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md
@@ -50,7 +50,7 @@ Once built, you can run inference on an image using the following command:
To use your YOLOv8 model with OpenVINO, you need to export it first. Use the command below to export the model:
-```commandline
+```bash
yolo export model=yolov8s.pt imgsz=640 format=openvino
```
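If you prefer the Python API over the CLI, the same export can be done programmatically. This is a minimal sketch assuming the standard `ultralytics` Python interface; the CLI command above remains the canonical instruction:

```python
# Export a YOLOv8 model to OpenVINO format via the Python API (sketch).
from ultralytics import YOLO

model = YOLO("yolov8s.pt")                  # load the PyTorch checkpoint
model.export(format="openvino", imgsz=640)  # writes the OpenVINO model directory
```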
diff --git a/examples/YOLOv8-Region-Counter/readme.md b/examples/YOLOv8-Region-Counter/readme.md
index a0811359eac..3ed06799107 100644
--- a/examples/YOLOv8-Region-Counter/readme.md
+++ b/examples/YOLOv8-Region-Counter/readme.md
@@ -1,7 +1,14 @@
# Regions Counting Using YOLOv8 (Inference on Video)
-- Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. These regions can be adjusted interactively using a Left Mouse Click, and the counting process occurs in real time.
-- Regions can be adjusted to suit the user's preferences and requirements.
+> **Region Counter** is now part of **[Ultralytics Solutions](https://docs.ultralytics.com/solutions/)**, offering improved features and regular updates!
+
+**[Explore Object Counting in Regions Here](https://docs.ultralytics.com/guides/region-counting/)**
+
+> **Notice:**
+
+> The GitHub example will remain available but **will no longer be actively maintained**. For the latest updates and improvements, please use the official [link](https://docs.ultralytics.com/guides/region-counting/). Thank you!
+
+Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. Regions can be adjusted interactively with a left mouse click while counting runs in real time, and tailored to suit the user's preferences and requirements.
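The idea behind the example reduces to a simple test: a detection is counted for a region when its bounding-box centroid lies inside the region polygon. Below is a minimal sketch using `shapely`; the polygon and detection boxes are made-up values, not outputs of the example script:

```python
# Count detections whose bounding-box centroid falls inside a region (sketch).
from shapely.geometry import Point, Polygon

region = Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)])  # example region

detections = [  # (x1, y1, x2, y2) boxes; hypothetical values for illustration
    (60, 100, 120, 200),
    (300, 300, 380, 420),
    (500, 40, 560, 90),
]

count = sum(
    region.contains(Point((x1 + x2) / 2, (y1 + y2) / 2)) for x1, y1, x2, y2 in detections
)
print(f"Objects in region: {count}")
```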
@@ -73,7 +80,7 @@ Region counting is a computational method utilized to ascertain the quantity of
**2. Is Friendly Region Plotting Supported by the Region Counter?**
-The Region Counter offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code:
+Region counting offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code:
```python
from shapely.geometry import Polygon
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
index a6c739b7e58..915804ec2a9 100644
--- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
+++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
from collections import defaultdict
@@ -91,7 +91,7 @@ def mouse_callback(event, x, y, flags, param):
def run(
- weights="yolov8n.pt",
+ weights="yolo11n.pt",
source=None,
device="cpu",
view_img=False,
@@ -132,17 +132,19 @@ def run(
model.to("cuda") if device == "0" else model.to("cpu")
# Extract classes names
- names = model.model.names
+ names = model.names
# Video setup
videocapture = cv2.VideoCapture(source)
- frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
- fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
+ frame_width = int(videocapture.get(3))
+ frame_height = int(videocapture.get(4))
+ fps = int(videocapture.get(5))
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# Output setup
save_dir = increment_path(Path("ultralytics_rc_output") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
- video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
+ video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.avi"), fourcc, fps, (frame_width, frame_height))
# Iterate over video frames
while videocapture.isOpened():
@@ -183,7 +185,7 @@ def run(
region_color = region["region_color"]
region_text_color = region["text_color"]
- polygon_coords = np.array(region["polygon"].exterior.coords, dtype=np.int32)
+ polygon_coordinates = np.array(region["polygon"].exterior.coords, dtype=np.int32)
centroid_x, centroid_y = int(region["polygon"].centroid.x), int(region["polygon"].centroid.y)
text_size, _ = cv2.getTextSize(
@@ -201,7 +203,7 @@ def run(
cv2.putText(
frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color, line_thickness
)
- cv2.polylines(frame, [polygon_coords], isClosed=True, color=region_color, thickness=region_thickness)
+ cv2.polylines(frame, [polygon_coordinates], isClosed=True, color=region_color, thickness=region_thickness)
if view_img:
if vid_frame_count == 1:
@@ -227,7 +229,7 @@ def run(
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
- parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
+ parser.add_argument("--weights", type=str, default="yolo11n.pt", help="initial weights path")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
@@ -241,9 +243,9 @@ def parse_opt():
return parser.parse_args()
-def main(opt):
+def main(options):
"""Main function."""
- run(**vars(opt))
+ run(**vars(options))
if __name__ == "__main__":
diff --git a/examples/YOLOv8-SAHI-Inference-Video/readme.md b/examples/YOLOv8-SAHI-Inference-Video/readme.md
index 525aca5ac02..4dc169b3e17 100644
--- a/examples/YOLOv8-SAHI-Inference-Video/readme.md
+++ b/examples/YOLOv8-SAHI-Inference-Video/readme.md
@@ -1,11 +1,11 @@
-# YOLOv8 with SAHI (Inference on Video)
+# YOLO11 with SAHI (Inference on Video)
-[SAHI](https://docs.ultralytics.com/guides/sahi-tiled-inference/) is designed to optimize object detection algorithms for large-scale and high-resolution imagery. It partitions images into manageable slices, performs object detection on each slice, and then stitches the results back together. This tutorial will guide you through the process of running YOLOv8 inference on video files with the aid of SAHI.
+[SAHI](https://docs.ultralytics.com/guides/sahi-tiled-inference/) is designed to optimize object detection algorithms for large-scale and high-resolution imagery. It partitions images into manageable slices, performs object detection on each slice, and then stitches the results back together. This tutorial will guide you through the process of running YOLO11 inference on video files with the aid of SAHI.
## Table of Contents
- [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries)
-- [Step 2: Run the Inference with SAHI using Ultralytics YOLOv8](#step-2-run-the-inference-with-sahi-using-ultralytics-yolov8)
+- [Step 2: Run the Inference with SAHI using Ultralytics YOLO11](#step-2-run-the-inference-with-sahi-using-ultralytics-yolo11)
- [Usage Options](#usage-options)
- [FAQ](#faq)
@@ -18,13 +18,13 @@ Clone the repository, install dependencies and `cd` to this local directory for
git clone https://github.com/ultralytics/ultralytics
# Install dependencies
-pip install sahi ultralytics
+pip install -U sahi ultralytics
# cd to local directory
cd ultralytics/examples/YOLOv8-SAHI-Inference-Video
```
-## Step 2: Run the Inference with SAHI using Ultralytics YOLOv8
+## Step 2: Run the Inference with SAHI using Ultralytics YOLO11
Here are the basic commands for running the inference:
@@ -33,14 +33,14 @@ Here are the basic commands for running the inference:
python yolov8_sahi.py --source "path/to/video.mp4" --save-img
#if you want to change model file
-python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n.pt"
+python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolo11n.pt"
```
## Usage Options
- `--source`: Specifies the path to the video file you want to run inference on.
- `--save-img`: Flag to save the detection results as images.
-- `--weights`: Specifies a different YOLOv8 model file (e.g., `yolov8n.pt`, `yolov8s.pt`, `yolov8m.pt`, `yolov8l.pt`, `yolov8x.pt`).
+- `--weights`: Specifies a different YOLO11 model file (e.g., `yolo11n.pt`, `yolo11s.pt`, `yolo11m.pt`, `yolo11l.pt`, `yolo11x.pt`).
## FAQ
@@ -48,9 +48,9 @@ python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n
SAHI stands for Slicing Aided Hyper Inference. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images. The library source code is available on [GitHub](https://github.com/obss/sahi).
-**2. Why use SAHI with YOLOv8?**
+**2. Why use SAHI with YOLO11?**
-SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLOv8, especially when working with high-resolution videos.
+SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLO11, especially when working with high-resolution videos.
**3. How do I debug issues?**
@@ -66,4 +66,4 @@ Yes, you can specify different YOLO model weights using the `--weights` option.
**5. Where can I find more information?**
-For a full guide to YOLOv8 with SAHI see [https://docs.ultralytics.com/guides/sahi-tiled-inference](https://docs.ultralytics.com/guides/sahi-tiled-inference/).
+For a full guide to YOLO11 with SAHI see [https://docs.ultralytics.com/guides/sahi-tiled-inference](https://docs.ultralytics.com/guides/sahi-tiled-inference/).
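For reference, the sliced-inference flow that `yolov8_sahi.py` (updated in the next diff) wires into a video loop looks like this for a single image. A minimal sketch assuming the `sahi` API used in the diff (`model_type="ultralytics"`, 512×512 slices) and a placeholder image path:

```python
# Single-image SAHI sliced inference with an Ultralytics model (sketch).
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

detection_model = AutoDetectionModel.from_pretrained(
    model_type="ultralytics",        # Ultralytics backend, as in yolov8_sahi.py
    model_path="models/yolo11n.pt",  # placeholder path to the downloaded weights
    device="cpu",
)

image = cv2.imread("large_image.jpg")[..., ::-1]  # BGR -> RGB, matching the script

result = get_sliced_prediction(image, detection_model, slice_height=512, slice_width=512)

for det in result.object_prediction_list:  # merged detections across slices
    print(det.category.name, det.bbox.minx, det.bbox.miny, det.bbox.maxx, det.bbox.maxy)
```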
diff --git a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py
index 4243cc35bb9..69872dcc9e4 100644
--- a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py
+++ b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
from pathlib import Path
@@ -6,32 +6,37 @@
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
-from sahi.utils.yolov8 import download_yolov8s_model
+from sahi.utils.ultralytics import download_yolo11n_model
from ultralytics.utils.files import increment_path
from ultralytics.utils.plotting import Annotator, colors
class SAHIInference:
- """Runs YOLOv8 and SAHI for object detection on video with options to view, save, and track results."""
+ """Runs Ultralytics YOLO11 and SAHI for object detection on video with options to view, save, and track results."""
def __init__(self):
- """Initializes the SAHIInference class for performing sliced inference using SAHI with YOLOv8 models."""
+ """Initializes the SAHIInference class for performing sliced inference using SAHI with YOLO11 models."""
self.detection_model = None
def load_model(self, weights):
- """Loads a YOLOv8 model with specified weights for object detection using SAHI."""
- yolov8_model_path = f"models/{weights}"
- download_yolov8s_model(yolov8_model_path)
+ """Loads a YOLO11 model with specified weights for object detection using SAHI."""
+ yolo11_model_path = f"models/{weights}"
+ download_yolo11n_model(yolo11_model_path)
self.detection_model = AutoDetectionModel.from_pretrained(
- model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu"
+ model_type="ultralytics", model_path=yolo11_model_path, device="cpu"
)
def inference(
- self, weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False, track=False
+ self,
+ weights="yolo11n.pt",
+ source="test.mp4",
+ view_img=False,
+ save_img=False,
+ exist_ok=False,
):
"""
- Run object detection on a video using YOLOv8 and SAHI.
+ Run object detection on a video using YOLO11 and SAHI.
Args:
weights (str): Model weights path.
@@ -39,7 +44,6 @@ def inference(
view_img (bool): Show results.
save_img (bool): Save results.
exist_ok (bool): Overwrite existing files.
- track (bool): Enable object tracking with SAHI
"""
# Video setup
cap = cv2.VideoCapture(source)
@@ -50,8 +54,8 @@ def inference(
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(
- str(save_dir / f"{Path(source).stem}.mp4"),
- cv2.VideoWriter_fourcc(*"mp4v"),
+ str(save_dir / f"{Path(source).stem}.avi"),
+ cv2.VideoWriter_fourcc(*"MJPG"),
int(cap.get(5)),
(frame_width, frame_height),
)
@@ -64,12 +68,10 @@ def inference(
break
annotator = Annotator(frame) # Initialize annotator for plotting detection and tracking results
results = get_sliced_prediction(
- frame,
+ frame[..., ::-1],
self.detection_model,
slice_height=512,
slice_width=512,
- overlap_height_ratio=0.2,
- overlap_width_ratio=0.2,
)
detection_data = [
(det.category.name, det.category.id, (det.bbox.minx, det.bbox.miny, det.bbox.maxx, det.bbox.maxy))
@@ -93,7 +95,7 @@ def inference(
def parse_opt(self):
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
- parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
+ parser.add_argument("--weights", type=str, default="yolo11n.pt", help="initial weights path")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
diff --git a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py
index c1779deaa06..b8e2e7d55d8 100644
--- a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py
+++ b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import argparse
diff --git a/examples/YOLOv8-TFLite-Python/README.md b/examples/YOLOv8-TFLite-Python/README.md
new file mode 100644
index 00000000000..0156759fdba
--- /dev/null
+++ b/examples/YOLOv8-TFLite-Python/README.md
@@ -0,0 +1,55 @@
+# YOLOv8 - TFLite Runtime
+
+This example shows how to run inference with a YOLOv8 TFLite model. It supports FP32, FP16, and INT8 models.
+
+## Installation
+
+### Installing `tflite-runtime`
+
+To load TFLite models, install the `tflite-runtime` package using:
+
+```bash
+pip install tflite-runtime
+```
+
+### Installing `tensorflow-gpu` (For NVIDIA GPU Users)
+
+Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`:
+
+```bash
+pip install tensorflow-gpu
+```
+
+**Note:** Ensure you have compatible GPU drivers installed on your system.
+
+### Installing `tensorflow` (CPU Version)
+
+For CPU usage or non-NVIDIA GPUs, install TensorFlow with:
+
+```bash
+pip install tensorflow
+```
+
+## Usage
+
+Follow these instructions to run YOLOv8 after successful installation.
+
+Convert the YOLOv8 model to TFLite format:
+
+```bash
+yolo export model=yolov8n.pt imgsz=640 format=tflite int8
+```
+
+Locate the TFLite model in `yolov8n_saved_model`. Then, execute the following in your terminal:
+
+```bash
+python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf 0.25 --iou 0.45 --metadata "metadata.yaml"
+```
+
+Replace `yolov8n_full_integer_quant.tflite` with your TFLite model path, `image.jpg` with the input image path, `metadata.yaml` with the file generated by `ultralytics` during export, and adjust the confidence (conf) and IoU (iou) thresholds as necessary.
+
+### Output
+
+The output shows the detections along with the class labels and confidence scores for each detected object.
+
+
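Because the example supports INT8 models, `main.py` (next diff) quantizes the input and dequantizes the output using the scale/zero-point pairs reported by the TFLite interpreter. A minimal sketch of that step with made-up quantization parameters and shapes:

```python
# INT8 quantize/dequantize step used by the TFLite example (sketch; parameters are made up).
import numpy as np

in_scale, in_zero_point = 1 / 255, -128  # example input quantization parameters
out_scale, out_zero_point = 0.005, -10   # example output quantization parameters

x = np.random.rand(1, 640, 640, 3).astype(np.float32)  # normalized [0, 1] input
x_q = (x / in_scale + in_zero_point).astype(np.int8)   # float32 -> int8 for the interpreter

y_q = np.random.randint(-128, 127, (1, 84, 8400), dtype=np.int8)  # stand-in for the raw model output
y = (y_q.astype(np.float32) - out_zero_point) * out_scale         # int8 -> float32 before postprocessing
```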
diff --git a/examples/YOLOv8-TFLite-Python/main.py b/examples/YOLOv8-TFLite-Python/main.py
new file mode 100644
index 00000000000..00c40303285
--- /dev/null
+++ b/examples/YOLOv8-TFLite-Python/main.py
@@ -0,0 +1,221 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import argparse
+from typing import Tuple, Union
+
+import cv2
+import numpy as np
+import tensorflow as tf
+import yaml
+
+from ultralytics.utils import ASSETS
+
+try:
+ from tflite_runtime.interpreter import Interpreter
+except ImportError:
+ import tensorflow as tf
+
+ Interpreter = tf.lite.Interpreter
+
+
+class YOLOv8TFLite:
+ """
+ YOLOv8TFLite.
+
+ A class for performing object detection using the YOLOv8 model with TensorFlow Lite.
+
+ Attributes:
+ model (str): Path to the TensorFlow Lite model file.
+ conf (float): Confidence threshold for filtering detections.
+ iou (float): Intersection over Union threshold for non-maximum suppression.
+ metadata (Optional[str]): Path to the metadata file, if any.
+
+ Methods:
+ detect(img_path: str) -> np.ndarray:
+ Performs inference and returns the output image with drawn detections.
+ """
+
+ def __init__(self, model: str, conf: float = 0.25, iou: float = 0.45, metadata: Union[str, None] = None):
+ """
+ Initializes an instance of the YOLOv8TFLite class.
+
+ Args:
+ model (str): Path to the TFLite model.
+ conf (float, optional): Confidence threshold for filtering detections. Defaults to 0.25.
+ iou (float, optional): IoU (Intersection over Union) threshold for non-maximum suppression. Defaults to 0.45.
+ metadata (Union[str, None], optional): Path to the metadata file or None if not used. Defaults to None.
+ """
+ self.conf = conf
+ self.iou = iou
+ if metadata is None:
+ self.classes = {i: i for i in range(1000)}
+ else:
+ with open(metadata) as f:
+ self.classes = yaml.safe_load(f)["names"]
+ np.random.seed(42)
+ self.color_palette = np.random.uniform(128, 255, size=(len(self.classes), 3))
+
+ self.model = Interpreter(model_path=model)
+ self.model.allocate_tensors()
+
+ input_details = self.model.get_input_details()[0]
+
+ self.in_width, self.in_height = input_details["shape"][1:3]
+ self.in_index = input_details["index"]
+ self.in_scale, self.in_zero_point = input_details["quantization"]
+ self.int8 = input_details["dtype"] == np.int8
+
+ output_details = self.model.get_output_details()[0]
+ self.out_index = output_details["index"]
+ self.out_scale, self.out_zero_point = output_details["quantization"]
+
+ def letterbox(self, img: np.ndarray, new_shape: Tuple = (640, 640)) -> Tuple[np.ndarray, Tuple[float, float]]:
+ """Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models."""
+ shape = img.shape[:2] # current shape [height, width]
+
+ # Scale ratio (new / old)
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+
+ # Compute padding
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding
+
+ if shape[::-1] != new_unpad: # resize
+ img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
+
+ return img, (top / img.shape[0], left / img.shape[1])
+
+ def draw_detections(self, img: np.ndarray, box: np.ndarray, score: np.float32, class_id: int) -> None:
+ """
+ Draws bounding boxes and labels on the input image based on the detected objects.
+
+ Args:
+ img (np.ndarray): The input image to draw detections on.
+ box (np.ndarray): Detected bounding box in the format [x1, y1, width, height].
+ score (np.float32): Corresponding detection score.
+ class_id (int): Class ID for the detected object.
+
+ Returns:
+ None
+ """
+ x1, y1, w, h = box
+ color = self.color_palette[class_id]
+
+ cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
+
+ label = f"{self.classes[class_id]}: {score:.2f}"
+
+ (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+
+ label_x = x1
+ label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
+
+ cv2.rectangle(
+ img,
+ (int(label_x), int(label_y - label_height)),
+ (int(label_x + label_width), int(label_y + label_height)),
+ color,
+ cv2.FILLED,
+ )
+
+ cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
+
+ def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, Tuple[float, float]]:
+ """
+ Preprocesses the input image before performing inference.
+
+ Args:
+ img (np.ndarray): The input image to be preprocessed.
+
+ Returns:
+ Tuple[np.ndarray, Tuple[float, float]]: A tuple containing:
+ - The preprocessed image (np.ndarray).
+ - A tuple of two float values representing the padding applied (top/bottom, left/right).
+ """
+ img, pad = self.letterbox(img, (self.in_width, self.in_height))
+ img = img[..., ::-1][None] # N,H,W,C for TFLite
+ img = np.ascontiguousarray(img)
+ img = img.astype(np.float32)
+ return img / 255, pad
+
+ def postprocess(self, img: np.ndarray, outputs: np.ndarray, pad: Tuple[float, float]) -> np.ndarray:
+ """
+ Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
+
+ Args:
+ img (numpy.ndarray): The input image.
+ outputs (numpy.ndarray): The output of the model.
+ pad (Tuple[float, float]): Padding used by letterbox.
+
+ Returns:
+ numpy.ndarray: The input image with detections drawn on it.
+ """
+ outputs[:, 0] -= pad[1]
+ outputs[:, 1] -= pad[0]
+ outputs[:, :4] *= max(img.shape)
+
+ outputs = outputs.transpose(0, 2, 1)
+ outputs[..., 0] -= outputs[..., 2] / 2
+ outputs[..., 1] -= outputs[..., 3] / 2
+
+ for out in outputs:
+ scores = out[:, 4:].max(-1)
+ keep = scores > self.conf
+ boxes = out[keep, :4]
+ scores = scores[keep]
+ class_ids = out[keep, 4:].argmax(-1)
+
+ indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf, self.iou).flatten()
+
+ [self.draw_detections(img, boxes[i], scores[i], class_ids[i]) for i in indices]
+
+ return img
+
+ def detect(self, img_path: str) -> np.ndarray:
+ """
+ Performs inference using a TFLite model and returns the output image with drawn detections.
+
+ Args:
+ img_path (str): The path to the input image file.
+
+ Returns:
+ np.ndarray: The output image with drawn detections.
+ """
+ img = cv2.imread(img_path)
+ x, pad = self.preprocess(img)
+ if self.int8:
+ x = (x / self.in_scale + self.in_zero_point).astype(np.int8)
+ self.model.set_tensor(self.in_index, x)
+
+ self.model.invoke()
+
+ y = self.model.get_tensor(self.out_index)
+
+ if self.int8:
+ y = (y.astype(np.float32) - self.out_zero_point) * self.out_scale
+
+ return self.postprocess(img, y, pad)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model",
+ type=str,
+ default="yolov8n_saved_model/yolov8n_full_integer_quant.tflite",
+ help="Path to TFLite model.",
+ )
+ parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image")
+ parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold")
+ parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold")
+ parser.add_argument("--metadata", type=str, default="yolov8n_saved_model/metadata.yaml", help="Metadata yaml")
+ args = parser.parse_args()
+
+ detector = YOLOv8TFLite(args.model, args.conf, args.iou, args.metadata)
+    result = detector.detect(args.img)
+
+ cv2.imshow("Output", result)
+ cv2.waitKey(0)
diff --git a/examples/heatmaps.ipynb b/examples/heatmaps.ipynb
index c8064cc7f4a..4f34da35a4c 100644
--- a/examples/heatmaps.ipynb
+++ b/examples/heatmaps.ipynb
@@ -13,17 +13,17 @@
"\n",
" [ไธญๆ](https://docs.ultralytics.com/zh/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/)\n",
"\n",
- " \n",
+ " \n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
- "Welcome to the Ultralytics YOLOv8 🚀 notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
+ "Welcome to the Ultralytics YOLO11 🚀 notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
- "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
+ "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
- "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Heatmap Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
+ "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Heatmap Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
"\n",
"
"
]
@@ -38,7 +38,7 @@
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
- "[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)"
+ "[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)"
]
},
{
@@ -56,7 +56,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-      "Ultralytics YOLOv8.2.17 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
+      "Ultralytics 8.2.17 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
       "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
@@ -76,14 +76,14 @@
"source": [
"# Introduction to Heatmaps\n",
"\n",
- "A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.\n",
+ "A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.\n",
"\n",
"## Real World Applications\n",
"\n",
"| Transportation | Retail |\n",
"|:-----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------:|\n",
- "|  |  |\n",
- "| Ultralytics YOLOv8 Transportation Heatmap | Ultralytics YOLOv8 Retail Heatmap |\n"
+ "|  |  |\n",
+ "| Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap |\n"
]
},
{
@@ -96,10 +96,7 @@
"source": [
"import cv2\n",
"\n",
- "from ultralytics import YOLO, solutions\n",
- "\n",
- "# Load YOLO model\n",
- "model = YOLO(\"yolov8n.pt\")\n",
+ "from ultralytics import solutions\n",
"\n",
"# Open video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
@@ -113,10 +110,9 @@
"\n",
"# Initialize heatmap object\n",
"heatmap_obj = solutions.Heatmap(\n",
- " colormap=cv2.COLORMAP_PARULA,\n",
- " view_img=True,\n",
- " shape=\"circle\",\n",
- " names=model.names,\n",
+ " colormap=cv2.COLORMAP_PARULA, # Color of the heatmap\n",
+ " show=True, # Display the image during processing\n",
+ " model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n",
")\n",
"\n",
"while cap.isOpened():\n",
@@ -125,11 +121,8 @@
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
- " # Perform tracking on the current frame\n",
- " tracks = model.track(im0, persist=True, show=False)\n",
- "\n",
" # Generate heatmap on the frame\n",
- " im0 = heatmap_obj.generate_heatmap(im0, tracks)\n",
+ " im0 = heatmap_obj.generate_heatmap(im0)\n",
"\n",
" # Write the frame to the output video\n",
" video_writer.write(im0)\n",
@@ -161,15 +154,15 @@
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
-    "## YOLOv8 🚀 Resources\n",
+    "## YOLO11 🚀 Resources\n",
"\n",
- "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n",
+ "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
- "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
- "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n",
+ "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
+ "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
- "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
+ "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
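For readers who want the updated heatmap cell above as a plain script, here is a consolidated version. It assumes the `ultralytics.solutions.Heatmap` interface exactly as shown in the diff (`colormap`, `show`, `model`, `generate_heatmap`) and a placeholder video path:

```python
# Plain-script version of the heatmap notebook cell (sketch).
import cv2
from ultralytics import solutions

cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(p)) for p in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

heatmap_obj = solutions.Heatmap(
    colormap=cv2.COLORMAP_PARULA,  # color of the heatmap
    show=True,                     # display the frame during processing
    model="yolo11n.pt",            # Ultralytics YOLO11 model file
)

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break
    im0 = heatmap_obj.generate_heatmap(im0)  # overlay the accumulated heatmap on the frame
    video_writer.write(im0)

cap.release()
video_writer.release()
cv2.destroyAllWindows()
```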
diff --git a/examples/hub.ipynb b/examples/hub.ipynb
index e86b795e5eb..05657155dfe 100644
--- a/examples/hub.ipynb
+++ b/examples/hub.ipynb
@@ -13,7 +13,7 @@
"\n",
"[ไธญๆ](https://docs.ultralytics.com/zh/hub/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/hub/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/hub/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/hub/) | [Deutsch](https://docs.ultralytics.com/de/hub/) | [Franรงais](https://docs.ultralytics.com/fr/hub/) | [Espaรฑol](https://docs.ultralytics.com/es/hub/) | [Portuguรชs](https://docs.ultralytics.com/pt/hub/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/hub/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/hub/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/hub/)\n",
"\n",
- " \n",
+ " \n",
" \n",
"\n",
" \n",
@@ -36,7 +36,7 @@
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
- "[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)"
+ "[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)"
]
},
{
@@ -54,7 +54,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-      "Ultralytics YOLOv8.2.3 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
+      "Ultralytics 8.2.3 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
       "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 28.8/78.2 GB disk)\n"
]
}
diff --git a/examples/object_counting.ipynb b/examples/object_counting.ipynb
index 988bc3269d7..b1f0c523f29 100644
--- a/examples/object_counting.ipynb
+++ b/examples/object_counting.ipynb
@@ -13,17 +13,17 @@
"\n",
" [ไธญๆ](https://docs.ultralytics.com/zh/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/)\n",
"\n",
- " \n",
+ " \n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
- "Welcome to the Ultralytics YOLOv8 🚀 notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
+ "Welcome to the Ultralytics YOLO11 🚀 notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
- "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
+ "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
- "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Object Counting Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
+ "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Object Counting Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
"\n",
""
]
@@ -38,7 +38,7 @@
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
- "[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)"
+ "[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)"
]
},
{
@@ -56,7 +56,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-      "Ultralytics YOLOv8.2.18 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
+      "Ultralytics 8.2.18 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
       "Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
@@ -74,11 +74,11 @@
"id": "m7VkxQ2aeg7k"
},
"source": [
- "# Object Counting using Ultralytics YOLOv8 ๐\n",
+ "# Object Counting using Ultralytics YOLO11 ๐\n",
"\n",
"## What is Object Counting?\n",
"\n",
- "Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLOv8 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and deep learning capabilities.\n",
+ "Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLO11 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and deep learning capabilities.\n",
"\n",
"## Advantages of Object Counting?\n",
"\n",
@@ -90,8 +90,8 @@
"\n",
"| Logistics | Aquaculture |\n",
"|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------:|\n",
- "|  |  |\n",
- "| Conveyor Belt Packets Counting Using Ultralytics YOLOv8 | Fish Counting in Sea using Ultralytics YOLOv8 |\n"
+ "|  |  |\n",
+ "| Conveyor Belt Packets Counting Using Ultralytics YOLO11 | Fish Counting in Sea using Ultralytics YOLO11 |\n"
]
},
{
@@ -104,10 +104,7 @@
"source": [
"import cv2\n",
"\n",
- "from ultralytics import YOLO, solutions\n",
- "\n",
- "# Load the pre-trained YOLOv8 model\n",
- "model = YOLO(\"yolov8n.pt\")\n",
+ "from ultralytics import solutions\n",
"\n",
"# Open the video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
@@ -119,19 +116,15 @@
"# Define points for a line or region of interest in the video frame\n",
"line_points = [(20, 400), (1080, 400)] # Line coordinates\n",
"\n",
- "# Specify classes to count, for example: person (0) and car (2)\n",
- "classes_to_count = [0, 2] # Class IDs for person and car\n",
- "\n",
"# Initialize the video writer to save the output video\n",
"video_writer = cv2.VideoWriter(\"object_counting_output.avi\", cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (w, h))\n",
"\n",
"# Initialize the Object Counter with visualization options and other parameters\n",
"counter = solutions.ObjectCounter(\n",
- " view_img=True, # Display the image during processing\n",
- " reg_pts=line_points, # Region of interest points\n",
- " names=model.names, # Class names from the YOLO model\n",
- " draw_tracks=True, # Draw tracking lines for objects\n",
- " line_thickness=2, # Thickness of the lines drawn\n",
+ " show=True, # Display the image during processing\n",
+ " region=line_points, # Region of interest points\n",
+ " model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n",
+ " line_width=2, # Thickness of the lines and bounding boxes\n",
")\n",
"\n",
"# Process video frames in a loop\n",
@@ -141,11 +134,8 @@
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
- " # Perform object tracking on the current frame, filtering by specified classes\n",
- " tracks = model.track(im0, persist=True, show=False, classes=classes_to_count)\n",
- "\n",
" # Use the Object Counter to count objects in the frame and get the annotated image\n",
- " im0 = counter.start_counting(im0, tracks)\n",
+ " im0 = counter.count(im0)\n",
"\n",
" # Write the annotated frame to the output video\n",
" video_writer.write(im0)\n",
@@ -179,15 +169,15 @@
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
- "## YOLOv8 ๐ Resources\n",
+ "## YOLO11 ๐ Resources\n",
"\n",
- "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n",
+ "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
- "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
- "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n",
+ "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
+ "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
- "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
+ "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
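The hunk above moves `object_counting.ipynb` from the manual `model.track()` + `counter.start_counting()` workflow to the consolidated `solutions.ObjectCounter` API, but it also drops the old person/car filter (`classes_to_count = [0, 2]`). Below is a minimal sketch of how that filter could be restored under the new API; it assumes `solutions.ObjectCounter` accepts a `classes` argument that it forwards to the underlying tracker, which is not shown in this patch.

```python
import cv2

from ultralytics import solutions

# Open the source video and mirror its geometry for the writer (path is a placeholder)
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

counter = solutions.ObjectCounter(
    model="yolo11n.pt",               # Ultralytics YOLO11 model file
    region=[(20, 400), (1080, 400)],  # counting line, as in the notebook
    classes=[0, 2],                   # assumed solutions-level filter: person (0) and car (2)
    show=True,                        # display frames while processing
    line_width=2,                     # thickness of lines and boxes
)

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    im0 = counter.count(im0)  # same call as the updated notebook cell
    video_writer.write(im0)

cap.release()
video_writer.release()
cv2.destroyAllWindows()
```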
diff --git a/examples/object_tracking.ipynb b/examples/object_tracking.ipynb
index af43cc51745..f89c34ddeae 100644
--- a/examples/object_tracking.ipynb
+++ b/examples/object_tracking.ipynb
@@ -13,17 +13,17 @@
"\n",
" [ไธญๆ](https://docs.ultralytics.com/zh/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/)\n",
"\n",
- " \n",
+ " \n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
- "Welcome to the Ultralytics YOLOv8 ๐ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
+ "Welcome to the Ultralytics YOLO11 ๐ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
- "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
+ "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
- "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Tracking Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
+ "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Tracking Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
"\n",
""
]
@@ -38,7 +38,7 @@
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
- "[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)"
+ "[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)"
]
},
{
@@ -56,7 +56,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Ultralytics YOLOv8.2.17 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
+ "Ultralytics 8.2.17 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
"Setup complete โ (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n"
]
}
@@ -76,7 +76,7 @@
"source": [
"# Ultralytics Object Tracking\n",
"\n",
- "[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.\n",
+ "[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.\n",
"\n",
"There are two types of instance segmentation tracking available in the Ultralytics package:\n",
"\n",
@@ -144,7 +144,7 @@
"track_history = defaultdict(lambda: [])\n",
"\n",
"# Load the YOLO model with segmentation capabilities\n",
- "model = YOLO(\"yolov8n-seg.pt\")\n",
+ "model = YOLO(\"yolo11n-seg.pt\")\n",
"\n",
"# Open the video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
@@ -176,7 +176,7 @@
"\n",
" # Annotate each mask with its corresponding tracking ID and color\n",
" for mask, track_id in zip(masks, track_ids):\n",
- " annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), track_label=str(track_id))\n",
+ " annotator.seg_bbox(mask=mask, mask_color=colors(int(track_id), True), label=str(track_id))\n",
"\n",
" # Write the annotated frame to the output video\n",
" out.write(im0)\n",
@@ -214,15 +214,15 @@
"- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n",
"- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n",
"\n",
- "## YOLOv8 ๐ Resources\n",
+ "## YOLO11 ๐ Resources\n",
"\n",
- "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n",
+ "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n",
"\n",
- "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
- "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n",
+ "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n",
+ "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n",
"- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n",
"\n",
- "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
+ "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed."
]
}
],
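For context on the `seg_bbox` change above (`track_label=` becomes `label=` and the track ID is cast to `int` before the colour lookup), here is a condensed sketch of the updated segmentation-tracking loop as it would run outside the notebook; the video path and the display/teardown details are assumptions.

```python
import cv2

from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

model = YOLO("yolo11n-seg.pt")  # segmentation weights, as in the patched cell
cap = cv2.VideoCapture("path/to/video/file.mp4")  # placeholder path

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break

    # persist=True keeps track IDs stable across frames
    results = model.track(im0, persist=True)

    if results[0].boxes.id is not None and results[0].masks is not None:
        masks = results[0].masks.xy
        track_ids = results[0].boxes.id.int().cpu().tolist()

        annotator = Annotator(im0, line_width=2)
        for mask, track_id in zip(masks, track_ids):
            # New signature from this patch: label= instead of track_label=
            annotator.seg_bbox(mask=mask, mask_color=colors(int(track_id), True), label=str(track_id))

    cv2.imshow("instance-segmentation-object-tracking", im0)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
```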
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb
index 1ecb2d98095..1d19aeee37d 100644
--- a/examples/tutorial.ipynb
+++ b/examples/tutorial.ipynb
@@ -3,7 +3,7 @@
"nbformat_minor": 0,
"metadata": {
"colab": {
- "name": "YOLOv8 Tutorial",
+ "name": "YOLO11 Tutorial",
"provenance": [],
"toc_visible": true
},
@@ -27,21 +27,28 @@
"\n",
" [ไธญๆ](https://docs.ultralytics.com/zh/) | [ํ๊ตญ์ด](https://docs.ultralytics.com/ko/) | [ๆฅๆฌ่ช](https://docs.ultralytics.com/ja/) | [ะ ัััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแปt](https://docs.ultralytics.com/vi/) | [ุงูุนุฑุจูุฉ](https://docs.ultralytics.com/ar/)\n",
"\n",
- " \n",
+ " \n",
" \n",
" \n",
- " \n",
+ " \n",
"\n",
" \n",
" \n",
" \n",
"\n",
- "Welcome to the Ultralytics YOLOv8 ๐ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
+ "Welcome to the Ultralytics YOLO11 ๐ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n",
"\n",
- "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
+ "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
"\n",
- "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
+ "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n",
"\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ " Watch: How to Train\n",
+ " Ultralytics\n",
+ " YOLO11 Model on Custom Dataset using Google Colab Notebook ๐
\n",
""
]
},
@@ -55,7 +62,7 @@
"\n",
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n",
"\n",
- "[](https://pypi.org/project/ultralytics/) [](https://pepy.tech/project/ultralytics) [](https://pypi.org/project/ultralytics/)"
+ "[](https://pypi.org/project/ultralytics/) [](https://www.pepy.tech/projects/ultralytics) [](https://pypi.org/project/ultralytics/)"
]
},
{
@@ -65,21 +72,21 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "96335d4c-20a9-4864-f7a4-bb2eb0077a9d"
+ "outputId": "2e992f9f-90bb-4668-de12-fed629975285"
},
"source": [
"%pip install ultralytics\n",
"import ultralytics\n",
"ultralytics.checks()"
],
- "execution_count": null,
+ "execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
- "Setup complete โ (2 CPUs, 12.7 GB RAM, 28.8/78.2 GB disk)\n"
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
+ "Setup complete โ (2 CPUs, 12.7 GB RAM, 41.1/112.6 GB disk)\n"
]
}
]
@@ -92,7 +99,7 @@
"source": [
"# 1. Predict\n",
"\n",
- "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/train/).\n"
+ "YOLO11 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLO11 Predict Docs](https://docs.ultralytics.com/modes/train/).\n"
]
},
{
@@ -102,27 +109,27 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "84f32db2-80b0-4f35-9a2a-a56d11f7863f"
+ "outputId": "e3ebec6f-658a-4803-d80c-e07d12908767"
},
"source": [
- "# Run inference on an image with YOLOv8n\n",
- "!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'"
+ "# Run inference on an image with YOLO11n\n",
+ "!yolo predict model=yolo11n.pt source='https://ultralytics.com/images/zidane.jpg'"
],
- "execution_count": null,
+ "execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n",
- "100% 6.23M/6.23M [00:00<00:00, 83.2MB/s]\n",
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
- "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n",
+ "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...\n",
+ "100% 5.35M/5.35M [00:00<00:00, 72.7MB/s]\n",
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
+ "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
"Downloading https://ultralytics.com/images/zidane.jpg to 'zidane.jpg'...\n",
- "100% 165k/165k [00:00<00:00, 11.1MB/s]\n",
- "image 1/1 /content/zidane.jpg: 384x640 2 persons, 1 tie, 21.4ms\n",
- "Speed: 1.9ms preprocess, 21.4ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)\n",
+ "100% 49.2k/49.2k [00:00<00:00, 5.37MB/s]\n",
+ "image 1/1 /content/zidane.jpg: 384x640 2 persons, 1 tie, 63.4ms\n",
+ "Speed: 14.5ms preprocess, 63.4ms inference, 820.9ms postprocess per image at shape (1, 3, 384, 640)\n",
"Results saved to \u001b[1mruns/detect/predict\u001b[0m\n",
"๐ก Learn more at https://docs.ultralytics.com/modes/predict\n"
]
@@ -146,7 +153,7 @@
},
"source": [
"# 2. Val\n",
- "Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information."
+ "Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLO11 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLO11 Val Docs](https://docs.ultralytics.com/modes/val/) for more information."
]
},
{
@@ -167,43 +174,43 @@
"cell_type": "code",
"metadata": {
"id": "X58w8JLpMnjH",
- "outputId": "bed10d45-ceb6-4b6f-86b7-9428208b142a",
+ "outputId": "af2a5deb-029b-466d-96a4-bd3e406987fa",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"source": [
- "# Validate YOLOv8n on COCO8 val\n",
- "!yolo val model=yolov8n.pt data=coco8.yaml"
+ "# Validate YOLO11n on COCO8 val\n",
+ "!yolo val model=yolo11n.pt data=coco8.yaml"
],
- "execution_count": null,
+ "execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
- "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n",
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
+ "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
"Dataset 'coco8.yaml' images not found โ ๏ธ, missing path '/content/datasets/coco8/images/val'\n",
"Downloading https://ultralytics.com/assets/coco8.zip to '/content/datasets/coco8.zip'...\n",
- "100% 433k/433k [00:00<00:00, 14.2MB/s]\n",
- "Unzipping /content/datasets/coco8.zip to /content/datasets/coco8...: 100% 25/25 [00:00<00:00, 1093.93file/s]\n",
- "Dataset download success โ (1.3s), saved to \u001b[1m/content/datasets\u001b[0m\n",
+ "100% 433k/433k [00:00<00:00, 15.8MB/s]\n",
+ "Unzipping /content/datasets/coco8.zip to /content/datasets/coco8...: 100% 25/25 [00:00<00:00, 1188.35file/s]\n",
+ "Dataset download success โ (1.4s), saved to \u001b[1m/content/datasets\u001b[0m\n",
"\n",
"Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...\n",
- "100% 755k/755k [00:00<00:00, 17.4MB/s]\n",
- "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 157.00it/s]\n",
+ "100% 755k/755k [00:00<00:00, 17.7MB/s]\n",
+ "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 142.04it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco8/labels/val.cache\n",
- " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:06<00:00, 6.89s/it]\n",
- " all 4 17 0.621 0.833 0.888 0.63\n",
- " person 4 10 0.721 0.5 0.519 0.269\n",
- " dog 4 1 0.37 1 0.995 0.597\n",
- " horse 4 2 0.751 1 0.995 0.631\n",
- " elephant 4 2 0.505 0.5 0.828 0.394\n",
- " umbrella 4 1 0.564 1 0.995 0.995\n",
- " potted plant 4 1 0.814 1 0.995 0.895\n",
- "Speed: 0.3ms preprocess, 4.9ms inference, 0.0ms loss, 1.3ms postprocess per image\n",
+ " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:04<00:00, 4.75s/it]\n",
+ " all 4 17 0.57 0.85 0.847 0.632\n",
+ " person 3 10 0.557 0.6 0.585 0.272\n",
+ " dog 1 1 0.548 1 0.995 0.697\n",
+ " horse 1 2 0.531 1 0.995 0.674\n",
+ " elephant 1 2 0.371 0.5 0.516 0.256\n",
+ " umbrella 1 1 0.569 1 0.995 0.995\n",
+ " potted plant 1 1 0.847 1 0.995 0.895\n",
+ "Speed: 1.0ms preprocess, 73.8ms inference, 0.0ms loss, 561.4ms postprocess per image\n",
"Results saved to \u001b[1mruns/detect/val\u001b[0m\n",
"๐ก Learn more at https://docs.ultralytics.com/modes/val\n"
]
@@ -220,13 +227,13 @@
"\n",
"
\n",
"\n",
- "Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information."
+ "Train YOLO11 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLO11 Train Docs](https://docs.ultralytics.com/modes/train/) for more information."
]
},
{
"cell_type": "code",
"source": [
- "#@title Select YOLOv8 ๐ logger {run: 'auto'}\n",
+ "#@title Select YOLO11 ๐ logger {run: 'auto'}\n",
"logger = 'Comet' #@param ['Comet', 'TensorBoard']\n",
"\n",
"if logger == 'Comet':\n",
@@ -246,64 +253,62 @@
"cell_type": "code",
"metadata": {
"id": "1NcFxRcFdJ_O",
- "outputId": "9f60c6cb-fa9c-4785-cb7a-71d40abeaf38",
+ "outputId": "952f35f7-666f-4121-fbdf-2b3a33b28081",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"source": [
- "# Train YOLOv8n on COCO8 for 3 epochs\n",
- "!yolo train model=yolov8n.pt data=coco8.yaml epochs=3 imgsz=640"
+ "# Train YOLO11n on COCO8 for 3 epochs\n",
+ "!yolo train model=yolo11n.pt data=coco8.yaml epochs=3 imgsz=640"
],
- "execution_count": null,
+ "execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
- "\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train\n",
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
+ "\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolo11n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train3\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] \n",
" 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] \n",
- " 2 -1 1 7360 ultralytics.nn.modules.block.C2f [32, 32, 1, True] \n",
- " 3 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] \n",
- " 4 -1 2 49664 ultralytics.nn.modules.block.C2f [64, 64, 2, True] \n",
- " 5 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] \n",
- " 6 -1 2 197632 ultralytics.nn.modules.block.C2f [128, 128, 2, True] \n",
+ " 2 -1 1 6640 ultralytics.nn.modules.block.C3k2 [32, 64, 1, False, 0.25] \n",
+ " 3 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n",
+ " 4 -1 1 26080 ultralytics.nn.modules.block.C3k2 [64, 128, 1, False, 0.25] \n",
+ " 5 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n",
+ " 6 -1 1 87040 ultralytics.nn.modules.block.C3k2 [128, 128, 1, True] \n",
" 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] \n",
- " 8 -1 1 460288 ultralytics.nn.modules.block.C2f [256, 256, 1, True] \n",
+ " 8 -1 1 346112 ultralytics.nn.modules.block.C3k2 [256, 256, 1, True] \n",
" 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] \n",
- " 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
- " 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
- " 12 -1 1 148224 ultralytics.nn.modules.block.C2f [384, 128, 1] \n",
- " 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
- " 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
- " 15 -1 1 37248 ultralytics.nn.modules.block.C2f [192, 64, 1] \n",
- " 16 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n",
- " 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
- " 18 -1 1 123648 ultralytics.nn.modules.block.C2f [192, 128, 1] \n",
- " 19 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n",
- " 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
- " 21 -1 1 493056 ultralytics.nn.modules.block.C2f [384, 256, 1] \n",
- " 22 [15, 18, 21] 1 897664 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n",
- "Model summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs\n",
+ " 10 -1 1 249728 ultralytics.nn.modules.block.C2PSA [256, 256, 1] \n",
+ " 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
+ " 12 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
+ " 13 -1 1 111296 ultralytics.nn.modules.block.C3k2 [384, 128, 1, False] \n",
+ " 14 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
+ " 15 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
+ " 16 -1 1 32096 ultralytics.nn.modules.block.C3k2 [256, 64, 1, False] \n",
+ " 17 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n",
+ " 18 [-1, 13] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
+ " 19 -1 1 86720 ultralytics.nn.modules.block.C3k2 [192, 128, 1, False] \n",
+ " 20 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n",
+ " 21 [-1, 10] 1 0 ultralytics.nn.modules.conv.Concat [1] \n",
+ " 22 -1 1 378880 ultralytics.nn.modules.block.C3k2 [384, 256, 1, True] \n",
+ " 23 [16, 19, 22] 1 464912 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n",
+ "YOLO11n summary: 319 layers, 2,624,080 parameters, 2,624,064 gradients, 6.6 GFLOPs\n",
"\n",
- "Transferred 355/355 items from pretrained weights\n",
+ "Transferred 499/499 items from pretrained weights\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/detect/train', view at http://localhost:6006/\n",
- "Freezing layer 'model.22.dfl.conv.weight'\n",
- "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...\n",
+ "Freezing layer 'model.23.dfl.conv.weight'\n",
+ "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLO11n...\n",
"\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed โ \n",
- "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 837.19it/s]\n",
- "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco8/labels/train.cache\n",
- "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
- "/usr/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n",
- " self.pid = os.fork()\n",
+ "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00, ?it/s]\n",
+ "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00, ?it/s]\n",
"Plotting labels to runs/detect/train/labels.jpg... \n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... \n",
- "\u001b[34m\u001b[1moptimizer:\u001b[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)\n",
+ "\u001b[34m\u001b[1moptimizer:\u001b[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mmodel graph visualization added โ \n",
"Image sizes 640 train, 640 val\n",
"Using 2 dataloader workers\n",
@@ -311,36 +316,36 @@
"Starting training for 3 epochs...\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
- " 1/3 0.81G 1.039 3.146 1.498 25 640: 100% 1/1 [00:01<00:00, 1.51s/it]\n",
- " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 2.32it/s]\n",
- " all 4 17 0.62 0.885 0.888 0.621\n",
+ " 1/3 0.719G 1.004 3.249 1.367 30 640: 100% 1/1 [00:00<00:00, 1.16it/s]\n",
+ " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 5.07it/s]\n",
+ " all 4 17 0.58 0.85 0.849 0.631\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
- " 2/3 0.772G 1.169 2.779 1.442 36 640: 100% 1/1 [00:00<00:00, 8.14it/s]\n",
- " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 3.22it/s]\n",
- " all 4 17 0.595 0.903 0.888 0.616\n",
+ " 2/3 0.715G 1.31 4.043 1.603 35 640: 100% 1/1 [00:00<00:00, 6.88it/s]\n",
+ " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 9.08it/s]\n",
+ " all 4 17 0.581 0.85 0.851 0.63\n",
"\n",
" Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size\n",
- " 3/3 0.776G 0.6701 3.697 1.096 17 640: 100% 1/1 [00:00<00:00, 6.45it/s]\n",
- " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 5.66it/s]\n",
- " all 4 17 0.577 0.833 0.874 0.614\n",
+ " 3/3 0.692G 1.134 3.174 1.599 18 640: 100% 1/1 [00:00<00:00, 6.75it/s]\n",
+ " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 11.60it/s]\n",
+ " all 4 17 0.582 0.85 0.855 0.632\n",
"\n",
- "3 epochs completed in 0.002 hours.\n",
- "Optimizer stripped from runs/detect/train/weights/last.pt, 6.5MB\n",
- "Optimizer stripped from runs/detect/train/weights/best.pt, 6.5MB\n",
+ "3 epochs completed in 0.003 hours.\n",
+ "Optimizer stripped from runs/detect/train/weights/last.pt, 5.5MB\n",
+ "Optimizer stripped from runs/detect/train/weights/best.pt, 5.5MB\n",
"\n",
"Validating runs/detect/train/weights/best.pt...\n",
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n",
- "Model summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n",
- " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 18.23it/s]\n",
- " all 4 17 0.617 0.884 0.888 0.622\n",
- " person 4 10 0.67 0.5 0.52 0.278\n",
- " dog 4 1 0.361 1 0.995 0.597\n",
- " horse 4 2 0.728 1 0.995 0.631\n",
- " elephant 4 2 0.602 0.805 0.828 0.332\n",
- " umbrella 4 1 0.553 1 0.995 0.995\n",
- " potted plant 4 1 0.789 1 0.995 0.895\n",
- "Speed: 0.3ms preprocess, 4.1ms inference, 0.0ms loss, 1.2ms postprocess per image\n",
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n",
+ "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
+ " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:00<00:00, 23.42it/s]\n",
+ " all 4 17 0.579 0.85 0.855 0.615\n",
+ " person 3 10 0.579 0.6 0.623 0.268\n",
+ " dog 1 1 0.549 1 0.995 0.697\n",
+ " horse 1 2 0.553 1 0.995 0.675\n",
+ " elephant 1 2 0.364 0.5 0.528 0.261\n",
+ " umbrella 1 1 0.571 1 0.995 0.895\n",
+ " potted plant 1 1 0.857 1 0.995 0.895\n",
+ "Speed: 0.2ms preprocess, 4.3ms inference, 0.0ms loss, 1.2ms postprocess per image\n",
"Results saved to \u001b[1mruns/detect/train\u001b[0m\n",
"๐ก Learn more at https://docs.ultralytics.com/modes/train\n"
]
@@ -352,26 +357,29 @@
"source": [
"# 4. Export\n",
"\n",
- "Export a YOLOv8 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLOv8 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n",
+ "Export a YOLO11 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLO11 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n",
"\n",
"- ๐ก ProTip: Export to [ONNX](https://docs.ultralytics.com/integrations/onnx/) or [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) for up to 3x CPU speedup. \n",
"- ๐ก ProTip: Export to [TensorRT](https://docs.ultralytics.com/integrations/tensorrt/) for up to 5x GPU speedup.\n",
"\n",
"| Format | `format` Argument | Model | Metadata | Arguments |\n",
"|--------------------------------------------------------------------------|-------------------|---------------------------|----------|----------------------------------------------------------------------|\n",
- "| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | โ | - |\n",
- "| [TorchScript](https://docs.ultralytics.com/integrations/torchscript) | `torchscript` | `yolov8n.torchscript` | โ | `imgsz`, `optimize`, `batch` |\n",
- "| [ONNX](https://docs.ultralytics.com/integrations/onnx) | `onnx` | `yolov8n.onnx` | โ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |\n",
- "| [OpenVINO](https://docs.ultralytics.com/integrations/openvino) | `openvino` | `yolov8n_openvino_model/` | โ | `imgsz`, `half`, `int8`, `batch` |\n",
- "| [TensorRT](https://docs.ultralytics.com/integrations/tensorrt) | `engine` | `yolov8n.engine` | โ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |\n",
- "| [CoreML](https://docs.ultralytics.com/integrations/coreml) | `coreml` | `yolov8n.mlpackage` | โ | `imgsz`, `half`, `int8`, `nms`, `batch` |\n",
- "| [TF SavedModel](https://docs.ultralytics.com/integrations/tf-savedmodel) | `saved_model` | `yolov8n_saved_model/` | โ | `imgsz`, `keras`, `int8`, `batch` |\n",
- "| [TF GraphDef](https://docs.ultralytics.com/integrations/tf-graphdef) | `pb` | `yolov8n.pb` | โ | `imgsz`, `batch` |\n",
- "| [TF Lite](https://docs.ultralytics.com/integrations/tflite) | `tflite` | `yolov8n.tflite` | โ | `imgsz`, `half`, `int8`, `batch` |\n",
- "| [TF Edge TPU](https://docs.ultralytics.com/integrations/edge-tpu) | `edgetpu` | `yolov8n_edgetpu.tflite` | โ | `imgsz`, `batch` |\n",
- "| [TF.js](https://docs.ultralytics.com/integrations/tfjs) | `tfjs` | `yolov8n_web_model/` | โ | `imgsz`, `half`, `int8`, `batch` |\n",
- "| [PaddlePaddle](https://docs.ultralytics.com/integrations/paddlepaddle) | `paddle` | `yolov8n_paddle_model/` | โ | `imgsz`, `batch` |\n",
- "| [NCNN](https://docs.ultralytics.com/integrations/ncnn) | `ncnn` | `yolov8n_ncnn_model/` | โ | `imgsz`, `half`, `batch` |"
+ "| [PyTorch](https://pytorch.org/) | - | `yolo11n.pt` | โ | - |\n",
+ "| [TorchScript](https://docs.ultralytics.com/integrations/torchscript) | `torchscript` | `yolo11n.torchscript` | โ | `imgsz`, `optimize`, `batch` |\n",
+ "| [ONNX](https://docs.ultralytics.com/integrations/onnx) | `onnx` | `yolo11n.onnx` | โ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |\n",
+ "| [OpenVINO](https://docs.ultralytics.com/integrations/openvino) | `openvino` | `yolo11n_openvino_model/` | โ | `imgsz`, `half`, `dynamic`, `int8`, `batch`, `data` |\n",
+ "| [TensorRT](https://docs.ultralytics.com/integrations/tensorrt) | `engine` | `yolo11n.engine` | โ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch`, `data` |\n",
+ "| [CoreML](https://docs.ultralytics.com/integrations/coreml) | `coreml` | `yolo11n.mlpackage` | โ | `imgsz`, `half`, `int8`, `nms`, `batch` |\n",
+ "| [TF SavedModel](https://docs.ultralytics.com/integrations/tf-savedmodel) | `saved_model` | `yolo11n_saved_model/` | โ | `imgsz`, `keras`, `int8`, `batch` |\n",
+ "| [TF GraphDef](https://docs.ultralytics.com/integrations/tf-graphdef) | `pb` | `yolo11n.pb` | โ | `imgsz`, `batch` |\n",
+ "| [TF Lite](https://docs.ultralytics.com/integrations/tflite) | `tflite` | `yolo11n.tflite` | โ | `imgsz`, `half`, `int8`, `batch`, `data` |\n",
+ "| [TF Edge TPU](https://docs.ultralytics.com/integrations/edge-tpu) | `edgetpu` | `yolo11n_edgetpu.tflite` | โ | `imgsz` |\n",
+ "| [TF.js](https://docs.ultralytics.com/integrations/tfjs) | `tfjs` | `yolo11n_web_model/` | โ | `imgsz`, `half`, `int8`, `batch` |\n",
+ "| [PaddlePaddle](https://docs.ultralytics.com/integrations/paddlepaddle) | `paddle` | `yolo11n_paddle_model/` | โ | `imgsz`, `batch` |\n",
+ "| [MNN](https://docs.ultralytics.com/integrations/mnn) | `mnn` | `yolo11n.mnn` | โ | `imgsz`, `batch`, `int8`, `half` |\n",
+ "| [NCNN](https://docs.ultralytics.com/integrations/ncnn) | `ncnn` | `yolo11n_ncnn_model/` | โ | `imgsz`, `half`, `batch` |\n",
+ "| [IMX500](https://docs.ultralytics.com/integrations/sony-imx500) | `imx` | `yolov8n_imx_model/` | โ | `imgsz`, `int8`, `data` |\n",
+ "| [RKNN](https://docs.ultralytics.com/integrations/rockchip-rknn) | `rknn` | `yolo11n_rknn_model/` | โ | `imgsz`, `batch`, `name` |"
],
"metadata": {
"id": "nPZZeNrLCQG6"
@@ -380,33 +388,33 @@
{
"cell_type": "code",
"source": [
- "!yolo export model=yolov8n.pt format=torchscript"
+ "!yolo export model=yolo11n.pt format=torchscript"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CYIjW4igCjqD",
- "outputId": "947e65cc-79c8-4713-bfd4-3139903ac05a"
+ "outputId": "5357fa04-6749-4508-effe-8d4078533539"
},
- "execution_count": null,
+ "execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Ultralytics YOLOv8.2.3 ๐ Python-3.10.12 torch-2.2.1+cu121 CPU (Intel Xeon 2.00GHz)\n",
- "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n",
+ "Ultralytics 8.3.2 ๐ Python-3.10.12 torch-2.4.1+cu121 CPU (Intel Xeon 2.20GHz)\n",
+ "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n",
"\n",
- "\u001b[34m\u001b[1mPyTorch:\u001b[0m starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)\n",
+ "\u001b[34m\u001b[1mPyTorch:\u001b[0m starting from 'yolo11n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (5.4 MB)\n",
"\n",
- "\u001b[34m\u001b[1mTorchScript:\u001b[0m starting export with torch 2.2.1+cu121...\n",
- "\u001b[34m\u001b[1mTorchScript:\u001b[0m export success โ 2.0s, saved as 'yolov8n.torchscript' (12.4 MB)\n",
+ "\u001b[34m\u001b[1mTorchScript:\u001b[0m starting export with torch 2.4.1+cu121...\n",
+ "\u001b[34m\u001b[1mTorchScript:\u001b[0m export success โ 2.4s, saved as 'yolo11n.torchscript' (10.5 MB)\n",
"\n",
- "Export complete (4.0s)\n",
+ "Export complete (4.2s)\n",
"Results saved to \u001b[1m/content\u001b[0m\n",
- "Predict: yolo predict task=detect model=yolov8n.torchscript imgsz=640 \n",
- "Validate: yolo val task=detect model=yolov8n.torchscript imgsz=640 data=coco.yaml \n",
+ "Predict: yolo predict task=detect model=yolo11n.torchscript imgsz=640 \n",
+ "Validate: yolo val task=detect model=yolo11n.torchscript imgsz=640 data=coco.yaml \n",
"Visualize: https://netron.app\n",
"๐ก Learn more at https://docs.ultralytics.com/modes/export\n"
]
@@ -418,7 +426,7 @@
"source": [
"# 5. Python Usage\n",
"\n",
- "YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLOv8 Python Docs](https://docs.ultralytics.com/usage/python/)."
+ "YOLO11 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLO11 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLO11 Python Docs](https://docs.ultralytics.com/usage/python/)."
],
"metadata": {
"id": "kUMOQ0OeDBJG"
@@ -430,8 +438,8 @@
"from ultralytics import YOLO\n",
"\n",
"# Load a model\n",
- "model = YOLO('yolov8n.yaml') # build a new model from scratch\n",
- "model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)\n",
+ "model = YOLO('yolo11n.yaml') # build a new model from scratch\n",
+ "model = YOLO('yolo11n.pt') # load a pretrained model (recommended for training)\n",
"\n",
"# Use the model\n",
"results = model.train(data='coco8.yaml', epochs=3) # train the model\n",
@@ -450,7 +458,7 @@
"source": [
"# 6. Tasks\n",
"\n",
- "YOLOv8 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLOv8 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n",
+ "YOLO11 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLO11 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n",
"\n",
" \n"
],
@@ -463,7 +471,7 @@
"source": [
"## 1. Detection\n",
"\n",
- "YOLOv8 _detection_ models have no suffix and are the default YOLOv8 models, i.e. `yolov8n.pt` and are pretrained on COCO. See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n"
+ "YOLO11 _detection_ models have no suffix and are the default YOLO11 models, i.e. `yolo11n.pt` and are pretrained on COCO. See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n"
],
"metadata": {
"id": "yq26lwpYK1lq"
@@ -472,10 +480,10 @@
{
"cell_type": "code",
"source": [
- "# Load YOLOv8n, train it on COCO128 for 3 epochs and predict an image with it\n",
+ "# Load YOLO11n, train it on COCO128 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
- "model = YOLO('yolov8n.pt') # load a pretrained YOLOv8n detection model\n",
+ "model = YOLO('yolo11n.pt') # load a pretrained YOLO detection model\n",
"model.train(data='coco8.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
@@ -490,7 +498,7 @@
"source": [
"## 2. Segmentation\n",
"\n",
- "YOLOv8 _segmentation_ models use the `-seg` suffix, i.e. `yolov8n-seg.pt` and are pretrained on COCO. See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n"
+ "YOLO11 _segmentation_ models use the `-seg` suffix, i.e. `yolo11n-seg.pt` and are pretrained on COCO. See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n"
],
"metadata": {
"id": "7ZW58jUzK66B"
@@ -499,10 +507,10 @@
{
"cell_type": "code",
"source": [
- "# Load YOLOv8n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n",
+ "# Load YOLO11n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
- "model = YOLO('yolov8n-seg.pt') # load a pretrained YOLOv8n segmentation model\n",
+ "model = YOLO('yolo11n-seg.pt') # load a pretrained YOLO segmentation model\n",
"model.train(data='coco8-seg.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
@@ -517,7 +525,7 @@
"source": [
"## 3. Classification\n",
"\n",
- "YOLOv8 _classification_ models use the `-cls` suffix, i.e. `yolov8n-cls.pt` and are pretrained on ImageNet. See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n"
+ "YOLO11 _classification_ models use the `-cls` suffix, i.e. `yolo11n-cls.pt` and are pretrained on ImageNet. See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n"
],
"metadata": {
"id": "ax3p94VNK9zR"
@@ -526,10 +534,10 @@
{
"cell_type": "code",
"source": [
- "# Load YOLOv8n-cls, train it on mnist160 for 3 epochs and predict an image with it\n",
+ "# Load YOLO11n-cls, train it on mnist160 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
- "model = YOLO('yolov8n-cls.pt') # load a pretrained YOLOv8n classification model\n",
+ "model = YOLO('yolo11n-cls.pt') # load a pretrained YOLO classification model\n",
"model.train(data='mnist160', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
@@ -544,7 +552,7 @@
"source": [
"## 4. Pose\n",
"\n",
- "YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details."
+ "YOLO11 _pose_ models use the `-pose` suffix, i.e. `yolo11n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details."
],
"metadata": {
"id": "SpIaFLiO11TG"
@@ -553,10 +561,10 @@
{
"cell_type": "code",
"source": [
- "# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n",
+ "# Load YOLO11n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
- "model = YOLO('yolov8n-pose.pt') # load a pretrained YOLOv8n pose model\n",
+ "model = YOLO('yolo11n-pose.pt') # load a pretrained YOLO pose model\n",
"model.train(data='coco8-pose.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
],
@@ -571,7 +579,7 @@
"source": [
"## 4. Oriented Bounding Boxes (OBB)\n",
"\n",
- "YOLOv8 _OBB_ models use the `-obb` suffix, i.e. `yolov8n-obb.pt` and are pretrained on the DOTA dataset. See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for full details."
+ "YOLO11 _OBB_ models use the `-obb` suffix, i.e. `yolo11n-obb.pt` and are pretrained on the DOTA dataset. See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for full details."
],
"metadata": {
"id": "cf5j_T9-B5F0"
@@ -580,12 +588,12 @@
{
"cell_type": "code",
"source": [
- "# Load YOLOv8n-obb, train it on DOTA8 for 3 epochs and predict an image with it\n",
+ "# Load YOLO11n-obb, train it on DOTA8 for 3 epochs and predict an image with it\n",
"from ultralytics import YOLO\n",
"\n",
- "model = YOLO('yolov8n-obb.pt') # load a pretrained YOLOv8n OBB model\n",
- "model.train(data='coco8-dota.yaml', epochs=3) # train the model\n",
- "model('https://ultralytics.com/images/bus.jpg') # predict on an image"
+ "model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n",
+ "model.train(data='dota8.yaml', epochs=3) # train the model\n",
+ "model('https://ultralytics.com/images/boats.jpg') # predict on an image"
],
"metadata": {
"id": "IJNKClOOB5YS"
@@ -646,7 +654,7 @@
"source": [
"# Validate multiple models\n",
"for x in 'nsmlx':\n",
- " !yolo val model=yolov8{x}.pt data=coco.yaml"
+ " !yolo val model=yolo11{x}.pt data=coco.yaml"
],
"metadata": {
"id": "Wdc6t_bfzDDk"
diff --git a/mkdocs.yml b/mkdocs.yml
index ee6a25d21b5..3e735dfb466 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Configuration file for building the Ultralytics YOLO documentation site using MkDocs.
# Provides settings to control site metadata, customize the appearance using the
@@ -15,6 +15,7 @@ repo_name: ultralytics/ultralytics
remote_name: https://github.com/ultralytics/docs
docs_dir: "docs/en/" # where to find the markdown files
site_dir: "site/" # where to publish to
+use_directory_urls: true # don't display 'index.html' in slugs
# Theme customization
theme:
@@ -22,7 +23,7 @@ theme:
language: en
custom_dir: docs/overrides/
logo: https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Reverse.svg
- favicon: assets/favicon.ico
+ favicon: https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/logo/favicon-yolo.png
icon:
repo: fontawesome/brands/github
# font: # disabled for faster page load times
@@ -69,8 +70,9 @@ theme:
- content.tabs.link # all code tabs change simultaneously
# Customization
-copyright: © 2024 Ultralytics Inc. All rights reserved.
+copyright: © 2025 Ultralytics Inc. All rights reserved.
extra: # version:
+ homepage: https://www.ultralytics.com/
# provider: mike # version drop-down menu
robots: robots.txt
analytics:
@@ -90,14 +92,16 @@ extra: # version:
- icon: fontawesome/brands/python
link: https://pypi.org/project/ultralytics/
- icon: fontawesome/brands/discord
- link: https://ultralytics.com/discord
+ link: https://discord.com/invite/ultralytics
- icon: fontawesome/brands/reddit
link: https://reddit.com/r/ultralytics
extra_css:
- stylesheets/style.css
+
extra_javascript:
- javascript/extra.js
+ - javascript/giscus.js
markdown_extensions:
- admonition
@@ -135,6 +139,10 @@ validation:
unrecognized_links: warn
# Primary navigation ---------------------------------------------------------------------------------------------------
+not_in_nav: |
+ /compare
+ /macros
+
nav:
- Home:
- Home: index.md
@@ -162,9 +170,7 @@ nav:
- solutions/index.md
- Guides:
- guides/index.md
- - Explorer:
- - datasets/explorer/index.md
- - NEW 🚀 Live Inference: guides/streamlit-live-inference.md # for promotion of new pages
+ - YOLO11 🚀 NEW: models/yolo11.md # for promotion of new pages
- Languages:
- 🇬🇧 English: https://ultralytics.com/docs/
- 🇨🇳 简体中文: https://docs.ultralytics.com/zh/
@@ -251,7 +257,7 @@ nav:
- YOLOv8: models/yolov8.md
- YOLOv9: models/yolov9.md
- YOLOv10: models/yolov10.md
- - YOLO11: models/yolo11.md
+ - YOLO11 🚀 NEW: models/yolo11.md
- SAM (Segment Anything Model): models/sam.md
- SAM 2 (Segment Anything Model 2): models/sam-2.md
- MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md
@@ -261,11 +267,6 @@ nav:
- YOLO-World (Real-Time Open-Vocabulary Object Detection): models/yolo-world.md
- Datasets:
- datasets/index.md
- - Explorer:
- - datasets/explorer/index.md
- - Explorer API: datasets/explorer/api.md
- - Explorer Dashboard: datasets/explorer/dashboard.md
- - VOC Exploration Example: datasets/explorer/explorer.ipynb
- Detection:
- datasets/detect/index.md
- Argoverse: datasets/detect/argoverse.md
@@ -279,10 +280,11 @@ nav:
- VisDrone: datasets/detect/visdrone.md
- VOC: datasets/detect/voc.md
- xView: datasets/detect/xview.md
- - Roboflow 100: datasets/detect/roboflow-100.md
+ - RF100: datasets/detect/roboflow-100.md
- Brain-tumor: datasets/detect/brain-tumor.md
- African-wildlife: datasets/detect/african-wildlife.md
- Signature: datasets/detect/signature.md
+ - Medical-pills: datasets/detect/medical-pills.md
- Segmentation:
- datasets/segment/index.md
- COCO: datasets/segment/coco.md
@@ -296,6 +298,7 @@ nav:
- COCO8-pose: datasets/pose/coco8-pose.md
- Tiger-pose: datasets/pose/tiger-pose.md
- Hand-keypoints: datasets/pose/hand-keypoints.md
+ - Dog-pose: datasets/pose/dog-pose.md
- Classification:
- datasets/classify/index.md
- Caltech 101: datasets/classify/caltech101.md
@@ -314,9 +317,8 @@ nav:
- DOTA8: datasets/obb/dota8.md
- Multi-Object Tracking:
- datasets/track/index.md
-  - NEW 🚀 Solutions:
+  - Solutions 🚀 NEW:
- solutions/index.md
- - Analytics: guides/analytics.md
- Object Counting: guides/object-counting.md
- Object Cropping: guides/object-cropping.md
- Object Blurring: guides/object-blurring.md
@@ -330,7 +332,9 @@ nav:
- Distance Calculation: guides/distance-calculation.md
- Queue Management: guides/queue-management.md
- Parking Management: guides/parking-management.md
-      - NEW 🚀 Live Inference: guides/streamlit-live-inference.md
+ - Analytics: guides/analytics.md
+ - Live Inference: guides/streamlit-live-inference.md
+      - Track Objects in Zone 🚀 NEW: guides/trackzone.md
- Guides:
- guides/index.md
- YOLO Common Issues: guides/yolo-common-issues.md
@@ -364,7 +368,7 @@ nav:
- datasets/explorer/index.md
- Explorer API: datasets/explorer/api.md
- Explorer Dashboard Demo: datasets/explorer/dashboard.md
- - VOC Exploration Example: datasets/explorer/explorer.ipynb
+ - VOC Exploration Example: datasets/explorer/explorer.md
- YOLOv5:
- yolov5/index.md
- Quickstart: yolov5/quickstart_tutorial.md
@@ -391,35 +395,40 @@ nav:
- Clearml Logging: yolov5/tutorials/clearml_logging_integration.md
- Integrations:
- integrations/index.md
- - TorchScript: integrations/torchscript.md
+ - Amazon SageMaker: integrations/amazon-sagemaker.md
+ - ClearML: integrations/clearml.md
+ - Comet ML: integrations/comet.md
+ - CoreML: integrations/coreml.md
+ - DVC: integrations/dvc.md
+ - Google Colab: integrations/google-colab.md
+ - Gradio: integrations/gradio.md
+ - IBM Watsonx: integrations/ibm-watsonx.md
+ - JupyterLab: integrations/jupyterlab.md
+ - Kaggle: integrations/kaggle.md
+ - MLflow: integrations/mlflow.md
+ - Neural Magic: integrations/neural-magic.md
- ONNX: integrations/onnx.md
- OpenVINO: integrations/openvino.md
- - TensorRT: integrations/tensorrt.md
- - CoreML: integrations/coreml.md
- - TF SavedModel: integrations/tf-savedmodel.md
- - TF GraphDef: integrations/tf-graphdef.md
- - TFLite: integrations/tflite.md
- - TFLite Edge TPU: integrations/edge-tpu.md
- - TF.js: integrations/tfjs.md
- PaddlePaddle: integrations/paddlepaddle.md
+ - MNN: integrations/mnn.md
- NCNN: integrations/ncnn.md
- - Comet ML: integrations/comet.md
+ - Paperspace Gradient: integrations/paperspace.md
- Ray Tune: integrations/ray-tune.md
- Roboflow: integrations/roboflow.md
- - MLflow: integrations/mlflow.md
- - ClearML: integrations/clearml.md
- - DVC: integrations/dvc.md
- - Weights & Biases: integrations/weights-biases.md
- - Neural Magic: integrations/neural-magic.md
- - Gradio: integrations/gradio.md
+ - TF GraphDef: integrations/tf-graphdef.md
+ - TF SavedModel: integrations/tf-savedmodel.md
+ - TF.js: integrations/tfjs.md
+ - TFLite: integrations/tflite.md
+ - TFLite Edge TPU: integrations/edge-tpu.md
- TensorBoard: integrations/tensorboard.md
- - Amazon SageMaker: integrations/amazon-sagemaker.md
- - Paperspace Gradient: integrations/paperspace.md
- - Google Colab: integrations/google-colab.md
- - Kaggle: integrations/kaggle.md
- - JupyterLab: integrations/jupyterlab.md
- - IBM Watsonx: integrations/ibm-watsonx.md
+ - TensorRT: integrations/tensorrt.md
+ - TorchScript: integrations/torchscript.md
- VS Code: integrations/vscode.md
+ - Weights & Biases: integrations/weights-biases.md
+ - Albumentations: integrations/albumentations.md
+ - SONY IMX500: integrations/sony-imx500.md
+ - Rockchip RKNN: integrations/rockchip-rknn.md
+ - Seeed Studio reCamera: integrations/seeedstudio-recamera.md
- HUB:
- hub/index.md
- Web:
@@ -475,11 +484,6 @@ nav:
- build: reference/data/build.md
- converter: reference/data/converter.md
- dataset: reference/data/dataset.md
- - explorer:
- - explorer: reference/data/explorer/explorer.md
- - gui:
- - dash: reference/data/explorer/gui/dash.md
- - utils: reference/data/explorer/utils.md
- loaders: reference/data/loaders.md
- split_dota: reference/data/split_dota.md
- utils: reference/data/utils.md
@@ -573,8 +577,12 @@ nav:
- object_counter: reference/solutions/object_counter.md
- parking_management: reference/solutions/parking_management.md
- queue_management: reference/solutions/queue_management.md
+ - region_counter: reference/solutions/region_counter.md
+ - security_alarm: reference/solutions/security_alarm.md
+ - solutions: reference/solutions/solutions.md
- speed_estimation: reference/solutions/speed_estimation.md
- streamlit_inference: reference/solutions/streamlit_inference.md
+ - trackzone: reference/solutions/trackzone.md
- trackers:
- basetrack: reference/trackers/basetrack.md
- bot_sort: reference/trackers/bot_sort.md
@@ -621,8 +629,8 @@ nav:
- Contributing Guide: help/contributing.md
- Continuous Integration (CI) Guide: help/CI.md
- Contributor License Agreement (CLA): help/CLA.md
- - Minimum Reproducible Example (MRE) Guide: help/minimum_reproducible_example.md
- - Code of Conduct: help/code_of_conduct.md
+ - Minimum Reproducible Example (MRE) Guide: help/minimum-reproducible-example.md
+ - Code of Conduct: help/code-of-conduct.md
- Environmental, Health and Safety (EHS) Policy: help/environmental-health-safety.md
- Security Policy: help/security.md
- Privacy Policy: help/privacy.md
@@ -630,8 +638,8 @@ nav:
# Plugins including 301 redirects navigation ---------------------------------------------------------------------------
plugins:
- macros
- - search:
- lang: en
+ # - search:
+ # lang: en
- mkdocstrings:
enabled: true
default_handler: python
@@ -658,7 +666,7 @@ plugins:
add_share_buttons: True
add_css: False
default_image: https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png
- - mkdocs-jupyter
+ default_author: glenn.jocher@ultralytics.com
- redirects:
redirect_maps:
hi/index.md: index.md
@@ -697,6 +705,8 @@ plugins:
tasks/keypoints.md: tasks/pose.md
tasks/tracking.md: modes/track.md
SECURITY.md: help/security.md
+ help/minimum_reproducible_example.md: help/minimum-reproducible-example.md
+ help/code_of_conduct.md: help/code-of-conduct.md
tutorials/architecture-summary.md: yolov5/tutorials/architecture_description.md
tutorials/clearml-logging.md: yolov5/tutorials/clearml_logging_integration.md
tutorials/comet-logging.md: yolov5/tutorials/comet_logging_integration.md
@@ -760,3 +770,6 @@ plugins:
yolov5/environments/yolov5_amazon_web_services_quickstart_tutorial.md: yolov5/environments/aws_quickstart_tutorial.md
yolov5/environments/yolov5_google_cloud_platform_quickstart_tutorial.md: yolov5/environments/google_cloud_quickstart_tutorial.md
yolov5/environments/yolov5_docker_image_quickstart_tutorial.md: yolov5/environments/docker_image_quickstart_tutorial.md
+ reference/data/explorer/explorer.md: datasets/explorer/index.md
+ reference/data/explorer/gui/dash.md: datasets/explorer/index.md
+ reference/data/explorer/utils.md: datasets/explorer/index.md
diff --git a/pyproject.toml b/pyproject.toml
index a59b15ef04f..1d1cc60312d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Overview:
# This pyproject.toml file manages the build, packaging, and distribution of the Ultralytics library.
@@ -19,25 +19,24 @@
# For comprehensive documentation and usage instructions, visit: https://docs.ultralytics.com
[build-system]
-requires = ["setuptools>=57.0.0", "wheel"]
+requires = ["setuptools>=70.0.0", "wheel"]
build-backend = "setuptools.build_meta"
# Project settings -----------------------------------------------------------------------------------------------------
[project]
name = "ultralytics"
dynamic = ["version"]
-description = "Ultralytics YOLO for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification."
+description = "Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification."
readme = "README.md"
requires-python = ">=3.8"
license = { "text" = "AGPL-3.0" }
keywords = ["machine-learning", "deep-learning", "computer-vision", "ML", "DL", "AI", "YOLO", "YOLOv3", "YOLOv5", "YOLOv8", "YOLOv9", "YOLOv10", "YOLO11", "HUB", "Ultralytics"]
authors = [
- { name = "Glenn Jocher", email = "glenn.jocher@ultralytics.com"},
- { name = "Jing Qiu", email = "jing.qiu@ultralytics.com"},
- { name = "Ayush Chaurasia" }
+ { name = "Glenn Jocher", email = "glenn.jocher@ultralytics.com" },
+ { name = "Jing Qiu", email = "jing.qiu@ultralytics.com" },
]
maintainers = [
- { name = "Ultralytics", email = "hello@ultralytics.com" }
+ { name = "Ultralytics", email = "hello@ultralytics.com" },
]
classifiers = [
"Development Status :: 4 - Beta",
@@ -62,7 +61,7 @@ classifiers = [
# Required dependencies ------------------------------------------------------------------------------------------------
dependencies = [
- "numpy>=1.23.0,<2.0.0", # temporary patch for compat errors https://github.com/ultralytics/yolov5/actions/runs/9538130424/job/26286956354
+ "numpy>=1.23.0,<=2.1.1", # OpenVINO and TFLite errors on '--slow' CI Tests https://github.com/ultralytics/ultralytics/pull/18943
"matplotlib>=3.3.0",
"opencv-python>=4.6.0",
"pillow>=7.1.2",
@@ -70,7 +69,7 @@ dependencies = [
"requests>=2.23.0",
"scipy>=1.4.1",
"torch>=1.8.0",
- "torch>=1.8.0,!=2.4.0; sys_platform == 'win32'", # Windows CPU errors w/ 2.4.0 https://github.com/ultralytics/ultralytics/issues/15049
+ "torch>=1.8.0,!=2.4.0; sys_platform == 'win32'", # Windows CPU errors w/ 2.4.0 https://github.com/ultralytics/ultralytics/issues/15049
"torchvision>=0.9.0",
"tqdm>=4.64.0", # progress bars
"psutil", # system utilization
@@ -88,29 +87,28 @@ dev = [
"pytest-cov",
"coverage[toml]",
"mkdocs>=1.6.0",
+ "beautifulsoup4<=4.12.3", # For docs https://github.com/ultralytics/ultralytics/pull/19067
"mkdocs-material>=9.5.9",
"mkdocstrings[python]",
- "mkdocs-jupyter", # notebooks
"mkdocs-redirects", # 301 redirects
- "mkdocs-ultralytics-plugin>=0.1.8", # for meta descriptions and images, dates and authors
+ "mkdocs-ultralytics-plugin>=0.1.16", # for meta descriptions and images, dates and authors
"mkdocs-macros-plugin>=1.0.5" # duplicating content (i.e. export tables) in multiple places
]
export = [
"onnx>=1.12.0", # ONNX export
"coremltools>=7.0; platform_system != 'Windows' and python_version <= '3.11'", # CoreML supported on macOS and Linux
+ "scikit-learn>=1.3.2; platform_system != 'Windows' and python_version <= '3.11'", # CoreML k-means quantization
"openvino>=2024.0.0", # OpenVINO export
"tensorflow>=2.0.0", # TF bug https://github.com/ultralytics/ultralytics/issues/5161
"tensorflowjs>=3.9.0", # TF.js export, automatically installs tensorflow
- "tensorstore>=0.1.63; platform_machine == 'aarch64' and python_version >= '3.9'", # for TF Raspberry Pi exports
- "keras", # not installed automatically by tensorflow>=2.16
+ "tensorstore>=0.1.63; platform_machine == 'aarch64' and python_version >= '3.9'", # for TF Raspberry Pi exports
+ "keras", # not installed automatically by tensorflow>=2.16
"flatbuffers>=23.5.26,<100; platform_machine == 'aarch64'", # update old 'flatbuffers' included inside tensorflow package
- "numpy==1.23.5; platform_machine == 'aarch64'", # fix error: `np.bool` was a deprecated alias for the builtin `bool` when using TensorRT models on NVIDIA Jetson
"h5py!=3.11.0; platform_machine == 'aarch64'", # fix h5py build issues due to missing aarch64 wheels in 3.11 release
]
-explorer = [
- "lancedb", # vector search
- "duckdb<=0.9.2", # SQL queries, duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181
- "streamlit", # visualizing with GUI
+solutions = [
+ "shapely>=2.0.0", # shapely for point and polygon data matching
+ "streamlit", # for live inference on web browser i.e `yolo streamlit-predict`
]
logging = [
"comet", # https://docs.ultralytics.com/integrations/comet/
@@ -129,7 +127,7 @@ extra = [
"Source" = "https://github.com/ultralytics/ultralytics"
"Documentation" = "https://docs.ultralytics.com"
"Bug Reports" = "https://github.com/ultralytics/ultralytics/issues"
-"Changelog" = "https://github.com/ultralytics/ultralytics/releases"
+"Changelog" = "https://github.com/ultralytics/ultralytics/releases"
[project.scripts]
yolo = "ultralytics.cfg:entrypoint"
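Editor's note: the removed `explorer` extra above is replaced by a new `solutions` extra that pulls in `shapely` for point-and-polygon matching and `streamlit` for the live-inference GUI; installing it would then be, e.g., `pip install "ultralytics[solutions]"`. As a rough illustration of the kind of region check shapely enables (not code from this patch; the region coordinates simply mirror the solutions tests further down):

```python
# Illustrative sketch only: a shapely-based region check of the kind the new
# 'solutions' extra supports. Region points mirror the solutions tests below.
from shapely.geometry import Point, Polygon

region = Polygon([(20, 400), (1080, 400), (1080, 360), (20, 360)])  # counting region
centroid = Point(550, 380)  # hypothetical tracked-object center
print(region.contains(centroid))  # True -> the object falls inside the region
```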
diff --git a/tests/__init__.py b/tests/__init__.py
index ea8afff5a80..9e86aa3c593 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import ASSETS, ROOT, WEIGHTS_DIR, checks
@@ -17,7 +17,6 @@
"SOURCE",
"SOURCES_LIST",
"TMP",
- "IS_TMP_WRITEABLE",
"CUDA_IS_AVAILABLE",
"CUDA_DEVICE_COUNT",
)
diff --git a/tests/conftest.py b/tests/conftest.py
index 7b0539b467f..8703d81fce7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import shutil
from pathlib import Path
@@ -74,10 +74,10 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):
# Remove files
models = [path for x in ["*.onnx", "*.torchscript"] for path in WEIGHTS_DIR.rglob(x)]
- for file in ["bus.jpg", "yolo11n.onnx", "yolo11n.torchscript"] + models:
+ for file in ["decelera_portrait_min.mov", "bus.jpg", "yolo11n.onnx", "yolo11n.torchscript"] + models:
Path(file).unlink(missing_ok=True)
# Remove directories
models = [path for x in ["*.mlpackage", "*_openvino_model"] for path in WEIGHTS_DIR.rglob(x)]
- for directory in [TMP.parents[1] / ".pytest_cache", TMP] + models:
+ for directory in [WEIGHTS_DIR / "path with spaces", TMP.parents[1] / ".pytest_cache", TMP] + models:
shutil.rmtree(directory, ignore_errors=True)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3eadf3c24e3..aab6d8b4ac7 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import subprocess
@@ -59,7 +59,8 @@ def test_rtdetr(task="detect", model="yolov8n-rtdetr.yaml", data="coco8.yaml"):
run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25")
run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
if TORCH_1_9:
- run(f"yolo predict {task} model='rtdetr-l.pt' source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
+ weights = WEIGHTS_DIR / "rtdetr-l.pt"
+ run(f"yolo predict {task} model={weights} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt")
@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="MobileSAM with CLIP is not supported in Python 3.12")
@@ -97,9 +98,12 @@ def test_mobilesam():
# Source
source = ASSETS / "zidane.jpg"
- # Predict a segment based on a point prompt
+ # Predict a segment based on a 1D point prompt and 1D labels.
model.predict(source, points=[900, 370], labels=[1])
+ # Predict a segment based on 3D points and 2D labels (multiple points per object).
+ model.predict(source, points=[[[900, 370], [1000, 100]]], labels=[[1, 1]])
+
# Predict a segment based on a box prompt
model.predict(source, bboxes=[439, 437, 524, 709], save=True)
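Editor's note: the updated MobileSAM test above exercises the new prompt shapes, i.e. 1D points with 1D labels for a single point, and 3D points with 2D labels for multiple points per object. A minimal standalone sketch of the same prompt formats (assuming the `mobile_sam.pt` weights and a local `zidane.jpg` are available) would be:

```python
# Sketch of the SAM/MobileSAM prompt formats exercised by the test above.
# Assumes mobile_sam.pt and a local zidane.jpg; values mirror the test.
from ultralytics import SAM

model = SAM("mobile_sam.pt")

# 1D point prompt with 1D labels (1 = foreground)
model.predict("zidane.jpg", points=[900, 370], labels=[1])

# 3D points with 2D labels: multiple points prompting a single object
model.predict("zidane.jpg", points=[[[900, 370], [1000, 100]]], labels=[[1, 1]])

# Box prompt
model.predict("zidane.jpg", bboxes=[439, 437, 524, 709])
```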
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 0b3429d0565..d94f95bd06b 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from itertools import product
from pathlib import Path
@@ -10,6 +10,7 @@
from ultralytics import YOLO
from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
from ultralytics.utils import ASSETS, WEIGHTS_DIR
+from ultralytics.utils.checks import check_amp
def test_checks():
@@ -18,6 +19,13 @@ def test_checks():
assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
+def test_amp():
+ """Test AMP training checks."""
+ model = YOLO("yolo11n.pt").model.cuda()
+ assert check_amp(model)
+
+
@pytest.mark.slow
@pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability")
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
@@ -108,7 +116,7 @@ def test_predict_sam():
from ultralytics.models.sam import Predictor as SAMPredictor
# Load a model
- model = SAM(WEIGHTS_DIR / "sam_b.pt")
+ model = SAM(WEIGHTS_DIR / "sam2.1_b.pt")
# Display model information (optional)
model.info()
@@ -119,9 +127,21 @@ def test_predict_sam():
# Run inference with bboxes prompt
model(SOURCE, bboxes=[439, 437, 524, 709], device=0)
- # Run inference with points prompt
+ # Run inference with no labels
+ model(ASSETS / "zidane.jpg", points=[900, 370], device=0)
+
+ # Run inference with 1D points and 1D labels
model(ASSETS / "zidane.jpg", points=[900, 370], labels=[1], device=0)
+ # Run inference with 2D points and 1D labels
+ model(ASSETS / "zidane.jpg", points=[[900, 370]], labels=[1], device=0)
+
+ # Run inference with multiple 2D points and 1D labels
+ model(ASSETS / "zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1], device=0)
+
+ # Run inference with 3D points and 2D labels (multiple points per object)
+ model(ASSETS / "zidane.jpg", points=[[[900, 370], [1000, 100]]], labels=[[1, 1]], device=0)
+
# Create SAMPredictor
overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model=WEIGHTS_DIR / "mobile_sam.pt")
predictor = SAMPredictor(overrides=overrides)
diff --git a/tests/test_engine.py b/tests/test_engine.py
index aa4b671eaa0..fe95a5ca5dd 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import sys
from unittest import mock
diff --git a/tests/test_explorer.py b/tests/test_explorer.py
deleted file mode 100644
index 45b0a31e369..00000000000
--- a/tests/test_explorer.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-import PIL
-import pytest
-
-from ultralytics import Explorer
-from ultralytics.utils import ASSETS
-from ultralytics.utils.torch_utils import TORCH_1_13
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13")
-def test_similarity():
- """Test the correctness and response length of similarity calculations and SQL queries in the Explorer."""
- exp = Explorer(data="coco8.yaml")
- exp.create_embeddings_table()
- similar = exp.get_similar(idx=1)
- assert len(similar) == 4
- similar = exp.get_similar(img=ASSETS / "bus.jpg")
- assert len(similar) == 4
- similar = exp.get_similar(idx=[1, 2], limit=2)
- assert len(similar) == 2
- sim_idx = exp.similarity_index()
- assert len(sim_idx) == 4
- sql = exp.sql_query("WHERE labels LIKE '%zebra%'")
- assert len(sql) == 1
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13")
-def test_det():
- """Test detection functionalities and verify embedding table includes bounding boxes."""
- exp = Explorer(data="coco8.yaml", model="yolo11n.pt")
- exp.create_embeddings_table(force=True)
- assert len(exp.table.head()["bboxes"]) > 0
- similar = exp.get_similar(idx=[1, 2], limit=10)
- assert len(similar) > 0
- # This is a loose test, just checks errors not correctness
- similar = exp.plot_similar(idx=[1, 2], limit=10)
- assert isinstance(similar, PIL.Image.Image)
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13")
-def test_seg():
- """Test segmentation functionalities and ensure the embedding table includes segmentation masks."""
- exp = Explorer(data="coco8-seg.yaml", model="yolo11n-seg.pt")
- exp.create_embeddings_table(force=True)
- assert len(exp.table.head()["masks"]) > 0
- similar = exp.get_similar(idx=[1, 2], limit=10)
- assert len(similar) > 0
- similar = exp.plot_similar(idx=[1, 2], limit=10)
- assert isinstance(similar, PIL.Image.Image)
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13")
-def test_pose():
- """Test pose estimation functionality and verify the embedding table includes keypoints."""
- exp = Explorer(data="coco8-pose.yaml", model="yolo11n-pose.pt")
- exp.create_embeddings_table(force=True)
- assert len(exp.table.head()["keypoints"]) > 0
- similar = exp.get_similar(idx=[1, 2], limit=10)
- assert len(similar) > 0
- similar = exp.plot_similar(idx=[1, 2], limit=10)
- assert isinstance(similar, PIL.Image.Image)
diff --git a/tests/test_exports.py b/tests/test_exports.py
index e6e2ec15986..0faba6d4c88 100644
--- a/tests/test_exports.py
+++ b/tests/test_exports.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import shutil
import uuid
@@ -11,6 +11,7 @@
from ultralytics import YOLO
from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS
from ultralytics.utils import (
+ ARM64,
IS_RASPBERRYPI,
LINUX,
MACOS,
@@ -42,14 +43,16 @@ def test_export_openvino():
@pytest.mark.slow
@pytest.mark.skipif(not TORCH_1_13, reason="OpenVINO requires torch>=1.13")
@pytest.mark.parametrize(
- "task, dynamic, int8, half, batch",
- [ # generate all combinations but exclude those where both int8 and half are True
- (task, dynamic, int8, half, batch)
- for task, dynamic, int8, half, batch in product(TASKS, [True, False], [True, False], [True, False], [1, 2])
- if not (int8 and half) # exclude cases where both int8 and half are True
+ "task, dynamic, int8, half, batch, nms",
+ [ # generate all combinations except for exclusion cases
+ (task, dynamic, int8, half, batch, nms)
+ for task, dynamic, int8, half, batch, nms in product(
+ TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False]
+ )
+ if not ((int8 and half) or (task == "classify" and nms))
],
)
-def test_export_openvino_matrix(task, dynamic, int8, half, batch):
+def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms):
"""Test YOLO model exports to OpenVINO under various configuration matrix conditions."""
file = YOLO(TASK2MODEL[task]).export(
format="openvino",
@@ -59,6 +62,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch):
half=half,
batch=batch,
data=TASK2DATA[task],
+ nms=nms,
)
if WINDOWS:
# Use unique filenames due to Windows file permissions bug possibly due to latent threaded use
@@ -71,36 +75,39 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch):
@pytest.mark.slow
@pytest.mark.parametrize(
- "task, dynamic, int8, half, batch, simplify", product(TASKS, [True, False], [False], [False], [1, 2], [True, False])
+ "task, dynamic, int8, half, batch, simplify, nms",
+ [ # generate all combinations except for exclusion cases
+ (task, dynamic, int8, half, batch, simplify, nms)
+ for task, dynamic, int8, half, batch, simplify, nms in product(
+ TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False]
+ )
+ if not ((int8 and half) or (task == "classify" and nms) or (task == "obb" and nms and not TORCH_1_13))
+ ],
)
-def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify):
+def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms):
"""Test YOLO exports to ONNX format with various configurations and parameters."""
file = YOLO(TASK2MODEL[task]).export(
- format="onnx",
- imgsz=32,
- dynamic=dynamic,
- int8=int8,
- half=half,
- batch=batch,
- simplify=simplify,
+ format="onnx", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, simplify=simplify, nms=nms
)
YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference
Path(file).unlink() # cleanup
@pytest.mark.slow
-@pytest.mark.parametrize("task, dynamic, int8, half, batch", product(TASKS, [False], [False], [False], [1, 2]))
-def test_export_torchscript_matrix(task, dynamic, int8, half, batch):
+@pytest.mark.parametrize(
+ "task, dynamic, int8, half, batch, nms",
+ [ # generate all combinations except for exclusion cases
+ (task, dynamic, int8, half, batch, nms)
+ for task, dynamic, int8, half, batch, nms in product(TASKS, [False], [False], [False], [1, 2], [True, False])
+ if not (task == "classify" and nms)
+ ],
+)
+def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms):
"""Tests YOLO model exports to TorchScript format under varied configurations."""
file = YOLO(TASK2MODEL[task]).export(
- format="torchscript",
- imgsz=32,
- dynamic=dynamic,
- int8=int8,
- half=half,
- batch=batch,
+ format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
)
- YOLO(file)([SOURCE] * 3, imgsz=64 if dynamic else 32) # exported model inference at batch=3
+ YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference
Path(file).unlink() # cleanup
@@ -110,10 +117,10 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch):
@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="CoreML not supported in Python 3.12")
@pytest.mark.parametrize(
"task, dynamic, int8, half, batch",
- [ # generate all combinations but exclude those where both int8 and half are True
+ [ # generate all combinations except for exclusion cases
(task, dynamic, int8, half, batch)
for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1])
- if not (int8 and half) # exclude cases where both int8 and half are True
+ if not (int8 and half)
],
)
def test_export_coreml_matrix(task, dynamic, int8, half, batch):
@@ -134,22 +141,19 @@ def test_export_coreml_matrix(task, dynamic, int8, half, batch):
@pytest.mark.skipif(not checks.IS_PYTHON_MINIMUM_3_10, reason="TFLite export requires Python>=3.10")
@pytest.mark.skipif(not LINUX, reason="Test disabled as TF suffers from install conflicts on Windows and macOS")
@pytest.mark.parametrize(
- "task, dynamic, int8, half, batch",
- [ # generate all combinations but exclude those where both int8 and half are True
- (task, dynamic, int8, half, batch)
- for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1])
- if not (int8 and half) # exclude cases where both int8 and half are True
+ "task, dynamic, int8, half, batch, nms",
+ [ # generate all combinations except for exclusion cases
+ (task, dynamic, int8, half, batch, nms)
+ for task, dynamic, int8, half, batch, nms in product(
+ TASKS, [False], [True, False], [True, False], [1], [True, False]
+ )
+ if not ((int8 and half) or (task == "classify" and nms))
],
)
-def test_export_tflite_matrix(task, dynamic, int8, half, batch):
+def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms):
"""Test YOLO exports to TFLite format considering various export configurations."""
file = YOLO(TASK2MODEL[task]).export(
- format="tflite",
- imgsz=32,
- dynamic=dynamic,
- int8=int8,
- half=half,
- batch=batch,
+ format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms
)
YOLO(file)([SOURCE] * batch, imgsz=32) # exported model inference at batch=3
Path(file).unlink() # cleanup
@@ -157,7 +161,7 @@ def test_export_tflite_matrix(task, dynamic, int8, half, batch):
@pytest.mark.skipif(not TORCH_1_9, reason="CoreML>=7.2 not supported with PyTorch<=1.8")
@pytest.mark.skipif(WINDOWS, reason="CoreML not supported on Windows") # RuntimeError: BlobWriter not loaded
-@pytest.mark.skipif(IS_RASPBERRYPI, reason="CoreML not supported on Raspberry Pi")
+@pytest.mark.skipif(LINUX and ARM64, reason="CoreML not supported on aarch64 Linux")
@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="CoreML not supported in Python 3.12")
def test_export_coreml():
"""Test YOLO exports to CoreML format, optimized for macOS only."""
@@ -192,8 +196,25 @@ def test_export_paddle():
YOLO(MODEL).export(format="paddle", imgsz=32)
+@pytest.mark.slow
+@pytest.mark.skipif(IS_RASPBERRYPI, reason="MNN not supported on Raspberry Pi")
+def test_export_mnn():
+    """Test YOLO exports to MNN format (WARNING: MNN test must precede NCNN test or a CI error occurs on Windows)."""
+ file = YOLO(MODEL).export(format="mnn", imgsz=32)
+ YOLO(file)(SOURCE, imgsz=32) # exported model inference
+
+
@pytest.mark.slow
def test_export_ncnn():
"""Test YOLO exports to NCNN format."""
file = YOLO(MODEL).export(format="ncnn", imgsz=32)
YOLO(file)(SOURCE, imgsz=32) # exported model inference
+
+
+@pytest.mark.skipif(True, reason="Test disabled as keras and tensorflow version conflicts with tflite export.")
+@pytest.mark.skipif(not LINUX or MACOS, reason="Skipping test on Windows and Macos")
+def test_export_imx():
+ """Test YOLO exports to IMX format."""
+ model = YOLO("yolov8n.pt")
+ file = model.export(format="imx", imgsz=32)
+ YOLO(file)(SOURCE, imgsz=32)
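Editor's note: the export matrices above now thread an `nms` flag through the OpenVINO, ONNX, TorchScript and TFLite tests (excluded for the classify task). A hedged sketch of the flag outside the parametrized tests, reusing the same model and image size:

```python
# Sketch of the new nms export flag exercised by the matrices above.
# Assumes yolo11n.pt; imgsz and the bus.jpg asset mirror the tests.
from ultralytics import YOLO
from ultralytics.utils import ASSETS

model = YOLO("yolo11n.pt")
onnx_file = model.export(format="onnx", imgsz=32, nms=True)  # export with NMS included
YOLO(onnx_file)(ASSETS / "bus.jpg", imgsz=32)  # run the exported model
```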
diff --git a/tests/test_integrations.py b/tests/test_integrations.py
index 4c8e066978a..8067a1787f1 100644
--- a/tests/test_integrations.py
+++ b/tests/test_integrations.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import os
diff --git a/tests/test_python.py b/tests/test_python.py
index 117e6f802e1..644176fb482 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import csv
@@ -409,15 +409,6 @@ def test_utils_torchutils():
time_sync()
-@pytest.mark.slow
-@pytest.mark.skipif(not ONLINE, reason="environment is offline")
-def test_utils_downloads():
- """Test file download utilities from ultralytics.utils.downloads."""
- from ultralytics.utils.downloads import get_google_drive_file_info
-
- get_google_drive_file_info("https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link")
-
-
def test_utils_ops():
"""Test utility operations functions for coordinate transformation and normalization."""
from ultralytics.utils.ops import (
@@ -585,11 +576,11 @@ def test_model_embeddings():
@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="YOLOWorld with CLIP is not supported in Python 3.12")
def test_yolo_world():
"""Tests YOLO world models with CLIP support, including detection and training scenarios."""
- model = YOLO("yolov8s-world.pt") # no YOLO11n-world model yet
+ model = YOLO(WEIGHTS_DIR / "yolov8s-world.pt") # no YOLO11n-world model yet
model.set_classes(["tree", "window"])
model(SOURCE, conf=0.01)
- model = YOLO("yolov8s-worldv2.pt") # no YOLO11n-world model yet
+ model = YOLO(WEIGHTS_DIR / "yolov8s-worldv2.pt") # no YOLO11n-world model yet
# Training from a pretrained model. Eval is included at the final stage of training.
# Use dota8.yaml which has fewer categories to reduce the inference time of CLIP model
model.train(
diff --git a/tests/test_solutions.py b/tests/test_solutions.py
index fabec621d36..056a056fbc1 100644
--- a/tests/test_solutions.py
+++ b/tests/test_solutions.py
@@ -1,58 +1,66 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import cv2
import pytest
+from tests import TMP
from ultralytics import YOLO, solutions
+from ultralytics.utils import ASSETS_URL, WEIGHTS_DIR
from ultralytics.utils.downloads import safe_download
-MAJOR_SOLUTIONS_DEMO = "https://github.com/ultralytics/assets/releases/download/v0.0.0/solutions_ci_demo.mp4"
-WORKOUTS_SOLUTION_DEMO = "https://github.com/ultralytics/assets/releases/download/v0.0.0/solution_ci_pose_demo.mp4"
+DEMO_VIDEO = "solutions_ci_demo.mp4"
+POSE_VIDEO = "solution_ci_pose_demo.mp4"
@pytest.mark.slow
def test_major_solutions():
- """Test the object counting, heatmap, speed estimation and queue management solution."""
- safe_download(url=MAJOR_SOLUTIONS_DEMO)
- model = YOLO("yolo11n.pt")
- names = model.names
- cap = cv2.VideoCapture("solutions_ci_demo.mp4")
+    """Test the object counting, heatmap, speed estimation, trackzone and queue management solutions."""
+ safe_download(url=f"{ASSETS_URL}/{DEMO_VIDEO}", dir=TMP)
+ cap = cv2.VideoCapture(str(TMP / DEMO_VIDEO))
assert cap.isOpened(), "Error reading video file"
- region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
- counter = solutions.ObjectCounter(reg_pts=region_points, names=names, view_img=False)
- heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, names=names, view_img=False)
- speed = solutions.SpeedEstimator(reg_pts=region_points, names=names, view_img=False)
- queue = solutions.QueueManager(names=names, reg_pts=region_points, view_img=False)
+ region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
+ counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False) # Test object counter
+ heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False) # Test heatmaps
+ heatmap_count = solutions.Heatmap(
+ colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False, region=region_points
+ ) # Test heatmaps with object counting
+    speed = solutions.SpeedEstimator(region=region_points, model="yolo11n.pt", show=False)  # Test speed estimation
+    queue = solutions.QueueManager(region=region_points, model="yolo11n.pt", show=False)  # Test queue manager
+    line_analytics = solutions.Analytics(analytics_type="line", model="yolo11n.pt", show=False)  # line analytics
+    pie_analytics = solutions.Analytics(analytics_type="pie", model="yolo11n.pt", show=False)  # pie analytics
+    bar_analytics = solutions.Analytics(analytics_type="bar", model="yolo11n.pt", show=False)  # bar analytics
+    area_analytics = solutions.Analytics(analytics_type="area", model="yolo11n.pt", show=False)  # area analytics
+ trackzone = solutions.TrackZone(region=region_points, model="yolo11n.pt", show=False) # Test trackzone
+ frame_count = 0 # Required for analytics
while cap.isOpened():
success, im0 = cap.read()
if not success:
break
+ frame_count += 1
original_im0 = im0.copy()
- tracks = model.track(im0, persist=True, show=False)
- _ = counter.start_counting(original_im0.copy(), tracks)
- _ = heatmap.generate_heatmap(original_im0.copy(), tracks)
- _ = speed.estimate_speed(original_im0.copy(), tracks)
- _ = queue.process_queue(original_im0.copy(), tracks)
+ _ = counter.count(original_im0.copy())
+ _ = heatmap.generate_heatmap(original_im0.copy())
+ _ = heatmap_count.generate_heatmap(original_im0.copy())
+ _ = speed.estimate_speed(original_im0.copy())
+ _ = queue.process_queue(original_im0.copy())
+ _ = line_analytics.process_data(original_im0.copy(), frame_count)
+ _ = pie_analytics.process_data(original_im0.copy(), frame_count)
+ _ = bar_analytics.process_data(original_im0.copy(), frame_count)
+ _ = area_analytics.process_data(original_im0.copy(), frame_count)
+ _ = trackzone.trackzone(original_im0.copy())
cap.release()
- cv2.destroyAllWindows()
-
-@pytest.mark.slow
-def test_aigym():
- """Test the workouts monitoring solution."""
- safe_download(url=WORKOUTS_SOLUTION_DEMO)
- model = YOLO("yolo11n-pose.pt")
- cap = cv2.VideoCapture("solution_ci_pose_demo.mp4")
+ # Test workouts monitoring
+ safe_download(url=f"{ASSETS_URL}/{POSE_VIDEO}", dir=TMP)
+ cap = cv2.VideoCapture(str(TMP / POSE_VIDEO))
assert cap.isOpened(), "Error reading video file"
- gym_object = solutions.AIGym(line_thickness=2, pose_type="squat", kpts_to_check=[5, 11, 13])
+ gym = solutions.AIGym(kpts=[5, 11, 13], show=False)
while cap.isOpened():
success, im0 = cap.read()
if not success:
break
- results = model.track(im0, verbose=False)
- _ = gym_object.start_counting(im0, results)
+ _ = gym.monitor(im0)
cap.release()
- cv2.destroyAllWindows()
@pytest.mark.slow
@@ -60,9 +68,9 @@ def test_instance_segmentation():
"""Test the instance segmentation solution."""
from ultralytics.utils.plotting import Annotator, colors
- model = YOLO("yolo11n-seg.pt")
+ model = YOLO(WEIGHTS_DIR / "yolo11n-seg.pt")
names = model.names
- cap = cv2.VideoCapture("solutions_ci_demo.mp4")
+ cap = cv2.VideoCapture(TMP / DEMO_VIDEO)
assert cap.isOpened(), "Error reading video file"
while cap.isOpened():
success, im0 = cap.read()
@@ -83,4 +91,4 @@ def test_instance_segmentation():
@pytest.mark.slow
def test_streamlit_predict():
"""Test streamlit predict live inference solution."""
- solutions.inference()
+ solutions.Inference().inference()
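Editor's note: the rewritten solutions test reflects the new API, where each solution owns its model and is called once per frame (for example `counter.count(frame)`), instead of receiving tracks from a separate `model.track()` call. A minimal per-frame sketch mirroring the test (the video path is a placeholder):

```python
# Sketch of the new per-frame solutions API used in the test above.
# Assumes yolo11n.pt; region points mirror the test, video path is a placeholder.
import cv2

from ultralytics import solutions

region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False)

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    annotated = counter.count(frame)  # tracking and counting happen inside the solution
cap.release()
```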
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index daff29f8f4e..01631ca8f3c 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,13 +1,13 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-__version__ = "8.3.1"
+__version__ = "8.3.71"
import os
-# Set ENV Variables (place before imports)
-os.environ["OMP_NUM_THREADS"] = "1" # reduce CPU utilization during training
+# Set ENV variables (place before imports)
+if not os.environ.get("OMP_NUM_THREADS"):
+ os.environ["OMP_NUM_THREADS"] = "1" # default for reduced CPU utilization during training
-from ultralytics.data.explorer.explorer import Explorer
from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld
from ultralytics.utils import ASSETS, SETTINGS
from ultralytics.utils.checks import check_yolo as checks
@@ -26,5 +26,4 @@
"checks",
"download",
"settings",
- "Explorer",
)
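Editor's note: with the guard above, the `OMP_NUM_THREADS` default of 1 is only applied when the variable is unset, so a value exported by the user now survives the import. A minimal sketch (the value 8 is an arbitrary example):

```python
# Sketch: a user-set OMP_NUM_THREADS is no longer overwritten on import.
import os

os.environ["OMP_NUM_THREADS"] = "8"  # arbitrary example value, set before importing ultralytics

import ultralytics  # previously this import forced the value back to "1"

print(os.environ["OMP_NUM_THREADS"])  # -> "8"
```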
diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py
index 74da337f91a..b4e9f3b060e 100644
--- a/ultralytics/cfg/__init__.py
+++ b/ultralytics/cfg/__init__.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import shutil
import subprocess
import sys
@@ -8,11 +7,14 @@
from types import SimpleNamespace
from typing import Dict, List, Union
+import cv2
+
from ultralytics.utils import (
ASSETS,
DEFAULT_CFG,
DEFAULT_CFG_DICT,
DEFAULT_CFG_PATH,
+ DEFAULT_SOL_DICT,
IS_VSCODE,
LOGGER,
RANK,
@@ -31,9 +33,22 @@
yaml_print,
)
+# Define valid solutions
+SOLUTION_MAP = {
+ "count": ("ObjectCounter", "count"),
+ "heatmap": ("Heatmap", "generate_heatmap"),
+ "queue": ("QueueManager", "process_queue"),
+ "speed": ("SpeedEstimator", "estimate_speed"),
+ "workout": ("AIGym", "monitor"),
+ "analytics": ("Analytics", "process_data"),
+ "trackzone": ("TrackZone", "trackzone"),
+ "inference": ("Inference", "inference"),
+ "help": None,
+}
+
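Editor's note: `SOLUTION_MAP` pairs each CLI solution name with its class name and per-frame method; `handle_yolo_solutions` further down resolves both via `getattr`. A hedged sketch of that dispatch outside the CLI:

```python
# Hedged sketch of how SOLUTION_MAP drives dispatch (mirrors handle_yolo_solutions below).
from ultralytics import solutions
from ultralytics.cfg import SOLUTION_MAP

cls_name, method_name = SOLUTION_MAP["count"]  # ("ObjectCounter", "count")
solution = getattr(solutions, cls_name)(model="yolo11n.pt", show=False)
process = getattr(solution, method_name)  # bound per-frame method, i.e. solution.count
# process(frame) is then called once per video frame
```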
# Define valid tasks and modes
-MODES = {"train", "val", "predict", "export", "track", "benchmark"}
-TASKS = {"detect", "segment", "classify", "pose", "obb", "regress"}
+MODES = frozenset({"train", "val", "predict", "export", "track", "benchmark"})
+TASKS = frozenset({"detect", "segment", "classify", "pose", "obb", "regress"})
TASK2DATA = {
"detect": "coco8.yaml",
"segment": "coco8-seg.yaml",
@@ -58,11 +73,41 @@
"obb": "metrics/mAP50-95(B)",
"regress": "metrics/MAE",
}
-MODELS = {TASK2MODEL[task] for task in TASKS}
+MODELS = frozenset({TASK2MODEL[task] for task in TASKS})
ARGV = sys.argv or ["", ""] # sometimes sys.argv = []
+SOLUTIONS_HELP_MSG = f"""
+ Arguments received: {str(["yolo"] + ARGV[1:])}. Ultralytics 'yolo solutions' usage overview:
+
+ yolo solutions SOLUTION ARGS
+
+ Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())[:-1]}
+ ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults
+ at https://docs.ultralytics.com/usage/cfg
+
+ 1. Call object counting solution
+ yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080, 400), (1080, 360), (20, 360)]
+
+ 2. Call heatmaps solution
+ yolo solutions heatmap colormap=cv2.COLORMAP_PARULA model=yolo11n.pt
+
+ 3. Call queue management solution
+ yolo solutions queue region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] model=yolo11n.pt
+
+ 4. Call workouts monitoring solution for push-ups
+ yolo solutions workout model=yolo11n-pose.pt kpts=[6, 8, 10]
+
+ 5. Generate analytical graphs
+ yolo solutions analytics analytics_type="pie"
+
+ 6. Track objects within specific zones
+ yolo solutions trackzone source="path/to/video/file.mp4" region=[(150, 150), (1130, 150), (1130, 570), (150, 570)]
+
+ 7. Streamlit real-time webcam inference GUI
+ yolo streamlit-predict
+ """
CLI_HELP_MSG = f"""
- Arguments received: {str(['yolo'] + ARGV[1:])}. Ultralytics 'yolo' commands use the following syntax:
+ Arguments received: {str(["yolo"] + ARGV[1:])}. Ultralytics 'yolo' commands use the following syntax:
yolo TASK MODE ARGS
@@ -83,115 +128,120 @@
4. Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required)
yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128
- 5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
- yolo explorer data=data.yaml model=yolo11n.pt
-
- 6. Streamlit real-time webcam inference GUI
- yolo streamlit-predict
-
- 7. Run special commands:
+ 5. Ultralytics solutions usage
+ yolo solutions count or in {list(SOLUTION_MAP.keys())[1:-1]} source="path/to/video/file.mp4"
+
+ 6. Run special commands:
yolo help
yolo checks
yolo version
yolo settings
yolo copy-cfg
yolo cfg
+ yolo solutions help
Docs: https://docs.ultralytics.com
+ Solutions: https://docs.ultralytics.com/solutions/
Community: https://community.ultralytics.com
GitHub: https://github.com/ultralytics/ultralytics
"""
# Define keys for arg type checks
-CFG_FLOAT_KEYS = { # integer or float arguments, i.e. x=2 and x=2.0
- "warmup_epochs",
- "box",
- "cls",
- "dfl",
- "degrees",
- "shear",
- "time",
- "workspace",
- "batch",
-}
-CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0
- "dropout",
- "lr0",
- "lrf",
- "momentum",
- "weight_decay",
- "warmup_momentum",
- "warmup_bias_lr",
- "label_smoothing",
- "hsv_h",
- "hsv_s",
- "hsv_v",
- "translate",
- "scale",
- "perspective",
- "flipud",
- "fliplr",
- "bgr",
- "mosaic",
- "mixup",
- "copy_paste",
- "conf",
- "iou",
- "fraction",
-}
-CFG_INT_KEYS = { # integer-only arguments
- "epochs",
- "patience",
- "workers",
- "seed",
- "close_mosaic",
- "mask_ratio",
- "max_det",
- "vid_stride",
- "line_width",
- "nbs",
- "save_period",
- "max_ncalib_imgs",
-}
-CFG_BOOL_KEYS = { # boolean-only arguments
- "save",
- "exist_ok",
- "verbose",
- "deterministic",
- "single_cls",
- "rect",
- "cos_lr",
- "overlap_mask",
- "val",
- "save_json",
- "save_hybrid",
- "half",
- "dnn",
- "plots",
- "show",
- "save_txt",
- "save_conf",
- "save_crop",
- "save_frames",
- "show_labels",
- "show_conf",
- "visualize",
- "augment",
- "agnostic_nms",
- "retina_masks",
- "show_boxes",
- "keras",
- "optimize",
- "int8",
- "dynamic",
- "simplify",
- "nms",
- "profile",
- "multi_scale",
- "separate_outputs",
- "export_hw_optimized",
- "uint8_io_dtype",
-}
+CFG_FLOAT_KEYS = frozenset(
+ { # integer or float arguments, i.e. x=2 and x=2.0
+ "warmup_epochs",
+ "box",
+ "cls",
+ "dfl",
+ "degrees",
+ "shear",
+ "time",
+ "workspace",
+ "batch",
+ }
+)
+CFG_FRACTION_KEYS = frozenset(
+ { # fractional float arguments with 0.0<=values<=1.0
+ "dropout",
+ "lr0",
+ "lrf",
+ "momentum",
+ "weight_decay",
+ "warmup_momentum",
+ "warmup_bias_lr",
+ "hsv_h",
+ "hsv_s",
+ "hsv_v",
+ "translate",
+ "scale",
+ "perspective",
+ "flipud",
+ "fliplr",
+ "bgr",
+ "mosaic",
+ "mixup",
+ "copy_paste",
+ "conf",
+ "iou",
+ "fraction",
+ }
+)
+CFG_INT_KEYS = frozenset(
+ { # integer-only arguments
+ "epochs",
+ "patience",
+ "workers",
+ "seed",
+ "close_mosaic",
+ "mask_ratio",
+ "max_det",
+ "vid_stride",
+ "line_width",
+ "nbs",
+ "save_period",
+ }
+)
+CFG_BOOL_KEYS = frozenset(
+ { # boolean-only arguments
+ "save",
+ "exist_ok",
+ "verbose",
+ "deterministic",
+ "single_cls",
+ "rect",
+ "cos_lr",
+ "overlap_mask",
+ "val",
+ "save_json",
+ "save_hybrid",
+ "half",
+ "dnn",
+ "plots",
+ "show",
+ "save_txt",
+ "save_conf",
+ "save_crop",
+ "save_frames",
+ "show_labels",
+ "show_conf",
+ "visualize",
+ "augment",
+ "agnostic_nms",
+ "retina_masks",
+ "show_boxes",
+ "keras",
+ "optimize",
+ "int8",
+ "dynamic",
+ "simplify",
+ "nms",
+ "profile",
+ "multi_scale",
+ "separate_outputs",
+ "export_hw_optimized",
+ "uint8_io_dtype",
+ }
+)
def cfg2dict(cfg):
@@ -244,7 +294,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
Examples:
>>> from ultralytics.cfg import get_cfg
>>> config = get_cfg() # Load default configuration
- >>> config = get_cfg("path/to/config.yaml", overrides={"epochs": 50, "batch_size": 16})
+ >>> config_with_overrides = get_cfg("path/to/config.yaml", overrides={"epochs": 50, "batch_size": 16})
Notes:
- If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence.
@@ -267,7 +317,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
if k in cfg and isinstance(cfg[k], (int, float)):
cfg[k] = str(cfg[k])
if cfg.get("name") == "model": # assign model to 'name' arg
- cfg["name"] = cfg.get("model", "").split(".")[0]
+ cfg["name"] = str(cfg.get("model", "")).split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")
# Type and Value checks
@@ -323,11 +373,11 @@ def check_cfg(cfg, hard=True):
)
cfg[k] = v = float(v)
if not (0.0 <= v <= 1.0):
- raise ValueError(f"'{k}={v}' is an invalid value. " f"Valid '{k}' values are between 0.0 and 1.0.")
+ raise ValueError(f"'{k}={v}' is an invalid value. Valid '{k}' values are between 0.0 and 1.0.")
elif k in CFG_INT_KEYS and not isinstance(v, int):
if hard:
raise TypeError(
- f"'{k}={v}' is of invalid type {type(v).__name__}. " f"'{k}' must be an int (i.e. '{k}=8')"
+ f"'{k}={v}' is of invalid type {type(v).__name__}. '{k}' must be an int (i.e. '{k}=8')"
)
cfg[k] = int(v)
elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
@@ -402,6 +452,9 @@ def _handle_deprecation(custom):
if key == "line_thickness":
deprecation_warn(key, "line_width")
custom["line_width"] = custom.pop("line_thickness")
+ if key == "label_smoothing":
+ deprecation_warn(key)
+ custom.pop("label_smoothing")
return custom
@@ -433,9 +486,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
- Prints detailed error messages for each mismatched key to help users correct their configurations.
"""
custom = _handle_deprecation(custom)
- base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
- mismatched = [k for k in custom_keys if k not in base_keys]
- if mismatched:
+ base_keys, custom_keys = (frozenset(x.keys()) for x in (base, custom))
+ if mismatched := [k for k in custom_keys if k not in base_keys]:
from difflib import get_close_matches
string = ""
@@ -449,34 +501,60 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
def merge_equals_args(args: List[str]) -> List[str]:
"""
- Merges arguments around isolated '=' in a list of strings, handling three cases:
- 1. ['arg', '=', 'val'] becomes ['arg=val'],
- 2. ['arg=', 'val'] becomes ['arg=val'],
- 3. ['arg', '=val'] becomes ['arg=val'].
+ Merges arguments around isolated '=' in a list of strings and joins fragments with brackets.
+
+ This function handles the following cases:
+ 1. ['arg', '=', 'val'] becomes ['arg=val']
+ 2. ['arg=', 'val'] becomes ['arg=val']
+ 3. ['arg', '=val'] becomes ['arg=val']
+ 4. Joins fragments with brackets, e.g., ['imgsz=[3,', '640,', '640]'] becomes ['imgsz=[3,640,640]']
Args:
- args (List[str]): A list of strings where each element represents an argument.
+ args (List[str]): A list of strings where each element represents an argument or fragment.
Returns:
- (List[str]): A list of strings where the arguments around isolated '=' are merged.
+ List[str]: A list of strings where the arguments around isolated '=' are merged and fragments with brackets are joined.
Examples:
- >>> args = ["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3"]
- >>> merge_equals_args(args)
- ['arg1=value', 'arg2=value2', 'arg3=value3']
+ >>> args = ["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3", "imgsz=[3,", "640,", "640]"]
+        >>> merge_equals_args(args)
+ ['arg1=value', 'arg2=value2', 'arg3=value3', 'imgsz=[3,640,640]']
"""
new_args = []
- for i, arg in enumerate(args):
+ current = ""
+ depth = 0
+
+ i = 0
+ while i < len(args):
+ arg = args[i]
+
+ # Handle equals sign merging
if arg == "=" and 0 < i < len(args) - 1: # merge ['arg', '=', 'val']
new_args[-1] += f"={args[i + 1]}"
- del args[i + 1]
+ i += 2
+ continue
elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]: # merge ['arg=', 'val']
new_args.append(f"{arg}{args[i + 1]}")
- del args[i + 1]
+ i += 2
+ continue
elif arg.startswith("=") and i > 0: # merge ['arg', '=val']
new_args[-1] += arg
- else:
- new_args.append(arg)
+ i += 1
+ continue
+
+ # Handle bracket joining
+ depth += arg.count("[") - arg.count("]")
+ current += arg
+ if depth == 0:
+ new_args.append(current)
+ current = ""
+
+ i += 1
+
+ # Append any remaining current string
+ if current:
+ new_args.append(current)
+
return new_args
@@ -493,7 +571,7 @@ def handle_yolo_hub(args: List[str]) -> None:
Examples:
```bash
- yolo hub login YOUR_API_KEY
+ yolo login YOUR_API_KEY
```
Notes:
@@ -553,53 +631,122 @@ def handle_yolo_settings(args: List[str]) -> None:
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
-def handle_explorer(args: List[str]):
+def handle_yolo_solutions(args: List[str]) -> None:
"""
- Launches a graphical user interface that provides tools for interacting with and analyzing datasets using the
- Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the Explorer
- dashboard is loading.
+ Processes YOLO solutions arguments and runs the specified computer vision solutions pipeline.
Args:
- args (List[str]): A list of optional command line arguments.
+ args (List[str]): Command-line arguments for configuring and running the Ultralytics YOLO
+            solutions: https://docs.ultralytics.com/solutions/. It can include the solution name, source,
+ and other configuration parameters.
+
+ Returns:
+ None: The function processes video frames and saves the output but doesn't return any value.
Examples:
- ```bash
- yolo explorer data=data.yaml model=yolo11n.pt
- ```
+ Run people counting solution with default settings:
+ >>> handle_yolo_solutions(["count"])
+
+ Run analytics with custom configuration:
+ >>> handle_yolo_solutions(["analytics", "conf=0.25", "source=path/to/video/file.mp4"])
+
+        Run inference with custom configuration (requires Streamlit version 1.29.0 or higher):
+ >>> handle_yolo_solutions(["inference", "model=yolo11n.pt"])
Notes:
- - Requires 'streamlit' package version 1.29.0 or higher.
- - The function does not take any arguments or return any values.
- - It is typically called from the command line interface using the 'yolo explorer' command.
+ - Default configurations are merged from DEFAULT_SOL_DICT and DEFAULT_CFG_DICT
+ - Arguments can be provided in the format 'key=value' or as boolean flags
+ - Available solutions are defined in SOLUTION_MAP with their respective classes and methods
+ - If an invalid solution is provided, defaults to 'count' solution
+        - Output videos are saved in the 'runs/solutions/exp' directory (incremented for each run)
+ - For 'analytics' solution, frame numbers are tracked for generating analytical graphs
+ - Video processing can be interrupted by pressing 'q'
+ - Processes video frames sequentially and saves output in .avi format
+        - If no source is specified, downloads and uses a default sample video
+ - The inference solution will be launched using the 'streamlit run' command.
+ - The Streamlit app file is located in the Ultralytics package directory.
"""
- checks.check_requirements("streamlit>=1.29.0")
-    LOGGER.info("💡 Loading Explorer dashboard...")
- cmd = ["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"]
- new = dict(parse_key_value_pair(a) for a in args)
- check_dict_alignment(base={k: DEFAULT_CFG_DICT[k] for k in ["model", "data"]}, custom=new)
- for k, v in new.items():
- cmd += [k, v]
- subprocess.run(cmd)
+ full_args_dict = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT} # arguments dictionary
+ overrides = {}
+
+ # check dictionary alignment
+ for arg in merge_equals_args(args):
+ arg = arg.lstrip("-").rstrip(",")
+ if "=" in arg:
+ try:
+ k, v = parse_key_value_pair(arg)
+ overrides[k] = v
+ except (NameError, SyntaxError, ValueError, AssertionError) as e:
+ check_dict_alignment(full_args_dict, {arg: ""}, e)
+ elif arg in full_args_dict and isinstance(full_args_dict.get(arg), bool):
+ overrides[arg] = True
+ check_dict_alignment(full_args_dict, overrides) # dict alignment
+
+ # Get solution name
+ if args and args[0] in SOLUTION_MAP:
+ if args[0] != "help":
+ s_n = args.pop(0) # Extract the solution name directly
+ else:
+ LOGGER.info(SOLUTIONS_HELP_MSG)
+ else:
+ LOGGER.warning(
+            f"⚠️ No valid solution provided. Using default 'count'. Available: {', '.join(SOLUTION_MAP.keys())}"
+ )
+ s_n = "count" # Default solution if none provided
+ if args and args[0] == "help": # Add check for return if user call `yolo solutions help`
+ return
-def handle_streamlit_inference():
- """
- Open the Ultralytics Live Inference Streamlit app for real-time object detection.
+ if s_n == "inference":
+ checks.check_requirements("streamlit>=1.29.0")
+        LOGGER.info("💡 Loading Ultralytics live inference app...")
+ subprocess.run(
+ [ # Run subprocess with Streamlit custom argument
+ "streamlit",
+ "run",
+ str(ROOT / "solutions/streamlit_inference.py"),
+ "--server.headless",
+ "true",
+ overrides.pop("model", "yolo11n.pt"),
+ ]
+ )
+ else:
+ cls, method = SOLUTION_MAP[s_n] # solution class name, method name and default source
- This function initializes and runs a Streamlit application designed for performing live object detection using
- Ultralytics models. It checks for the required Streamlit package and launches the app.
+ from ultralytics import solutions # import ultralytics solutions
- Examples:
- >>> handle_streamlit_inference()
+ solution = getattr(solutions, cls)(IS_CLI=True, **overrides) # get solution class i.e ObjectCounter
+ process = getattr(
+ solution, method
+ ) # get specific function of class for processing i.e, count from ObjectCounter
- Notes:
- - Requires Streamlit version 1.29.0 or higher.
- - The app is launched using the 'streamlit run' command.
- - The Streamlit app file is located in the Ultralytics package directory.
- """
- checks.check_requirements("streamlit>=1.29.0")
-    LOGGER.info("💡 Loading Ultralytics Live Inference app...")
- subprocess.run(["streamlit", "run", ROOT / "solutions/streamlit_inference.py", "--server.headless", "true"])
+ cap = cv2.VideoCapture(solution.CFG["source"]) # read the video file
+
+ # extract width, height and fps of the video file, create save directory and initialize video writer
+ import os # for directory creation
+ from pathlib import Path
+
+ from ultralytics.utils.files import increment_path # for output directory path update
+
+ w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
+ if s_n == "analytics": # analytical graphs follow fixed shape for output i.e w=1920, h=1080
+ w, h = 1920, 1080
+ save_dir = increment_path(Path("runs") / "solutions" / "exp", exist_ok=False)
+ save_dir.mkdir(parents=True, exist_ok=True) # create the output directory
+ vw = cv2.VideoWriter(os.path.join(save_dir, "solution.avi"), cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
+
+ try: # Process video frames
+ f_n = 0 # frame number, required for analytical graphs
+ while cap.isOpened():
+ success, frame = cap.read()
+ if not success:
+ break
+ frame = process(frame, f_n := f_n + 1) if s_n == "analytics" else process(frame)
+ vw.write(frame)
+ if cv2.waitKey(1) & 0xFF == ord("q"):
+ break
+ finally:
+ cap.release()
def parse_key_value_pair(pair: str = "key=value"):
@@ -610,9 +757,8 @@ def parse_key_value_pair(pair: str = "key=value"):
pair (str): A string containing a key-value pair in the format "key=value".
Returns:
- (tuple): A tuple containing two elements:
- - key (str): The parsed key.
- - value (str): The parsed value.
+ key (str): The parsed key.
+ value (str): The parsed value.
Raises:
AssertionError: If the value is missing or empty.
@@ -676,9 +822,10 @@ def smart_value(v):
elif v_lower == "false":
return False
else:
- with contextlib.suppress(Exception):
+ try:
return eval(v)
- return v
+ except Exception:
+ return v
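Editor's note: `smart_value` now uses an explicit try/except around `eval` instead of `contextlib.suppress`, making the string fallback visible. A few hedged examples of the conversions it performs:

```python
# Hedged sketch of smart_value conversions after the try/except change above.
from ultralytics.cfg import smart_value

smart_value("false")        # -> False, handled before the eval fallback
smart_value("0.25")         # -> 0.25 via eval
smart_value("[3,640,640]")  # -> [3, 640, 640] via eval
smart_value("yolo11n.pt")   # eval raises NameError, so the original string is returned
```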
def entrypoint(debug=""):
@@ -721,8 +868,7 @@ def entrypoint(debug=""):
"login": lambda: handle_yolo_hub(args),
"logout": lambda: handle_yolo_hub(args),
"copy-cfg": copy_default_cfg,
- "explorer": lambda: handle_explorer(args[1:]),
- "streamlit-predict": lambda: handle_streamlit_inference(),
+ "solutions": lambda: handle_yolo_solutions(args[1:]),
}
full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}
@@ -782,7 +928,13 @@ def entrypoint(debug=""):
task = overrides.pop("task", None)
if task:
if task not in TASKS:
- raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
+ if task == "track":
+ LOGGER.warning(
+                "WARNING ⚠️ invalid 'task=track', setting 'task=detect' and 'mode=track'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}."
+ )
+ task, mode = "detect", "track"
+ else:
+ raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
if "model" not in overrides:
overrides["model"] = TASK2MODEL[task]
@@ -801,7 +953,7 @@ def entrypoint(debug=""):
from ultralytics import FastSAM
model = FastSAM(model)
- elif "sam_" in stem or "sam2_" in stem:
+ elif "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem:
from ultralytics import SAM
model = SAM(model)
@@ -823,7 +975,9 @@ def entrypoint(debug=""):
# Mode
if mode in {"predict", "track"} and "source" not in overrides:
- overrides["source"] = DEFAULT_CFG.source or ASSETS
+ overrides["source"] = (
+ "https://ultralytics.com/images/boats.jpg" if task == "obb" else DEFAULT_CFG.source or ASSETS
+ )
        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
elif mode in {"train", "val"}:
if "data" not in overrides and "resume" not in overrides:
diff --git a/ultralytics/cfg/datasets/Argoverse.yaml b/ultralytics/cfg/datasets/Argoverse.yaml
index 43755f76870..5e05023d779 100644
--- a/ultralytics/cfg/datasets/Argoverse.yaml
+++ b/ultralytics/cfg/datasets/Argoverse.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
# Example usage: yolo train data=Argoverse.yaml
diff --git a/ultralytics/cfg/datasets/DOTAv1.5.yaml b/ultralytics/cfg/datasets/DOTAv1.5.yaml
index b59ff8816c1..26c73808d7b 100644
--- a/ultralytics/cfg/datasets/DOTAv1.5.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.5.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
diff --git a/ultralytics/cfg/datasets/DOTAv1.yaml b/ultralytics/cfg/datasets/DOTAv1.yaml
index d1c950b9957..5e71d2188d5 100644
--- a/ultralytics/cfg/datasets/DOTAv1.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
diff --git a/ultralytics/cfg/datasets/GlobalWheat2020.yaml b/ultralytics/cfg/datasets/GlobalWheat2020.yaml
index 95749a11b46..9dff73d7cd2 100644
--- a/ultralytics/cfg/datasets/GlobalWheat2020.yaml
+++ b/ultralytics/cfg/datasets/GlobalWheat2020.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
# Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
# Example usage: yolo train data=GlobalWheat2020.yaml
diff --git a/ultralytics/cfg/datasets/ImageNet.yaml b/ultralytics/cfg/datasets/ImageNet.yaml
index 0dc344abbaf..92e398a8fa8 100644
--- a/ultralytics/cfg/datasets/ImageNet.yaml
+++ b/ultralytics/cfg/datasets/ImageNet.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Documentation: https://docs.ultralytics.com/datasets/classify/imagenet/
diff --git a/ultralytics/cfg/datasets/Objects365.yaml b/ultralytics/cfg/datasets/Objects365.yaml
index 4994fd5f296..89921364a52 100644
--- a/ultralytics/cfg/datasets/Objects365.yaml
+++ b/ultralytics/cfg/datasets/Objects365.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Objects365 dataset https://www.objects365.org/ by Megvii
# Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
# Example usage: yolo train data=Objects365.yaml
diff --git a/ultralytics/cfg/datasets/SKU-110K.yaml b/ultralytics/cfg/datasets/SKU-110K.yaml
index fff1baa4831..a2c94ced1bc 100644
--- a/ultralytics/cfg/datasets/SKU-110K.yaml
+++ b/ultralytics/cfg/datasets/SKU-110K.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
# Example usage: yolo train data=SKU-110K.yaml
diff --git a/ultralytics/cfg/datasets/VOC.yaml b/ultralytics/cfg/datasets/VOC.yaml
index 7311d8917e9..2eb06ffdeb4 100644
--- a/ultralytics/cfg/datasets/VOC.yaml
+++ b/ultralytics/cfg/datasets/VOC.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
# Example usage: yolo train data=VOC.yaml
diff --git a/ultralytics/cfg/datasets/VisDrone.yaml b/ultralytics/cfg/datasets/VisDrone.yaml
index 9c28d918769..9fc7b45e435 100644
--- a/ultralytics/cfg/datasets/VisDrone.yaml
+++ b/ultralytics/cfg/datasets/VisDrone.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
# Example usage: yolo train data=VisDrone.yaml
diff --git a/ultralytics/cfg/datasets/african-wildlife.yaml b/ultralytics/cfg/datasets/african-wildlife.yaml
index eaccb1a85a3..b825f8f068b 100644
--- a/ultralytics/cfg/datasets/african-wildlife.yaml
+++ b/ultralytics/cfg/datasets/african-wildlife.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# African-wildlife dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
# Example usage: yolo train data=african-wildlife.yaml
diff --git a/ultralytics/cfg/datasets/brain-tumor.yaml b/ultralytics/cfg/datasets/brain-tumor.yaml
index 115532a32ed..7a448e84afc 100644
--- a/ultralytics/cfg/datasets/brain-tumor.yaml
+++ b/ultralytics/cfg/datasets/brain-tumor.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Brain-tumor dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
# Example usage: yolo train data=brain-tumor.yaml
diff --git a/ultralytics/cfg/datasets/carparts-seg.yaml b/ultralytics/cfg/datasets/carparts-seg.yaml
index d15da6e5b2e..9f15f9b0662 100644
--- a/ultralytics/cfg/datasets/carparts-seg.yaml
+++ b/ultralytics/cfg/datasets/carparts-seg.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Carparts-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
# Example usage: yolo train data=carparts-seg.yaml
diff --git a/ultralytics/cfg/datasets/coco-pose.yaml b/ultralytics/cfg/datasets/coco-pose.yaml
index 7d71c83de47..353dcd721b0 100644
--- a/ultralytics/cfg/datasets/coco-pose.yaml
+++ b/ultralytics/cfg/datasets/coco-pose.yaml
@@ -1,5 +1,6 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# COCO 2017 dataset https://cocodataset.org by Microsoft
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# COCO 2017 Keypoints dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/pose/coco/
# Example usage: yolo train data=coco-pose.yaml
# parent
@@ -9,9 +10,9 @@
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco-pose # dataset root dir
-train: train2017.txt # train images (relative to 'path') 118287 images
-val: val2017.txt # val images (relative to 'path') 5000 images
-test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+train: train2017.txt # train images (relative to 'path') 56599 images
+val: val2017.txt # val images (relative to 'path') 2346 images
+test: test-dev2017.txt # 20288 of 40670 images, submit to https://codalab.lisn.upsaclay.fr/competitions/7403
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
diff --git a/ultralytics/cfg/datasets/coco.yaml b/ultralytics/cfg/datasets/coco.yaml
index 3bb9aacc3f4..cb6dff744bd 100644
--- a/ultralytics/cfg/datasets/coco.yaml
+++ b/ultralytics/cfg/datasets/coco.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco.yaml
diff --git a/ultralytics/cfg/datasets/coco128-seg.yaml b/ultralytics/cfg/datasets/coco128-seg.yaml
index dcd961c6e5f..b023c676300 100644
--- a/ultralytics/cfg/datasets/coco128-seg.yaml
+++ b/ultralytics/cfg/datasets/coco128-seg.yaml
@@ -1,5 +1,6 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# COCO128-seg dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
# Example usage: yolo train data=coco128.yaml
# parent
diff --git a/ultralytics/cfg/datasets/coco128.yaml b/ultralytics/cfg/datasets/coco128.yaml
index 1b515592f2e..12ff0511bcd 100644
--- a/ultralytics/cfg/datasets/coco128.yaml
+++ b/ultralytics/cfg/datasets/coco128.yaml
@@ -1,5 +1,6 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# COCO128 dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco128.yaml
# parent
diff --git a/ultralytics/cfg/datasets/coco8-pose.yaml b/ultralytics/cfg/datasets/coco8-pose.yaml
index 68678fa76d3..3e8af1e3448 100644
--- a/ultralytics/cfg/datasets/coco8-pose.yaml
+++ b/ultralytics/cfg/datasets/coco8-pose.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
# Example usage: yolo train data=coco8-pose.yaml
diff --git a/ultralytics/cfg/datasets/coco8-seg.yaml b/ultralytics/cfg/datasets/coco8-seg.yaml
index 42fc02b08d7..1ea6b31004c 100644
--- a/ultralytics/cfg/datasets/coco8-seg.yaml
+++ b/ultralytics/cfg/datasets/coco8-seg.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
# Example usage: yolo train data=coco8-seg.yaml
diff --git a/ultralytics/cfg/datasets/coco8.yaml b/ultralytics/cfg/datasets/coco8.yaml
index 50a1133cdc3..8200738b46d 100644
--- a/ultralytics/cfg/datasets/coco8.yaml
+++ b/ultralytics/cfg/datasets/coco8.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=coco8.yaml
diff --git a/ultralytics/cfg/datasets/crack-seg.yaml b/ultralytics/cfg/datasets/crack-seg.yaml
index f6fe9aa2297..11bdd5f575f 100644
--- a/ultralytics/cfg/datasets/crack-seg.yaml
+++ b/ultralytics/cfg/datasets/crack-seg.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Crack-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
# Example usage: yolo train data=crack-seg.yaml
diff --git a/ultralytics/cfg/datasets/dog-pose.yaml b/ultralytics/cfg/datasets/dog-pose.yaml
new file mode 100644
index 00000000000..447e542ce6c
--- /dev/null
+++ b/ultralytics/cfg/datasets/dog-pose.yaml
@@ -0,0 +1,24 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Dogs dataset http://vision.stanford.edu/aditya86/ImageNetDogs/ by Stanford
+# Documentation: https://docs.ultralytics.com/datasets/pose/dog-pose/
+# Example usage: yolo train data=dog-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── dog-pose ← downloads here (337 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/dog-pose # dataset root dir
+train: train # train images (relative to 'path') 6773 images
+val: val # val images (relative to 'path') 1703 images
+
+# Keypoints
+kpt_shape: [24, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+
+# Classes
+names:
+ 0: dog
+
+# Download script/URL (optional)
+download: https://github.com/ultralytics/assets/releases/download/v0.0.0/dog-pose.zip
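
The "Example usage" comment above shows the CLI form; an equivalent Python call would look roughly like the sketch below (the checkpoint and hyperparameters are placeholders, and any pose-task model applies):

```python
from ultralytics import YOLO

# Fine-tune a pose model on the new Stanford Dogs keypoint dataset (downloads on first use).
model = YOLO("yolo11n-pose.pt")
results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)
```
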
diff --git a/ultralytics/cfg/datasets/dota8.yaml b/ultralytics/cfg/datasets/dota8.yaml
index a4dbe61ca47..486d9e2effb 100644
--- a/ultralytics/cfg/datasets/dota8.yaml
+++ b/ultralytics/cfg/datasets/dota8.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
# Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
diff --git a/ultralytics/cfg/datasets/hand-keypoints.yaml b/ultralytics/cfg/datasets/hand-keypoints.yaml
index 475a7c01379..6d2f765c789 100644
--- a/ultralytics/cfg/datasets/hand-keypoints.yaml
+++ b/ultralytics/cfg/datasets/hand-keypoints.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Hand Keypoints dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/hand-keypoints/
# Example usage: yolo train data=hand-keypoints.yaml
diff --git a/ultralytics/cfg/datasets/lvis.yaml b/ultralytics/cfg/datasets/lvis.yaml
index 9a79bde621b..22030ac9079 100644
--- a/ultralytics/cfg/datasets/lvis.yaml
+++ b/ultralytics/cfg/datasets/lvis.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# LVIS dataset http://www.lvisdataset.org by Facebook AI Research.
# Documentation: https://docs.ultralytics.com/datasets/detect/lvis/
# Example usage: yolo train data=lvis.yaml
@@ -11,7 +12,7 @@
path: ../datasets/lvis # dataset root dir
train: train.txt # train images (relative to 'path') 100170 images
val: val.txt # val images (relative to 'path') 19809 images
-minival: minival.txt # minval images (relative to 'path') 5000 images
+minival: minival.txt # minival images (relative to 'path') 5000 images
names:
0: aerosol can/spray can
diff --git a/ultralytics/cfg/datasets/medical-pills.yaml b/ultralytics/cfg/datasets/medical-pills.yaml
new file mode 100644
index 00000000000..25507c8b9be
--- /dev/null
+++ b/ultralytics/cfg/datasets/medical-pills.yaml
@@ -0,0 +1,22 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Medical-pills dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/medical-pills/
+# Example usage: yolo train data=medical-pills.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── medical-pills ← downloads here (8.19 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/medical-pills # dataset root dir
+train: train/images # train images (relative to 'path') 92 images
+val: valid/images # val images (relative to 'path') 23 images
+test: # test images (relative to 'path')
+
+# Classes
+names:
+ 0: pill
+
+# Download script/URL (optional)
+download: https://github.com/ultralytics/assets/releases/download/v0.0.0/medical-pills.zip
diff --git a/ultralytics/cfg/datasets/open-images-v7.yaml b/ultralytics/cfg/datasets/open-images-v7.yaml
index d9cad9f1d76..6bd4e0bdcf5 100644
--- a/ultralytics/cfg/datasets/open-images-v7.yaml
+++ b/ultralytics/cfg/datasets/open-images-v7.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
# Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
# Example usage: yolo train data=open-images-v7.yaml
diff --git a/ultralytics/cfg/datasets/package-seg.yaml b/ultralytics/cfg/datasets/package-seg.yaml
index 6c2a6b60bab..433ca04c7fe 100644
--- a/ultralytics/cfg/datasets/package-seg.yaml
+++ b/ultralytics/cfg/datasets/package-seg.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Package-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
# Example usage: yolo train data=package-seg.yaml
@@ -9,8 +10,8 @@
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/package-seg # dataset root dir
-train: images/train # train images (relative to 'path') 1920 images
-val: images/val # val images (relative to 'path') 89 images
+train: train/images # train images (relative to 'path') 1920 images
+val: valid/images # val images (relative to 'path') 89 images
test: test/images # test images (relative to 'path') 188 images
# Classes
diff --git a/ultralytics/cfg/datasets/signature.yaml b/ultralytics/cfg/datasets/signature.yaml
index d838fd7872d..5c9d5c338e9 100644
--- a/ultralytics/cfg/datasets/signature.yaml
+++ b/ultralytics/cfg/datasets/signature.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Signature dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/signature/
# Example usage: yolo train data=signature.yaml
diff --git a/ultralytics/cfg/datasets/tiger-pose.yaml b/ultralytics/cfg/datasets/tiger-pose.yaml
index dbcda757780..2b3f7b71761 100644
--- a/ultralytics/cfg/datasets/tiger-pose.yaml
+++ b/ultralytics/cfg/datasets/tiger-pose.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# Tiger Pose dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
# Example usage: yolo train data=tiger-pose.yaml
diff --git a/ultralytics/cfg/datasets/xView.yaml b/ultralytics/cfg/datasets/xView.yaml
index d2e957ad5e7..ccef985974b 100644
--- a/ultralytics/cfg/datasets/xView.yaml
+++ b/ultralytics/cfg/datasets/xView.yaml
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Documentation: https://docs.ultralytics.com/datasets/detect/xview/
diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml
index ac21e1114c4..0c7d69794cc 100644
--- a/ultralytics/cfg/default.yaml
+++ b/ultralytics/cfg/default.yaml
@@ -1,7 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# Default training settings and hyperparameters for medium-augmentation COCO training
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
+# Global configuration YAML with settings and hyperparameters for YOLO training, validation, prediction and export
+# For documentation see https://docs.ultralytics.com/usage/cfg/
+
+task: detect # (str) YOLO task, i.e. detect, segment, classify, pose, obb
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
# Train settings -------------------------------------------------------------------------------------------------------
@@ -36,7 +38,7 @@ profile: False # (bool) profile ONNX and TensorRT speeds during training for log
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False # (bool) Whether to use multiscale during training
# Segmentation
-overlap_mask: True # (bool) masks should overlap during training (segment train only)
+overlap_mask: True # (bool) merge object masks into a single image mask during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification
dropout: 0.0 # (float) use dropout regularization (classify train only)
@@ -84,7 +86,7 @@ int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: True # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
-workspace: 4 # (int) TensorRT: workspace size (GB)
+workspace: None # (float, optional) TensorRT: workspace size (GiB), `None` will let TensorRT auto-allocate memory
nms: False # (bool) CoreML: add NMS
separate_outputs: False # export model with 6 outputs - no concatenation
export_hw_optimized: False # optimize c2f block for faster inference on some hardware
@@ -105,7 +107,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels)
dfl: 1.5 # (float) dfl loss gain
pose: 12.0 # (float) pose loss gain
kobj: 1.0 # (float) keypoint obj loss gain
-label_smoothing: 0.0 # (float) label smoothing (fraction)
nbs: 64 # (int) nominal batch size
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
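
The `workspace` change above only alters the default; an explicit budget can still be passed at export time. A hedged sketch of both behaviors (requires a TensorRT-capable environment; the model name is a placeholder):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="engine")  # workspace=None -> TensorRT sizes its own workspace
model.export(format="engine", workspace=4)  # explicit 4 GiB cap, matching the old fixed default
```
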
diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
new file mode 100644
index 00000000000..e2fbcfac106
--- /dev/null
+++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
@@ -0,0 +1,17 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11-cls image classification model with ResNet18 backbone
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/classify
+
+# Parameters
+nc: 10 # number of classes
+
+# ResNet18 backbone
+backbone:
+ # [from, repeats, module, args]
+ - [-1, 1, TorchVision, [512, resnet18, DEFAULT, True, 2]] # truncate two layers from the end
+
+# YOLO11n head
+head:
+ - [-1, 1, Classify, [nc]] # Classify
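
Since the backbone here is a single `TorchVision` layer truncated before its final two modules, the file behaves like any other classification YAML. A minimal sketch of using it, assuming the YAML is resolvable by name or passed as a full path; the `cifar10` shortcut is chosen only because it matches `nc: 10`, so substitute your own dataset:

```python
from ultralytics import YOLO

# Build the ResNet18-backed classifier from the YAML above and train it briefly.
model = YOLO("yolo11-cls-resnet18.yaml")
model.train(data="cifar10", epochs=10, imgsz=224)
```
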
diff --git a/ultralytics/cfg/models/11/yolo11-cls.yaml b/ultralytics/cfg/models/11/yolo11-cls.yaml
index ea21e7922f8..7a6457c6d6d 100644
--- a/ultralytics/cfg/models/11/yolo11-cls.yaml
+++ b/ultralytics/cfg/models/11/yolo11-cls.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLO11-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11-cls image classification model
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/11/yolo11-obb.yaml b/ultralytics/cfg/models/11/yolo11-obb.yaml
index 5540ed753d5..8625c7cfdac 100644
--- a/ultralytics/cfg/models/11/yolo11-obb.yaml
+++ b/ultralytics/cfg/models/11/yolo11-obb.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLO11 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/obb
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11-obb Oriented Bounding Boxes (OBB) model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/obb
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/11/yolo11-pose.yaml b/ultralytics/cfg/models/11/yolo11-pose.yaml
index a744a33b6be..7470edac2fa 100644
--- a/ultralytics/cfg/models/11/yolo11-pose.yaml
+++ b/ultralytics/cfg/models/11/yolo11-pose.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLO11-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/11/yolo11-seg.yaml b/ultralytics/cfg/models/11/yolo11-seg.yaml
index 0f02d96c063..a569f4af84d 100644
--- a/ultralytics/cfg/models/11/yolo11-seg.yaml
+++ b/ultralytics/cfg/models/11/yolo11-seg.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLO11-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/segment
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/11/yolo11.yaml b/ultralytics/cfg/models/11/yolo11.yaml
index 8d06a129912..409465a1bb7 100644
--- a/ultralytics/cfg/models/11/yolo11.yaml
+++ b/ultralytics/cfg/models/11/yolo11.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLO11 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo11
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/README.md b/ultralytics/cfg/models/README.md
index bcaf8deda41..68a9238384e 100644
--- a/ultralytics/cfg/models/README.md
+++ b/ultralytics/cfg/models/README.md
@@ -11,8 +11,8 @@ To get started, simply browse through the models in this directory and find one
Model `*.yaml` files may be used directly in the [Command Line Interface (CLI)](https://docs.ultralytics.com/usage/cli/) with a `yolo` command:
```bash
-# Train a YOLOv8n model using the coco8 dataset for 100 epochs
-yolo task=detect mode=train model=yolov8n.yaml data=coco8.yaml epochs=100
+# Train a YOLO11n model using the coco8 dataset for 100 epochs
+yolo task=detect mode=train model=yolo11n.yaml data=coco8.yaml epochs=100
```
They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:
@@ -20,7 +20,7 @@ They may also be used directly in a Python environment, and accept the same [arg
```python
from ultralytics import YOLO
-# Initialize a YOLOv8n model from a YAML configuration file
+# Initialize a YOLO11n model from a YAML configuration file
model = YOLO("model.yaml")
# If a pre-trained model is available, use it instead
diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
index c6eb0b3eaf1..d8d6b4f410b 100644
--- a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics RT-DETR-l hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/rtdetr
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
index a68bb5ddae1..b13e94512bd 100644
--- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# RT-DETR-ResNet101 object detection model with P3-P5 outputs.
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics RT-DETR-ResNet101 hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/rtdetr
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
index 7145910417a..8172ad4ed4c 100644
--- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# RT-DETR-ResNet50 object detection model with P3-P5 outputs.
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics RT-DETR-ResNet50 hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/rtdetr
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
index 0e819b0a06d..f9c4a19c8ab 100644
--- a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics RT-DETR-x hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/rtdetr
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10b.yaml b/ultralytics/cfg/models/v10/yolov10b.yaml
index 57e0462959b..750379128cc 100644
--- a/ultralytics/cfg/models/v10/yolov10b.yaml
+++ b/ultralytics/cfg/models/v10/yolov10b.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10b object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10l.yaml b/ultralytics/cfg/models/v10/yolov10l.yaml
index add507aed3b..1dedd752e23 100644
--- a/ultralytics/cfg/models/v10/yolov10l.yaml
+++ b/ultralytics/cfg/models/v10/yolov10l.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10l object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10m.yaml b/ultralytics/cfg/models/v10/yolov10m.yaml
index 2712dfab008..6ba4020b330 100644
--- a/ultralytics/cfg/models/v10/yolov10m.yaml
+++ b/ultralytics/cfg/models/v10/yolov10m.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10m object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10n.yaml b/ultralytics/cfg/models/v10/yolov10n.yaml
index 094f1384a00..a9aa7018950 100644
--- a/ultralytics/cfg/models/v10/yolov10n.yaml
+++ b/ultralytics/cfg/models/v10/yolov10n.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10n object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10s.yaml b/ultralytics/cfg/models/v10/yolov10s.yaml
index b80f02bb857..dbb678b277d 100644
--- a/ultralytics/cfg/models/v10/yolov10s.yaml
+++ b/ultralytics/cfg/models/v10/yolov10s.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10s object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v10/yolov10x.yaml b/ultralytics/cfg/models/v10/yolov10x.yaml
index 2e5d98a97ce..57482133863 100644
--- a/ultralytics/cfg/models/v10/yolov10x.yaml
+++ b/ultralytics/cfg/models/v10/yolov10x.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv10x object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov10
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v3/yolov3-spp.yaml b/ultralytics/cfg/models/v3/yolov3-spp.yaml
index 6724f4e9f12..6aef25ab748 100644
--- a/ultralytics/cfg/models/v3/yolov3-spp.yaml
+++ b/ultralytics/cfg/models/v3/yolov3-spp.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv3-SPP object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov3
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v3/yolov3-tiny.yaml b/ultralytics/cfg/models/v3/yolov3-tiny.yaml
index f3fe257806b..91a0bb03f7d 100644
--- a/ultralytics/cfg/models/v3/yolov3-tiny.yaml
+++ b/ultralytics/cfg/models/v3/yolov3-tiny.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv3-tiny object detection model with P4/16 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov3
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v3/yolov3.yaml b/ultralytics/cfg/models/v3/yolov3.yaml
index 716866a97cc..95c99de52be 100644
--- a/ultralytics/cfg/models/v3/yolov3.yaml
+++ b/ultralytics/cfg/models/v3/yolov3.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv3 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov3
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v5/yolov5-p6.yaml b/ultralytics/cfg/models/v5/yolov5-p6.yaml
index 2fd3ac71baa..376d1aba90c 100644
--- a/ultralytics/cfg/models/v5/yolov5-p6.yaml
+++ b/ultralytics/cfg/models/v5/yolov5-p6.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv5 object detection model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov5
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v5/yolov5-relu6.yaml b/ultralytics/cfg/models/v5/yolov5-relu6.yaml
index e8d811d431f..175ff7ef723 100644
--- a/ultralytics/cfg/models/v5/yolov5-relu6.yaml
+++ b/ultralytics/cfg/models/v5/yolov5-relu6.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call
l: [1.00, 1.00, 1024]
x: [1.33, 1.25, 1024]
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv5 v6.0 backbone
backbone:
diff --git a/ultralytics/cfg/models/v5/yolov5.yaml b/ultralytics/cfg/models/v5/yolov5.yaml
index 8fdc79ebf73..76a4749ae4f 100644
--- a/ultralytics/cfg/models/v5/yolov5.yaml
+++ b/ultralytics/cfg/models/v5/yolov5.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv5 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov5
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v6/yolov6.yaml b/ultralytics/cfg/models/v6/yolov6.yaml
index f39dfb49274..4a45224e570 100644
--- a/ultralytics/cfg/models/v6/yolov6.yaml
+++ b/ultralytics/cfg/models/v6/yolov6.yaml
@@ -1,9 +1,12 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Meituan YOLOv6 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov6
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
-activation: nn.ReLU() # (optional) model default activation function
+activation: torch.nn.ReLU() # (optional) model default activation function
scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
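
The `nn.ReLU()` → `torch.nn.ReLU()` renames in this file and the ReLU6 variants suggest the `activation:` string is evaluated in a scope where only the fully qualified `torch` path is guaranteed to resolve (an assumption, not stated in the patch). A quick, hedged way to sanity-check that the override takes effect:

```python
import torch

from ultralytics import YOLO

# Build YOLOv6n from its YAML; the `activation:` entry should swap the default SiLU for ReLU.
model = YOLO("yolov6n.yaml")
acts = {type(m).__name__ for m in model.model.modules() if isinstance(m, (torch.nn.SiLU, torch.nn.ReLU))}
print(acts)  # expected to contain 'ReLU' rather than 'SiLU' if the override was applied
```
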
diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml
index fcf111497df..c45d0f30da3 100644
--- a/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml
+++ b/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will c
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml
index 1aaca1caec7..28f7d01753b 100644
--- a/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml
+++ b/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-regress.yaml' wi
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml
index 892fa644549..8cc2cb52730 100644
--- a/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml
+++ b/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-regress6.yaml' w
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/relu6-yolov8.yaml b/ultralytics/cfg/models/v8/relu6-yolov8.yaml
index 397525a2e4e..aa0e67153dc 100644
--- a/ultralytics/cfg/models/v8/relu6-yolov8.yaml
+++ b/ultralytics/cfg/models/v8/relu6-yolov8.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml b/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
index 6867f88848f..44cc00ebf22 100644
--- a/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-cls image classification model with ResNet101 backbone
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml b/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
index 8ffd111f02f..1d05e0753fc 100644
--- a/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-cls image classification model with ResNet50 backbone
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-cls.yaml b/ultralytics/cfg/models/v8/yolov8-cls.yaml
index 180fc65a59f..e346e5e1b76 100644
--- a/ultralytics/cfg/models/v8/yolov8-cls.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-cls image classification model with YOLO backbone
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml b/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
index aee209349c9..a98f23837bf 100644
--- a/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
@@ -1,5 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P2/4 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
+# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml b/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
index b35f4cdb6a9..956c2f0ad66 100644
--- a/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
@@ -1,5 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
+# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-ghost.yaml b/ultralytics/cfg/models/v8/yolov8-ghost.yaml
index adc180232b4..5888fb39bd0 100644
--- a/ultralytics/cfg/models/v8/yolov8-ghost.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
# Parameters
diff --git a/ultralytics/cfg/models/v8/yolov8-obb.yaml b/ultralytics/cfg/models/v8/yolov8-obb.yaml
index 7a7f60caef1..909324c5bec 100644
--- a/ultralytics/cfg/models/v8/yolov8-obb.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-obb.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-obb Oriented Bounding Boxes (OBB) model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/obb
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-p2.yaml b/ultralytics/cfg/models/v8/yolov8-p2.yaml
index 5392774bb55..676bc8348c4 100644
--- a/ultralytics/cfg/models/v8/yolov8-p2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-p2.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P2/4 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-p6.yaml b/ultralytics/cfg/models/v8/yolov8-p6.yaml
index 6a76612a2ae..3fde34981f8 100644
--- a/ultralytics/cfg/models/v8/yolov8-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-p6.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
index 60007ace158..447a21aab07 100644
--- a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-pose keypoints/pose estimation model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml b/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml
index 90a41181b1d..74617dcc2ac 100644
--- a/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml
@@ -12,7 +12,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will
l: [1.00, 1.00, 512]
x: [1.00, 1.25, 512]
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/yolov8-pose.yaml b/ultralytics/cfg/models/v8/yolov8-pose.yaml
index 60388ef595a..c22bc435b57 100644
--- a/ultralytics/cfg/models/v8/yolov8-pose.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-pose.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-pose keypoints/pose estimation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-relu6.yaml b/ultralytics/cfg/models/v8/yolov8-relu6.yaml
index e11c4a05e48..76c85284862 100644
--- a/ultralytics/cfg/models/v8/yolov8-relu6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-relu6.yaml
@@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
-activation: nn.ReLU6()
+activation: torch.nn.ReLU6()
# YOLOv8.0n backbone
backbone:
diff --git a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
index 27b790b1074..50ec129ac18 100644
--- a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-RTDETR hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/rtdetr
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
index 78c0444c8b6..4c7ba9bf4dd 100644
--- a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-seg instance segmentation model with P3/8 - P6/64 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/segment
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-seg.yaml b/ultralytics/cfg/models/v8/yolov8-seg.yaml
index 700b7951de5..52b1c7e9aed 100644
--- a/ultralytics/cfg/models/v8/yolov8-seg.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-seg.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/segment
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-world.yaml b/ultralytics/cfg/models/v8/yolov8-world.yaml
index c21a7f00205..3c92e824ce6 100644
--- a/ultralytics/cfg/models/v8/yolov8-world.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-world.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-World hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo-world
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8-worldv2.yaml b/ultralytics/cfg/models/v8/yolov8-worldv2.yaml
index 322b97d4b98..c6aaa277331 100644
--- a/ultralytics/cfg/models/v8/yolov8-worldv2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-worldv2.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8-Worldv2 hybrid object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo-world
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v8/yolov8.yaml b/ultralytics/cfg/models/v8/yolov8.yaml
index b328e98a14a..c7b9938ec34 100644
--- a/ultralytics/cfg/models/v8/yolov8.yaml
+++ b/ultralytics/cfg/models/v8/yolov8.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOv8 object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov8
+# Task docs: https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
diff --git a/ultralytics/cfg/models/v9/yolov9c-seg.yaml b/ultralytics/cfg/models/v9/yolov9c-seg.yaml
index 48b0ad7289d..14122cb8393 100644
--- a/ultralytics/cfg/models/v9/yolov9c-seg.yaml
+++ b/ultralytics/cfg/models/v9/yolov9c-seg.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9c-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9c-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/segment
# 654 layers, 27897120 parameters, 159.4 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9c.yaml b/ultralytics/cfg/models/v9/yolov9c.yaml
index 4ea8997d420..4fc1fcd13fd 100644
--- a/ultralytics/cfg/models/v9/yolov9c.yaml
+++ b/ultralytics/cfg/models/v9/yolov9c.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9c object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9c object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/detect
# 618 layers, 25590912 parameters, 104.0 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9e-seg.yaml b/ultralytics/cfg/models/v9/yolov9e-seg.yaml
index b149b4e2e20..4361daac293 100644
--- a/ultralytics/cfg/models/v9/yolov9e-seg.yaml
+++ b/ultralytics/cfg/models/v9/yolov9e-seg.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9e-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9e-seg instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/segment
# 1261 layers, 60512800 parameters, 248.4 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9e.yaml b/ultralytics/cfg/models/v9/yolov9e.yaml
index c90843e093e..bba5597d0cf 100644
--- a/ultralytics/cfg/models/v9/yolov9e.yaml
+++ b/ultralytics/cfg/models/v9/yolov9e.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9e object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9e object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/detect
# 1225 layers, 58206592 parameters, 193.0 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9m.yaml b/ultralytics/cfg/models/v9/yolov9m.yaml
index fd2c49ecbd4..89bed65bebb 100644
--- a/ultralytics/cfg/models/v9/yolov9m.yaml
+++ b/ultralytics/cfg/models/v9/yolov9m.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9m object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9m object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/detect
# 603 layers, 20216160 parameters, 77.9 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9s.yaml b/ultralytics/cfg/models/v9/yolov9s.yaml
index d86eb0d938d..28891f4cebc 100644
--- a/ultralytics/cfg/models/v9/yolov9s.yaml
+++ b/ultralytics/cfg/models/v9/yolov9s.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9s object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9s object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/detect
# 917 layers, 7318368 parameters, 27.6 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/models/v9/yolov9t.yaml b/ultralytics/cfg/models/v9/yolov9t.yaml
index 0ce5f95e74f..21a5bad86b9 100644
--- a/ultralytics/cfg/models/v9/yolov9t.yaml
+++ b/ultralytics/cfg/models/v9/yolov9t.yaml
@@ -1,5 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# YOLOv9t object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# YOLOv9t object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolov9
+# Task docs: https://docs.ultralytics.com/tasks/detect
# 917 layers, 2128720 parameters, 8.5 GFLOPs
# Parameters
diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml
new file mode 100644
index 00000000000..a4afb49b324
--- /dev/null
+++ b/ultralytics/cfg/solutions/default.yaml
@@ -0,0 +1,24 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Global configuration YAML with settings and arguments for Ultralytics Solutions
+# For documentation see https://docs.ultralytics.com/solutions/
+
+# Object counting settings --------------------------------------------------------------------------------------------
+region: # (list[tuple[int, int]]) points defining the region used for object counting, queue monitoring or speed estimation.
+show_in: True # (bool) flag to display objects moving *into* the defined region
+show_out: True # (bool) flag to display objects moving *out of* the defined region
+
+# Heatmaps settings ----------------------------------------------------------------------------------------------------
+colormap: # (int | str) colormap for heatmap; only OpenCV-supported colormaps can be used.
+
+# Workouts monitoring settings -----------------------------------------------------------------------------------------
+up_angle: 145.0 # (float) Workouts up_angle for counts, 145.0 is the default value.
+down_angle: 90 # (float) Workouts down_angle for counts, 90 is the default value.
+kpts: [6, 8, 10] # (list[int]) keypoints for workouts monitoring, i.e. for push-ups kpts have values of [6, 8, 10].
+
+# Analytics settings ---------------------------------------------------------------------------------------------------
+analytics_type: "line" # (str) analytics type, i.e. "line", "pie", "bar" or "area" charts.
+json_file: # (str) parking system regions file path.
+
+# Security alarm system settings ---------------------------------------------------------------------------------------
+records: 5 # (int) total detection count threshold used to trigger a security alert email
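
For orientation, a minimal sketch of how these Solutions defaults can be read and overridden with PyYAML; the file path and the override values below are illustrative only and not part of this patch.

```python
# Minimal sketch (not part of the patch): load the Solutions defaults and override a few keys.
# Assumes PyYAML is installed and the repository layout shown in the diff above.
import yaml

with open("ultralytics/cfg/solutions/default.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

cfg["region"] = [(20, 400), (1080, 400)]  # hypothetical two-point counting line
cfg["show_in"], cfg["show_out"] = True, False  # display only objects moving into the region
print(cfg["analytics_type"], cfg["kpts"])  # -> line [6, 8, 10]
```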
diff --git a/ultralytics/cfg/trackers/botsort.yaml b/ultralytics/cfg/trackers/botsort.yaml
index 01cebb64789..aedcee4860f 100644
--- a/ultralytics/cfg/trackers/botsort.yaml
+++ b/ultralytics/cfg/trackers/botsort.yaml
@@ -1,10 +1,13 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Default Ultralytics settings for BoT-SORT tracker when using mode="track"
+# For documentation and examples see https://docs.ultralytics.com/modes/track/
+# For BoT-SORT source code see https://github.com/NirAharon/BoT-SORT
tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
-track_high_thresh: 0.5 # threshold for the first association
+track_high_thresh: 0.25 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
-new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
+new_track_thresh: 0.25 # threshold for init new track if the detection does not match any tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
fuse_score: True # Whether to fuse confidence scores with the iou distances before matching
diff --git a/ultralytics/cfg/trackers/bytetrack.yaml b/ultralytics/cfg/trackers/bytetrack.yaml
index 49ab3f697bb..62071a3022d 100644
--- a/ultralytics/cfg/trackers/bytetrack.yaml
+++ b/ultralytics/cfg/trackers/bytetrack.yaml
@@ -1,10 +1,13 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Default Ultralytics settings for ByteTrack tracker when using mode="track"
+# For documentation and examples see https://docs.ultralytics.com/modes/track/
+# For ByteTrack source code see https://github.com/ifzhang/ByteTrack
tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
-track_high_thresh: 0.5 # threshold for the first association
+track_high_thresh: 0.25 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
-new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
+new_track_thresh: 0.25 # threshold for init new track if the detection does not match any tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
fuse_score: True # Whether to fuse confidence scores with the iou distances before matching
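
Both tracker configs are selected at runtime through the `tracker=` argument of `model.track()`. A minimal sketch, assuming any detection checkpoint and video source; the weight name and video path below are placeholders.

```python
# Minimal sketch: exercise the lowered association thresholds via mode="track".
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # placeholder; any detection model works
results = model.track(source="path/to/video.mp4", tracker="bytetrack.yaml", stream=True)
for r in results:
    print(r.boxes.id)  # tracker-assigned IDs; None until a track is confirmed
```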
diff --git a/ultralytics/data/__init__.py b/ultralytics/data/__init__.py
index daab4875254..7e88dc9982d 100644
--- a/ultralytics/data/__init__.py
+++ b/ultralytics/data/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .base import BaseDataset
from .build import build_dataloader, build_grounding, build_yolo_dataset, load_inference_source
diff --git a/ultralytics/data/annotator.py b/ultralytics/data/annotator.py
index 5cb0058dcb1..982e5de5ab1 100644
--- a/ultralytics/data/annotator.py
+++ b/ultralytics/data/annotator.py
@@ -1,11 +1,22 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from pathlib import Path
from ultralytics import SAM, YOLO
-def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
+def auto_annotate(
+ data,
+ det_model="yolo11x.pt",
+ sam_model="sam_b.pt",
+ device="",
+ conf=0.25,
+ iou=0.45,
+ imgsz=640,
+ max_det=300,
+ classes=None,
+ output_dir=None,
+):
"""
Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
@@ -17,11 +28,16 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="",
det_model (str): Path or name of the pre-trained YOLO detection model.
sam_model (str): Path or name of the pre-trained SAM segmentation model.
device (str): Device to run the models on (e.g., 'cpu', 'cuda', '0').
+ conf (float): Confidence threshold for detection model; default is 0.25.
+ iou (float): IoU threshold for filtering overlapping boxes in detection results; default is 0.45.
+ imgsz (int): Input image resize dimension; default is 640.
+ max_det (int): Limits detections per image to control outputs in dense scenes.
+ classes (list): Filters predictions to specified class IDs, returning only relevant detections.
output_dir (str | None): Directory to save the annotated results. If None, a default directory is created.
Examples:
>>> from ultralytics.data.annotator import auto_annotate
- >>> auto_annotate(data="ultralytics/assets", det_model="yolov8n.pt", sam_model="mobile_sam.pt")
+ >>> auto_annotate(data="ultralytics/assets", det_model="yolo11n.pt", sam_model="mobile_sam.pt")
Notes:
- The function creates a new directory for output if not specified.
@@ -36,7 +52,9 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="",
output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
Path(output_dir).mkdir(exist_ok=True, parents=True)
- det_results = det_model(data, stream=True, device=device)
+ det_results = det_model(
+ data, stream=True, device=device, conf=conf, iou=iou, imgsz=imgsz, max_det=max_det, classes=classes
+ )
for result in det_results:
class_ids = result.boxes.cls.int().tolist() # noqa
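
For reference, a hedged example call exercising the newly exposed arguments of `auto_annotate`; all values are illustrative and simply mirror the defaults visible in the signature above.

```python
# Minimal sketch of the extended auto_annotate call; paths and the class filter are illustrative.
from ultralytics.data.annotator import auto_annotate

auto_annotate(
    data="path/to/images",
    det_model="yolo11n.pt",
    sam_model="mobile_sam.pt",
    conf=0.25,  # detector confidence threshold
    iou=0.45,  # NMS IoU threshold for overlapping boxes
    imgsz=640,  # inference image size
    max_det=300,  # cap detections in dense scenes
    classes=[0],  # keep only class 0 detections
    output_dir=None,  # defaults to "<data>_auto_annotate_labels" next to the image folder
)
```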
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 49bdc92235b..1ab14a647eb 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
import random
@@ -271,9 +271,9 @@ def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
"""
assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
if isinstance(index, list):
- assert isinstance(
- value, list
- ), f"The indices should be the same type as values, but got {type(index)} and {type(value)}"
+ assert isinstance(value, list), (
+ f"The indices should be the same type as values, but got {type(index)} and {type(value)}"
+ )
if isinstance(index, int):
index, value = [index], [value]
for i, v in zip(index, value):
@@ -441,7 +441,8 @@ def get_indexes(self):
"""
raise NotImplementedError
- def _update_label_text(self, labels):
+ @staticmethod
+ def _update_label_text(labels):
"""
Updates label text and class IDs for mixed labels in image augmentation.
@@ -641,7 +642,7 @@ def _mosaic3(self, labels):
c = s - w, s + h0 - h, s, s + h0
padw, padh = c[:2]
- x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates
img3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img3[ymin:ymax, xmin:xmax]
# hp, wp = h, w # height, width previous for next iteration
@@ -770,7 +771,7 @@ def _mosaic9(self, labels):
c = s - w, s + h0 - hp - h, s, s + h0 - hp
padw, padh = c[:2]
- x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates
# Image
img9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img9[ymin:ymax, xmin:xmax]
@@ -1259,7 +1260,8 @@ def __call__(self, labels):
labels["resized_shape"] = img.shape[:2]
return labels
- def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+ @staticmethod
+ def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
"""
Compute candidate boxes for further processing based on size and aspect ratio criteria.
@@ -1281,7 +1283,7 @@ def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-
eps (float): Small epsilon value to prevent division by zero.
Returns:
- (numpy.ndarray): Boolean array of shape (n,) indicating which boxes are candidates.
+ (numpy.ndarray): Boolean array of shape (n) indicating which boxes are candidates.
True values correspond to boxes that meet all criteria.
Examples:
@@ -1318,7 +1320,7 @@ class RandomHSV:
>>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
>>> image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
>>> labels = {"img": image}
- >>> augmented_labels = augmenter(labels)
+ >>> augmenter(labels)
>>> augmented_image = labels["img"]
"""
@@ -1335,7 +1337,7 @@ def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
Examples:
>>> hsv_aug = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
- >>> augmented_image = hsv_aug(image)
+ >>> hsv_aug(image)
"""
self.hgain = hgain
self.sgain = sgain
@@ -1417,7 +1419,7 @@ def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None:
Examples:
>>> flip = RandomFlip(p=0.5, direction="horizontal")
- >>> flip = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
+ >>> flip_with_idx = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
"""
assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}"
assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."
@@ -1591,14 +1593,15 @@ def __call__(self, labels=None, image=None):
labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation
if len(labels):
- labels = self._update_labels(labels, ratio, dw, dh)
+ labels = self._update_labels(labels, ratio, left, top)
labels["img"] = img
labels["resized_shape"] = new_shape
return labels
else:
return img
- def _update_labels(self, labels, ratio, padw, padh):
+ @staticmethod
+ def _update_labels(labels, ratio, padw, padh):
"""
Updates labels after applying letterboxing to an image.
@@ -1847,7 +1850,7 @@ def __init__(self, p=1.0):
A.CLAHE(p=0.01),
A.RandomBrightnessContrast(p=0.0),
A.RandomGamma(p=0.0),
- A.ImageCompression(quality_lower=75, p=0.0),
+ A.ImageCompression(quality_range=(75, 100), p=0.0),
]
# Compose transforms
@@ -1857,6 +1860,9 @@ def __init__(self, p=1.0):
if self.contains_spatial
else A.Compose(T)
)
+ if hasattr(self.transform, "set_random_seed"):
+ # Required for deterministic transforms in albumentations>=1.4.21
+ self.transform.set_random_seed(torch.initial_seed())
LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
except ImportError: # package not installed, skip
pass
@@ -2019,7 +2025,7 @@ def __call__(self, labels):
Returns:
(Dict): A dictionary with formatted data, including:
- 'img': Formatted image tensor.
- - 'cls': Class labels tensor.
+ - 'cls': Class label's tensor.
- 'bboxes': Bounding boxes tensor in the specified format.
- 'masks': Instance masks tensor (if return_mask is True).
- 'keypoints': Keypoints tensor (if return_keypoint is True).
@@ -2111,10 +2117,9 @@ def _format_segments(self, instances, cls, w, h):
h (int): Height of the image.
Returns:
- (tuple): Tuple containing:
- masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
- instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
- cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
+ masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
+ instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
+ cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
Notes:
- If self.mask_overlap is True, masks are overlapped and sorted by area.
@@ -2280,7 +2285,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
Args:
dataset (Dataset): The dataset object containing image data and annotations.
imgsz (int): The target image size for resizing.
- hyp (Dict): A dictionary of hyperparameters controlling various aspects of the transformations.
+        hyp (Namespace): A namespace of hyperparameters controlling various aspects of the transformations.
stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.
Returns:
@@ -2288,8 +2293,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
Examples:
>>> from ultralytics.data.dataset import YOLODataset
+ >>> from ultralytics.utils import IterableSimpleNamespace
>>> dataset = YOLODataset(img_path="path/to/images", imgsz=640)
- >>> hyp = {"mosaic": 1.0, "copy_paste": 0.5, "degrees": 10.0, "translate": 0.2, "scale": 0.9}
+ >>> hyp = IterableSimpleNamespace(mosaic=1.0, copy_paste=0.5, degrees=10.0, translate=0.2, scale=0.9)
>>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp)
>>> augmented_data = transforms(dataset[0])
"""
diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py
index f18c2d54dce..446b4ecf88b 100644
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import glob
import math
@@ -90,13 +90,15 @@ def __init__(
self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
self.cache = cache.lower() if isinstance(cache, str) else "ram" if cache is True else None
- if (self.cache == "ram" and self.check_cache_ram()) or self.cache == "disk":
- if self.cache == "ram" and hyp.deterministic:
+ if self.cache == "ram" and self.check_cache_ram():
+ if hyp.deterministic:
LOGGER.warning(
"WARNING โ ๏ธ cache='ram' may produce non-deterministic training results. "
"Consider cache='disk' as a deterministic alternative if your disk space allows."
)
self.cache_images()
+ elif self.cache == "disk" and self.check_cache_disk():
+ self.cache_images()
# Transforms
self.transforms = self.build_transforms(hyp=hyp)
@@ -206,25 +208,55 @@ def cache_images_to_disk(self, i):
if not f.exists():
np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)
+ def check_cache_disk(self, safety_margin=0.5):
+ """Check image caching requirements vs available disk space."""
+ import shutil
+
+ b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
+ n = min(self.ni, 30) # extrapolate from 30 random images
+ for _ in range(n):
+ im_file = random.choice(self.im_files)
+ im = cv2.imread(im_file)
+ if im is None:
+ continue
+ b += im.nbytes
+ if not os.access(Path(im_file).parent, os.W_OK):
+ self.cache = None
+                LOGGER.info(f"{self.prefix}Skipping caching images to disk, directory not writeable ⚠️")
+ return False
+ disk_required = b * self.ni / n * (1 + safety_margin) # bytes required to cache dataset to disk
+ total, used, free = shutil.disk_usage(Path(self.im_files[0]).parent)
+ if disk_required > free:
+ self.cache = None
+ LOGGER.info(
+ f"{self.prefix}{disk_required / gb:.1f}GB disk space required, "
+ f"with {int(safety_margin * 100)}% safety margin but only "
+                f"{free / gb:.1f}/{total / gb:.1f}GB free, not caching images to disk ⚠️"
+ )
+ return False
+ return True
+
def check_cache_ram(self, safety_margin=0.5):
"""Check image caching requirements vs available memory."""
b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
n = min(self.ni, 30) # extrapolate from 30 random images
for _ in range(n):
im = cv2.imread(random.choice(self.im_files)) # sample image
+ if im is None:
+ continue
ratio = self.imgsz / max(im.shape[0], im.shape[1]) # max(h, w) # ratio
b += im.nbytes * ratio**2
mem_required = b * self.ni / n * (1 + safety_margin) # GB required to cache dataset into RAM
mem = psutil.virtual_memory()
- success = mem_required < mem.available # to cache or not to cache, that is the question
- if not success:
+ if mem_required > mem.available:
self.cache = None
LOGGER.info(
f"{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images "
f"with {int(safety_margin * 100)}% safety margin but only "
f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, not caching images โ ๏ธ"
)
- return success
+ return False
+ return True
def set_rectangle(self):
"""Sets the shape of bounding boxes for YOLO detections as rectangles."""
diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py
index df3425037cd..468238308b7 100644
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
import random
@@ -47,6 +47,18 @@ def __iter__(self):
for _ in range(len(self)):
yield next(self.iterator)
+ def __del__(self):
+ """Ensure that workers are terminated."""
+ try:
+ if not hasattr(self.iterator, "_workers"):
+ return
+ for w in self.iterator._workers: # force terminate
+ if w.is_alive():
+ w.terminate()
+ self.iterator._shutdown_workers() # cleanup
+ except Exception:
+ pass
+
def reset(self):
"""
Reset iterator.
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index 03dbf0ade1e..05a316b4858 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -1,13 +1,18 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import json
+import random
+import shutil
from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import cv2
import numpy as np
+from PIL import Image
-from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM
+from ultralytics.utils.downloads import download
from ultralytics.utils.files import increment_path
@@ -236,8 +241,10 @@ def convert_coco(
```python
from ultralytics.data.converter import convert_coco
- convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=True)
- convert_coco("../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
+ convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
+ convert_coco(
+ "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
+ )
```
Output:
@@ -261,11 +268,11 @@ def convert_coco(
# since LVIS val set contains images from COCO 2017 train in addition to the COCO 2017 val split.
(fn / "train2017").mkdir(parents=True, exist_ok=True)
(fn / "val2017").mkdir(parents=True, exist_ok=True)
- with open(json_file) as f:
+ with open(json_file, encoding="utf-8") as f:
data = json.load(f)
# Create image dict
- images = {f'{x["id"]:d}': x for x in data["images"]}
+ images = {f"{x['id']:d}": x for x in data["images"]}
# Create image-annotations dict
imgToAnns = defaultdict(list)
for ann in data["annotations"]:
@@ -372,7 +379,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
"""
pixel_to_class_mapping = {i + 1: i for i in range(classes)}
for mask_path in Path(masks_dir).iterdir():
- if mask_path.suffix == ".png":
+ if mask_path.suffix in {".png", ".jpg"}:
mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) # Read the mask image in grayscale
img_height, img_width = mask.shape # Get image dimensions
LOGGER.info(f"Processing {mask_path} imgsz = {img_height} x {img_width}")
@@ -572,7 +579,7 @@ def merge_multi_segment(segments):
return s
-def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
+def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
"""
Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
@@ -582,21 +589,20 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
save_dir (str | Path): Path to save the generated labels, labels will be saved
into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
sam_model (str): Segmentation model to use for intermediate segmentation data; optional.
+ device (int | str): The specific device to run SAM models. Default: None.
Notes:
The input directory structure assumed for dataset:
- im_dir
                ├─ 001.jpg
-                ├─ ..
+                ├─ ...
                └─ NNN.jpg
            - labels
                ├─ 001.txt
-                ├─ ..
+                ├─ ...
                └─ NNN.txt
"""
- from tqdm import tqdm
-
from ultralytics import SAM
from ultralytics.data import YOLODataset
from ultralytics.utils import LOGGER
@@ -610,7 +616,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
sam_model = SAM(sam_model)
- for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
+ for label in TQDM(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
h, w = label["shape"]
boxes = label["bboxes"]
if len(boxes) == 0: # skip empty labels
@@ -618,7 +624,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
boxes[:, [0, 2]] *= w
boxes[:, [1, 3]] *= h
im = cv2.imread(label["im_file"])
- sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
+ sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False, device=device)
label["segments"] = sam_results[0].masks.xyn
save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
@@ -629,9 +635,68 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
txt_file = save_dir / lb_name
cls = label["cls"]
for i, s in enumerate(label["segments"]):
+ if len(s) == 0:
+ continue
line = (int(cls[i]), *s.reshape(-1))
texts.append(("%g " * len(line)).rstrip() % line)
- if texts:
- with open(txt_file, "a") as f:
- f.writelines(text + "\n" for text in texts)
+ with open(txt_file, "a") as f:
+ f.writelines(text + "\n" for text in texts)
LOGGER.info(f"Generated segment labels saved in {save_dir}")
+
+
+def create_synthetic_coco_dataset():
+ """
+ Creates a synthetic COCO dataset with random images based on filenames from label lists.
+
+ This function downloads COCO labels, reads image filenames from label list files,
+ creates synthetic images for train2017 and val2017 subsets, and organizes
+ them in the COCO dataset structure. It uses multithreading to generate images efficiently.
+
+ Examples:
+ >>> from ultralytics.data.converter import create_synthetic_coco_dataset
+ >>> create_synthetic_coco_dataset()
+
+ Notes:
+ - Requires internet connection to download label files.
+ - Generates random RGB images of varying sizes (480x480 to 640x640 pixels).
+ - Existing test2017 directory is removed as it's not needed.
+ - Reads image filenames from train2017.txt and val2017.txt files.
+ """
+
+ def create_synthetic_image(image_file):
+ """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
+ if not image_file.exists():
+ size = (random.randint(480, 640), random.randint(480, 640))
+ Image.new(
+ "RGB",
+ size=size,
+ color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
+ ).save(image_file)
+
+ # Download labels
+ dir = DATASETS_DIR / "coco"
+ url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/"
+ label_zip = "coco2017labels-segments.zip"
+ download([url + label_zip], dir=dir.parent)
+
+ # Create synthetic images
+ shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True) # Remove test2017 directory as not needed
+ with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
+ for subset in ["train2017", "val2017"]:
+ subset_dir = dir / "images" / subset
+ subset_dir.mkdir(parents=True, exist_ok=True)
+
+ # Read image filenames from label list file
+ label_list_file = dir / f"{subset}.txt"
+ if label_list_file.exists():
+ with open(label_list_file) as f:
+ image_files = [dir / line.strip() for line in f]
+
+ # Submit all tasks
+ futures = [executor.submit(create_synthetic_image, image_file) for image_file in image_files]
+ for _ in TQDM(as_completed(futures), total=len(futures), desc=f"Generating images for {subset}"):
+ pass # The actual work is done in the background
+ else:
+ print(f"Warning: Labels file {label_list_file} does not exist. Skipping image creation for {subset}.")
+
+ print("Synthetic COCO dataset created successfully.")
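
One usage note on the converter changes: `yolo_bbox2segment` now forwards a `device` argument to the SAM model. A hedged example with illustrative paths follows.

```python
# Minimal sketch using the new device pass-through added above; im_dir is a placeholder path
# whose sibling "labels" directory holds YOLO detection labels.
from ultralytics.data.converter import yolo_bbox2segment

yolo_bbox2segment(im_dir="path/to/dataset/images", sam_model="sam_b.pt", device=0)  # device=0 -> first CUDA GPU
```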
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 01cffd4a20f..0db566c01ed 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import json
from collections import defaultdict
from itertools import repeat
@@ -70,7 +69,7 @@ def cache_labels(self, path=Path("./labels.cache")):
Cache dataset labels, check images and read shapes.
Args:
- path (Path): Path where to save the cache file. Default is Path('./labels.cache').
+ path (Path): Path where to save the cache file. Default is Path("./labels.cache").
Returns:
(dict): labels.
@@ -220,8 +219,10 @@ def update_labels_info(self, label):
# NOTE: do NOT resample oriented boxes
segment_resamples = 100 if self.use_obb else 1000
if len(segments) > 0:
- # list[np.array(1000, 2)] * num_samples
- # (N, 1000, 2)
+ # make sure segments interpolate correctly if original length is greater than segment_resamples
+ max_len = max(len(s) for s in segments)
+ segment_resamples = (max_len + 1) if segment_resamples < max_len else segment_resamples
+ # list[np.array(segment_resamples, 2)] * num_samples
segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
else:
segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
@@ -299,7 +300,7 @@ def get_labels(self):
LOGGER.info("Loading annotation file...")
with open(self.json_file) as f:
annotations = json.load(f)
- images = {f'{x["id"]:d}': x for x in annotations["images"]}
+ images = {f"{x['id']:d}": x for x in annotations["images"]}
img_to_anns = defaultdict(list)
for ann in annotations["annotations"]:
img_to_anns[ann["image_id"]].append(ann)
@@ -323,7 +324,8 @@ def get_labels(self):
if box[2] <= 0 or box[3] <= 0:
continue
- cat_name = " ".join([img["caption"][t[0] : t[1]] for t in ann["tokens_positive"]])
+ caption = img["caption"]
+ cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]])
if cat_name not in cat2id:
cat2id[cat_name] = len(cat2id)
texts.append([cat_name])
@@ -484,7 +486,7 @@ def verify_images(self):
desc = f"{self.prefix}Scanning {self.root}..."
path = Path(self.root).with_suffix(".cache") # *.cache file path
- with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
+ try:
cache = load_dataset_cache_file(path) # attempt to load a *.cache file
assert cache["version"] == DATASET_CACHE_VERSION # matches current version
assert cache["hash"] == get_hash([x[0] for x in self.samples]) # identical hash
@@ -496,27 +498,29 @@ def verify_images(self):
LOGGER.info("\n".join(cache["msgs"])) # display warnings
return samples
- # Run scan if *.cache retrieval failed
- nf, nc, msgs, samples, x = 0, 0, [], [], {}
- with ThreadPool(NUM_THREADS) as pool:
- results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
- pbar = TQDM(results, desc=desc, total=len(self.samples))
- for sample, nf_f, nc_f, msg in pbar:
- if nf_f:
- samples.append(sample)
- if msg:
- msgs.append(msg)
- nf += nf_f
- nc += nc_f
- pbar.desc = f"{desc} {nf} images, {nc} corrupt"
- pbar.close()
- if msgs:
- LOGGER.info("\n".join(msgs))
- x["hash"] = get_hash([x[0] for x in self.samples])
- x["results"] = nf, nc, len(samples), samples
- x["msgs"] = msgs # warnings
- save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
- return samples
+ except (FileNotFoundError, AssertionError, AttributeError):
+ # Run scan if *.cache retrieval failed
+ nf, nc, msgs, samples, x = 0, 0, [], [], {}
+ with ThreadPool(NUM_THREADS) as pool:
+ results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
+ pbar = TQDM(results, desc=desc, total=len(self.samples))
+ for sample, nf_f, nc_f, msg in pbar:
+ if nf_f:
+ samples.append(sample)
+ if msg:
+ msgs.append(msg)
+ nf += nf_f
+ nc += nc_f
+ pbar.desc = f"{desc} {nf} images, {nc} corrupt"
+ pbar.close()
+ if msgs:
+ LOGGER.info("\n".join(msgs))
+ x["hash"] = get_hash([x[0] for x in self.samples])
+ x["results"] = nf, nc, len(samples), samples
+ x["msgs"] = msgs # warnings
+ save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
+ return samples
+
# Regression dataloaders -------------------------------------------------------------------------------------------
class RegressionDataset:
@@ -648,4 +652,4 @@ def verify_images(self):
x["results"] = nf, nc, len(samples), samples
x["msgs"] = msgs # warnings
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
- return samples
\ No newline at end of file
+ return samples
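
The segment-resampling tweak in `update_labels_info` reduces to a small rule; a standalone sketch is shown below, with an illustrative function name.

```python
# Standalone sketch of the resample-length rule added above: keep 1000 points (100 for OBB)
# unless some polygon is already longer, in which case use max_len + 1 so no vertices are lost.
def pick_resample_count(segments, use_obb=False):
    base = 100 if use_obb else 1000
    max_len = max((len(s) for s in segments), default=0)
    return max_len + 1 if base < max_len else base
```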
diff --git a/ultralytics/data/explorer/__init__.py b/ultralytics/data/explorer/__init__.py
deleted file mode 100644
index ce594dc1fd5..00000000000
--- a/ultralytics/data/explorer/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-from .utils import plot_query_result
-
-__all__ = ["plot_query_result"]
diff --git a/ultralytics/data/explorer/explorer.py b/ultralytics/data/explorer/explorer.py
deleted file mode 100644
index 0407c1a288a..00000000000
--- a/ultralytics/data/explorer/explorer.py
+++ /dev/null
@@ -1,460 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-from io import BytesIO
-from pathlib import Path
-from typing import Any, List, Tuple, Union
-
-import cv2
-import numpy as np
-import torch
-from matplotlib import pyplot as plt
-from PIL import Image
-from tqdm import tqdm
-
-from ultralytics.data.augment import Format
-from ultralytics.data.dataset import YOLODataset
-from ultralytics.data.utils import check_det_dataset
-from ultralytics.models.yolo.model import YOLO
-from ultralytics.utils import LOGGER, USER_CONFIG_DIR, IterableSimpleNamespace, checks
-
-from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch
-
-
-class ExplorerDataset(YOLODataset):
- """Extends YOLODataset for advanced data exploration and manipulation in model training workflows."""
-
- def __init__(self, *args, data: dict = None, **kwargs) -> None:
- """Initializes the ExplorerDataset with the provided data arguments, extending the YOLODataset class."""
- super().__init__(*args, data=data, **kwargs)
-
- def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
- """Loads 1 image from dataset index 'i' without any resize ops."""
- im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
- if im is None: # not cached in RAM
- if fn.exists(): # load npy
- im = np.load(fn)
- else: # read image
- im = cv2.imread(f) # BGR
- if im is None:
- raise FileNotFoundError(f"Image Not Found {f}")
- h0, w0 = im.shape[:2] # orig hw
- return im, (h0, w0), im.shape[:2]
-
- return self.ims[i], self.im_hw0[i], self.im_hw[i]
-
- def build_transforms(self, hyp: IterableSimpleNamespace = None):
- """Creates transforms for dataset images without resizing."""
- return Format(
- bbox_format="xyxy",
- normalize=False,
- return_mask=self.use_segments,
- return_keypoint=self.use_keypoints,
- batch_idx=True,
- mask_ratio=hyp.mask_ratio,
- mask_overlap=hyp.overlap_mask,
- )
-
-
-class Explorer:
- """Utility class for image embedding, table creation, and similarity querying using LanceDB and YOLO models."""
-
- def __init__(
- self,
- data: Union[str, Path] = "coco128.yaml",
- model: str = "yolov8n.pt",
- uri: str = USER_CONFIG_DIR / "explorer",
- ) -> None:
- """Initializes the Explorer class with dataset path, model, and URI for database connection."""
- # Note duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181
- checks.check_requirements(["lancedb>=0.4.3", "duckdb<=0.9.2"])
- import lancedb
-
- self.connection = lancedb.connect(uri)
- self.table_name = f"{Path(data).name.lower()}_{model.lower()}"
- self.sim_idx_base_name = (
- f"{self.table_name}_sim_idx".lower()
- ) # Use this name and append thres and top_k to reuse the table
- self.model = YOLO(model)
- self.data = data # None
- self.choice_set = None
-
- self.table = None
- self.progress = 0
-
- def create_embeddings_table(self, force: bool = False, split: str = "train") -> None:
- """
- Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it
- already exists. Pass force=True to overwrite the existing table.
-
- Args:
- force (bool): Whether to overwrite the existing table or not. Defaults to False.
- split (str): Split of the dataset to use. Defaults to 'train'.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- ```
- """
- if self.table is not None and not force:
- LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.")
- return
- if self.table_name in self.connection.table_names() and not force:
- LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.")
- self.table = self.connection.open_table(self.table_name)
- self.progress = 1
- return
- if self.data is None:
- raise ValueError("Data must be provided to create embeddings table")
-
- data_info = check_det_dataset(self.data)
- if split not in data_info:
- raise ValueError(
- f"Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}"
- )
-
- choice_set = data_info[split]
- choice_set = choice_set if isinstance(choice_set, list) else [choice_set]
- self.choice_set = choice_set
- dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task)
-
- # Create the table schema
- batch = dataset[0]
- vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0]
- table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite")
- table.add(
- self._yield_batches(
- dataset,
- data_info,
- self.model,
- exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"],
- )
- )
-
- self.table = table
-
- def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
- """Generates batches of data for embedding, excluding specified keys."""
- for i in tqdm(range(len(dataset))):
- self.progress = float(i + 1) / len(dataset)
- batch = dataset[i]
- for k in exclude_keys:
- batch.pop(k, None)
- batch = sanitize_batch(batch, data_info)
- batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist()
- yield [batch]
-
- def query(
- self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25
- ) -> Any: # pyarrow.Table
- """
- Query the table for similar images. Accepts a single image or a list of images.
-
- Args:
- imgs (str or list): Path to the image or a list of paths to the images.
- limit (int): Number of results to return.
-
- Returns:
- (pyarrow.Table): An arrow table containing the results. Supports converting to:
- - pandas dataframe: `result.to_pandas()`
- - dict of lists: `result.to_pydict()`
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- similar = exp.query(img="https://ultralytics.com/images/zidane.jpg")
- ```
- """
- if self.table is None:
- raise ValueError("Table is not created. Please create the table first.")
- if isinstance(imgs, str):
- imgs = [imgs]
- assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}"
- embeds = self.model.embed(imgs)
- # Get avg if multiple images are passed (len > 1)
- embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy()
- return self.table.search(embeds).limit(limit).to_arrow()
-
- def sql_query(
- self, query: str, return_type: str = "pandas"
- ) -> Union[Any, None]: # pandas.DataFrame or pyarrow.Table
- """
- Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown.
-
- Args:
- query (str): SQL query to run.
- return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
-
- Returns:
- (pyarrow.Table): An arrow table containing the results.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
- result = exp.sql_query(query)
- ```
- """
- assert return_type in {
- "pandas",
- "arrow",
- }, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
- import duckdb
-
- if self.table is None:
- raise ValueError("Table is not created. Please create the table first.")
-
- # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this.
- table = self.table.to_arrow() # noqa NOTE: Don't comment this. This line is used by DuckDB
- if not query.startswith("SELECT") and not query.startswith("WHERE"):
- raise ValueError(
- f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE "
- f"clause. found {query}"
- )
- if query.startswith("WHERE"):
- query = f"SELECT * FROM 'table' {query}"
- LOGGER.info(f"Running query: {query}")
-
- rs = duckdb.sql(query)
- if return_type == "arrow":
- return rs.arrow()
- elif return_type == "pandas":
- return rs.df()
-
- def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
- """
- Plot the results of a SQL-Like query on the table.
-
- Args:
- query (str): SQL query to run.
- labels (bool): Whether to plot the labels or not.
-
- Returns:
- (PIL.Image): Image containing the plot.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
- result = exp.plot_sql_query(query)
- ```
- """
- result = self.sql_query(query, return_type="arrow")
- if len(result) == 0:
- LOGGER.info("No results found.")
- return None
- img = plot_query_result(result, plot_labels=labels)
- return Image.fromarray(img)
-
- def get_similar(
- self,
- img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
- idx: Union[int, List[int]] = None,
- limit: int = 25,
- return_type: str = "pandas",
- ) -> Any: # pandas.DataFrame or pyarrow.Table
- """
- Query the table for similar images. Accepts a single image or a list of images.
-
- Args:
- img (str or list): Path to the image or a list of paths to the images.
- idx (int or list): Index of the image in the table or a list of indexes.
- limit (int): Number of results to return. Defaults to 25.
- return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
-
- Returns:
- (pandas.DataFrame): A dataframe containing the results.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg")
- ```
- """
- assert return_type in {"pandas", "arrow"}, f"Return type should be `pandas` or `arrow`, but got {return_type}"
- img = self._check_imgs_or_idxs(img, idx)
- similar = self.query(img, limit=limit)
-
- if return_type == "arrow":
- return similar
- elif return_type == "pandas":
- return similar.to_pandas()
-
- def plot_similar(
- self,
- img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
- idx: Union[int, List[int]] = None,
- limit: int = 25,
- labels: bool = True,
- ) -> Image.Image:
- """
- Plot the similar images. Accepts images or indexes.
-
- Args:
- img (str or list): Path to the image or a list of paths to the images.
- idx (int or list): Index of the image in the table or a list of indexes.
- labels (bool): Whether to plot the labels or not.
- limit (int): Number of results to return. Defaults to 25.
-
- Returns:
- (PIL.Image): Image containing the plot.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- similar = exp.plot_similar(img="https://ultralytics.com/images/zidane.jpg")
- ```
- """
- similar = self.get_similar(img, idx, limit, return_type="arrow")
- if len(similar) == 0:
- LOGGER.info("No results found.")
- return None
- img = plot_query_result(similar, plot_labels=labels)
- return Image.fromarray(img)
-
- def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Any: # pd.DataFrame
- """
- Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
- are max_dist or closer to the image in the embedding space at a given index.
-
- Args:
- max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
- top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit.
- vector search. Defaults: None.
- force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
-
- Returns:
- (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image,
- and columns include indices of similar images and their respective distances.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- sim_idx = exp.similarity_index()
- ```
- """
- if self.table is None:
- raise ValueError("Table is not created. Please create the table first.")
- sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower()
- if sim_idx_table_name in self.connection.table_names() and not force:
- LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.")
- return self.connection.open_table(sim_idx_table_name).to_pandas()
-
- if top_k and not (1.0 >= top_k >= 0.0):
- raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}")
- if max_dist < 0.0:
- raise ValueError(f"max_dist must be greater than 0. Got {max_dist}")
-
- top_k = int(top_k * len(self.table)) if top_k else len(self.table)
- top_k = max(top_k, 1)
- features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict()
- im_files = features["im_file"]
- embeddings = features["vector"]
-
- sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite")
-
- def _yield_sim_idx():
- """Generates a dataframe with similarity indices and distances for images."""
- for i in tqdm(range(len(embeddings))):
- sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}")
- yield [
- {
- "idx": i,
- "im_file": im_files[i],
- "count": len(sim_idx),
- "sim_im_files": sim_idx["im_file"].tolist(),
- }
- ]
-
- sim_table.add(_yield_sim_idx())
- self.sim_index = sim_table
- return sim_table.to_pandas()
-
- def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image:
- """
- Plot the similarity index of all the images in the table. Here, the index will contain the data points that are
- max_dist or closer to the image in the embedding space at a given index.
-
- Args:
- max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
- top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when
- running vector search. Defaults to 0.01.
- force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
-
- Returns:
- (PIL.Image): Image containing the plot.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
-
- similarity_idx_plot = exp.plot_similarity_index()
- similarity_idx_plot.show() # view image preview
- similarity_idx_plot.save("path/to/save/similarity_index_plot.png") # save contents to file
- ```
- """
- sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
- sim_count = sim_idx["count"].tolist()
- sim_count = np.array(sim_count)
-
- indices = np.arange(len(sim_count))
-
- # Create the bar plot
- plt.bar(indices, sim_count)
-
- # Customize the plot (optional)
- plt.xlabel("data idx")
- plt.ylabel("Count")
- plt.title("Similarity Count")
- buffer = BytesIO()
- plt.savefig(buffer, format="png")
- buffer.seek(0)
-
- # Use Pillow to open the image from the buffer
- return Image.fromarray(np.array(Image.open(buffer)))
-
- def _check_imgs_or_idxs(
- self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]]
- ) -> List[np.ndarray]:
- """Determines whether to fetch images or indexes based on provided arguments and returns image paths."""
- if img is None and idx is None:
- raise ValueError("Either img or idx must be provided.")
- if img is not None and idx is not None:
- raise ValueError("Only one of img or idx must be provided.")
- if idx is not None:
- idx = idx if isinstance(idx, list) else [idx]
- img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"]
-
- return img if isinstance(img, list) else [img]
-
- def ask_ai(self, query):
- """
- Ask AI a question.
-
- Args:
- query (str): Question to ask.
-
- Returns:
- (pandas.DataFrame): A dataframe containing filtered results to the SQL query.
-
- Example:
- ```python
- exp = Explorer()
- exp.create_embeddings_table()
- answer = exp.ask_ai("Show images with 1 person and 2 dogs")
- ```
- """
- result = prompt_sql_query(query)
- try:
- return self.sql_query(result)
- except Exception as e:
- LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
- LOGGER.error(e)
- return None
diff --git a/ultralytics/data/explorer/gui/__init__.py b/ultralytics/data/explorer/gui/__init__.py
deleted file mode 100644
index 9e68dc12245..00000000000
--- a/ultralytics/data/explorer/gui/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
diff --git a/ultralytics/data/explorer/gui/dash.py b/ultralytics/data/explorer/gui/dash.py
deleted file mode 100644
index 81f1f62a8a4..00000000000
--- a/ultralytics/data/explorer/gui/dash.py
+++ /dev/null
@@ -1,282 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-import sys
-import time
-from threading import Thread
-
-from ultralytics import Explorer
-from ultralytics.utils import ROOT, SETTINGS
-from ultralytics.utils.checks import check_requirements
-
-check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3"))
-
-import streamlit as st
-from streamlit_select import image_select
-
-
-def _get_explorer():
- """Initializes and returns an instance of the Explorer class."""
- exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
- thread = Thread(
- target=exp.create_embeddings_table,
- kwargs={"force": st.session_state.get("force_recreate_embeddings"), "split": st.session_state.get("split")},
- )
- thread.start()
- progress_bar = st.progress(0, text="Creating embeddings table...")
- while exp.progress < 1:
- time.sleep(0.1)
- progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%")
- thread.join()
- st.session_state["explorer"] = exp
- progress_bar.empty()
-
-
-def init_explorer_form(data=None, model=None):
- """Initializes an Explorer instance and creates embeddings table with progress tracking."""
- if data is None:
- datasets = ROOT / "cfg" / "datasets"
- ds = [d.name for d in datasets.glob("*.yaml")]
- else:
- ds = [data]
-
- if model is None:
- models = [
- "yolov8n.pt",
- "yolov8s.pt",
- "yolov8m.pt",
- "yolov8l.pt",
- "yolov8x.pt",
- "yolov8n-seg.pt",
- "yolov8s-seg.pt",
- "yolov8m-seg.pt",
- "yolov8l-seg.pt",
- "yolov8x-seg.pt",
- "yolov8n-pose.pt",
- "yolov8s-pose.pt",
- "yolov8m-pose.pt",
- "yolov8l-pose.pt",
- "yolov8x-pose.pt",
- ]
- else:
- models = [model]
-
- splits = ["train", "val", "test"]
-
- with st.form(key="explorer_init_form"):
- col1, col2, col3 = st.columns(3)
- with col1:
- st.selectbox("Select dataset", ds, key="dataset")
- with col2:
- st.selectbox("Select model", models, key="model")
- with col3:
- st.selectbox("Select split", splits, key="split")
- st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")
-
- st.form_submit_button("Explore", on_click=_get_explorer)
-
-
-def query_form():
- """Sets up a form in Streamlit to initialize Explorer with dataset and model selection."""
- with st.form("query_form"):
- col1, col2 = st.columns([0.8, 0.2])
- with col1:
- st.text_input(
- "Query",
- "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'",
- label_visibility="collapsed",
- key="query",
- )
- with col2:
- st.form_submit_button("Query", on_click=run_sql_query)
-
-
-def ai_query_form():
- """Sets up a Streamlit form for user input to initialize Explorer with dataset and model selection."""
- with st.form("ai_query_form"):
- col1, col2 = st.columns([0.8, 0.2])
- with col1:
- st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query")
- with col2:
- st.form_submit_button("Ask AI", on_click=run_ai_query)
-
-
-def find_similar_imgs(imgs):
- """Initializes a Streamlit form for AI-based image querying with custom input."""
- exp = st.session_state["explorer"]
- similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow")
- paths = similar.to_pydict()["im_file"]
- st.session_state["imgs"] = paths
- st.session_state["res"] = similar
-
-
-def similarity_form(selected_imgs):
- """Initializes a form for AI-based image querying with custom input in Streamlit."""
- st.write("Similarity Search")
- with st.form("similarity_form"):
- subcol1, subcol2 = st.columns([1, 1])
- with subcol1:
- st.number_input(
- "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit"
- )
-
- with subcol2:
- disabled = not len(selected_imgs)
- st.write("Selected: ", len(selected_imgs))
- st.form_submit_button(
- "Search",
- disabled=disabled,
- on_click=find_similar_imgs,
- args=(selected_imgs,),
- )
- if disabled:
- st.error("Select at least one image to search.")
-
-
-# def persist_reset_form():
-# with st.form("persist_reset"):
-# col1, col2 = st.columns([1, 1])
-# with col1:
-# st.form_submit_button("Reset", on_click=reset)
-#
-# with col2:
-# st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True))
-
-
-def run_sql_query():
- """Executes an SQL query and returns the results."""
- st.session_state["error"] = None
- query = st.session_state.get("query")
- if query.rstrip().lstrip():
- exp = st.session_state["explorer"]
- res = exp.sql_query(query, return_type="arrow")
- st.session_state["imgs"] = res.to_pydict()["im_file"]
- st.session_state["res"] = res
-
-
-def run_ai_query():
- """Execute SQL query and update session state with query results."""
- if not SETTINGS["openai_api_key"]:
- st.session_state["error"] = (
- 'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
- )
- return
- import pandas # scope for faster 'import ultralytics'
-
- st.session_state["error"] = None
- query = st.session_state.get("ai_query")
- if query.rstrip().lstrip():
- exp = st.session_state["explorer"]
- res = exp.ask_ai(query)
- if not isinstance(res, pandas.DataFrame) or res.empty:
- st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
- return
- st.session_state["imgs"] = res["im_file"].to_list()
- st.session_state["res"] = res
-
-
-def reset_explorer():
- """Resets the explorer to its initial state by clearing session variables."""
- st.session_state["explorer"] = None
- st.session_state["imgs"] = None
- st.session_state["error"] = None
-
-
-def utralytics_explorer_docs_callback():
-    """Displays the Ultralytics Explorer API documentation link inside a bordered container."""
- with st.container(border=True):
- st.image(
- "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg",
- width=100,
- )
- st.markdown(
-            "This demo is built using Ultralytics Explorer API. Visit API docs to try examples & learn more",
- unsafe_allow_html=True,
- help=None,
- )
-    st.link_button("Ultralytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")
-
-
-def layout(data=None, model=None):
-    """Renders the main Explorer dashboard, handling dataset selection, queries, and image display."""
- st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
-    st.markdown("Ultralytics Explorer Demo", unsafe_allow_html=True)
-
- if st.session_state.get("explorer") is None:
- init_explorer_form(data, model)
- return
-
- st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
- exp = st.session_state.get("explorer")
- col1, col2 = st.columns([0.75, 0.25], gap="small")
- imgs = []
- if st.session_state.get("error"):
- st.error(st.session_state["error"])
- elif st.session_state.get("imgs"):
- imgs = st.session_state.get("imgs")
- else:
- imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"]
- st.session_state["res"] = exp.table.to_arrow()
- total_imgs, selected_imgs = len(imgs), []
- with col1:
- subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5)
- with subcol1:
- st.write("Max Images Displayed:")
- with subcol2:
- num = st.number_input(
- "Max Images Displayed",
- min_value=0,
- max_value=total_imgs,
- value=min(500, total_imgs),
- key="num_imgs_displayed",
- label_visibility="collapsed",
- )
- with subcol3:
- st.write("Start Index:")
- with subcol4:
- start_idx = st.number_input(
- "Start Index",
- min_value=0,
- max_value=total_imgs,
- value=0,
- key="start_index",
- label_visibility="collapsed",
- )
- with subcol5:
- reset = st.button("Reset", use_container_width=False, key="reset")
- if reset:
- st.session_state["imgs"] = None
- st.experimental_rerun()
-
- query_form()
- ai_query_form()
- if total_imgs:
- labels, boxes, masks, kpts, classes = None, None, None, None, None
- task = exp.model.task
- if st.session_state.get("display_labels"):
- labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num]
- boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num]
- masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num]
- kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num]
- classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num]
- imgs_displayed = imgs[start_idx : start_idx + num]
- selected_imgs = image_select(
- f"Total samples: {total_imgs}",
- images=imgs_displayed,
- use_container_width=False,
- # indices=[i for i in range(num)] if select_all else None,
- labels=labels,
- classes=classes,
- bboxes=boxes,
- masks=masks if task == "segment" else None,
- kpts=kpts if task == "pose" else None,
- )
-
- with col2:
- similarity_form(selected_imgs)
- st.checkbox("Labels", value=False, key="display_labels")
- utralytics_explorer_docs_callback()
-
-
-if __name__ == "__main__":
- kwargs = dict(zip(sys.argv[1::2], sys.argv[2::2]))
- layout(**kwargs)
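For reference, the dashboard removed above was a thin wrapper around the Explorer API that this patch also deletes. A minimal standalone sketch of the same flow, assuming the pre-removal Explorer(data=..., model=...) constructor together with the create_embeddings_table, sql_query, and get_similar methods the GUI code calls above:

# Sketch of the pre-removal Explorer API wrapped by the deleted Streamlit GUI (constructor args assumed).
from ultralytics import Explorer

exp = Explorer(data="coco8.yaml", model="yolov8n.pt")
exp.create_embeddings_table()  # build the LanceDB embeddings table once per dataset/model/split
res = exp.sql_query("SELECT * FROM 'table' WHERE labels LIKE '%person%'", return_type="arrow")
print(res.to_pydict()["im_file"][:5])  # first few matching image paths
similar = exp.get_similar(img=res.to_pydict()["im_file"][0], limit=25, return_type="arrow")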
diff --git a/ultralytics/data/explorer/utils.py b/ultralytics/data/explorer/utils.py
deleted file mode 100644
index 76f25572759..00000000000
--- a/ultralytics/data/explorer/utils.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-import getpass
-from typing import List
-
-import cv2
-import numpy as np
-
-from ultralytics.data.augment import LetterBox
-from ultralytics.utils import LOGGER as logger
-from ultralytics.utils import SETTINGS
-from ultralytics.utils.checks import check_requirements
-from ultralytics.utils.ops import xyxy2xywh
-from ultralytics.utils.plotting import plot_images
-
-
-def get_table_schema(vector_size):
-    """Returns a LanceModel schema for the embeddings table with the specified vector size."""
- from lancedb.pydantic import LanceModel, Vector
-
- class Schema(LanceModel):
- im_file: str
- labels: List[str]
- cls: List[int]
- bboxes: List[List[float]]
- masks: List[List[List[int]]]
- keypoints: List[List[List[float]]]
- vector: Vector(vector_size)
-
- return Schema
-
-
-def get_sim_index_schema():
-    """Returns a LanceModel schema for the similarity-index table."""
- from lancedb.pydantic import LanceModel
-
- class Schema(LanceModel):
- idx: int
- im_file: str
- count: int
- sim_im_files: List[str]
-
- return Schema
-
-
-def sanitize_batch(batch, dataset_info):
- """Sanitizes input batch for inference, ensuring correct format and dimensions."""
- batch["cls"] = batch["cls"].flatten().int().tolist()
- box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1])
- batch["bboxes"] = [box for box, _ in box_cls_pair]
- batch["cls"] = [cls for _, cls in box_cls_pair]
- batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]]
- batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]]
- batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]]
- return batch
-
-
-def plot_query_result(similar_set, plot_labels=True):
- """
- Plot images from the similar set.
-
- Args:
- similar_set (list): Pyarrow or pandas object containing the similar data points
- plot_labels (bool): Whether to plot labels or not
- """
- import pandas # scope for faster 'import ultralytics'
-
- similar_set = (
- similar_set.to_dict(orient="list") if isinstance(similar_set, pandas.DataFrame) else similar_set.to_pydict()
- )
- empty_masks = [[[]]]
- empty_boxes = [[]]
- images = similar_set.get("im_file", [])
- bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else []
- masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else []
- kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else []
- cls = similar_set.get("cls", [])
-
- plot_size = 640
- imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], []
- for i, imf in enumerate(images):
- im = cv2.imread(imf)
- im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
- h, w = im.shape[:2]
- r = min(plot_size / h, plot_size / w)
- imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1))
- if plot_labels:
- if len(bboxes) > i and len(bboxes[i]) > 0:
- box = np.array(bboxes[i], dtype=np.float32)
- box[:, [0, 2]] *= r
- box[:, [1, 3]] *= r
- plot_boxes.append(box)
- if len(masks) > i and len(masks[i]) > 0:
- mask = np.array(masks[i], dtype=np.uint8)[0]
- plot_masks.append(LetterBox(plot_size, center=False)(image=mask))
- if len(kpts) > i and kpts[i] is not None:
- kpt = np.array(kpts[i], dtype=np.float32)
- kpt[:, :, :2] *= r
- plot_kpts.append(kpt)
- batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i)
- imgs = np.stack(imgs, axis=0)
- masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8)
- kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32)
- boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32)
- batch_idx = np.concatenate(batch_idx, axis=0)
- cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0)
-
- return plot_images(
- imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False
- )
-
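A short usage sketch for the removed plot_query_result helper above, assuming the same pre-removal Explorer API as in the earlier sketch; the helper accepts either a pandas DataFrame or a pyarrow table of query results:

# Sketch only: Explorer usage assumed as in the earlier example; plot_query_result is the helper defined above.
from ultralytics import Explorer

exp = Explorer(data="coco8.yaml", model="yolov8n.pt")
exp.create_embeddings_table()
similar = exp.get_similar(img="path/to/image.jpg", limit=9, return_type="arrow")
grid = plot_query_result(similar, plot_labels=True)  # plotted image grid via plot_images(..., save=False)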
-
-def prompt_sql_query(query):
-    """Generates an SQL query from a natural-language prompt using the OpenAI API."""
- check_requirements("openai>=1.6.1")
- from openai import OpenAI
-
- if not SETTINGS["openai_api_key"]:
- logger.warning("OpenAI API key not found in settings. Please enter your API key below.")
- openai_api_key = getpass.getpass("OpenAI API key: ")
- SETTINGS.update({"openai_api_key": openai_api_key})
- openai = OpenAI(api_key=SETTINGS["openai_api_key"])
-
- messages = [
- {
- "role": "system",
- "content": """
- You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on
- the following schema and a user request. You only need to output the format with fixed selection
- statement that selects everything from "'table'", like `SELECT * from 'table'`
-
- Schema:
- im_file: string not null
- labels: list not null
- child 0, item: string
- cls: list not null
- child 0, item: int64
- bboxes: list> not null
- child 0, item: list
- child 0, item: double
- masks: list>> not null
- child 0, item: list>
- child 0, item: list
- child 0, item: int64
- keypoints: list>> not null
- child 0, item: list>
- child 0, item: list
- child 0, item: double
- vector: fixed_size_list[256] not null
- child 0, item: float
-
- Some details about the schema:
- - the "labels" column contains the string values like 'person' and 'dog' for the respective objects
- in each image
- - the "cls" column contains the integer values on these classes that map them the labels
-
- Example of a correct query:
- request - Get all data points that contain 2 or more people and at least one dog
- correct query-
- SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1;
- """,
- },
- {"role": "user", "content": f"{query}"},
- ]
-
- response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
- return response.choices[0].message.content
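For completeness, a sketch of how the removed prompt_sql_query helper was called; it needs SETTINGS["openai_api_key"] to be set (otherwise it prompts for one) and returns the generated SQL as a plain string:

# Sketch only: requires an OpenAI API key in SETTINGS; the returned string is whatever the model produced.
sql = prompt_sql_query("Show images with 2 persons and at least one dog")
print(sql)  # expected form: SELECT * FROM 'table' WHERE ...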
diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py
index e91f2082c5e..3a04bb0383d 100644
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import glob
import math
@@ -18,11 +18,29 @@
from ultralytics.data.utils import FORMATS_HELP_MSG, IMG_FORMATS, VID_FORMATS
from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, ops
from ultralytics.utils.checks import check_requirements
+from ultralytics.utils.patches import imread
@dataclass
class SourceTypes:
- """Class to represent various types of input sources for predictions."""
+ """
+ Class to represent various types of input sources for predictions.
+
+ This class uses dataclass to define boolean flags for different types of input sources that can be used for
+ making predictions with YOLO models.
+
+ Attributes:
+ stream (bool): Flag indicating if the input source is a video stream.
+ screenshot (bool): Flag indicating if the input source is a screenshot.
+ from_img (bool): Flag indicating if the input source is an image file.
+
+ Examples:
+ >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False)
+ >>> print(source_types.stream)
+ True
+ >>> print(source_types.from_img)
+ False
+ """
stream: bool = False
screenshot: bool = False
@@ -32,38 +50,47 @@ class SourceTypes:
class LoadStreams:
"""
- Stream Loader for various types of video streams, Supports RTSP, RTMP, HTTP, and TCP streams.
+ Stream Loader for various types of video streams.
+
+ Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
+ streams simultaneously, making it suitable for real-time video analysis tasks.
Attributes:
- sources (str): The source input paths or URLs for the video streams.
- vid_stride (int): Video frame-rate stride, defaults to 1.
- buffer (bool): Whether to buffer input streams, defaults to False.
+ sources (List[str]): The source input paths or URLs for the video streams.
+ vid_stride (int): Video frame-rate stride.
+ buffer (bool): Whether to buffer input streams.
running (bool): Flag to indicate if the streaming thread is running.
mode (str): Set to 'stream' indicating real-time capture.
- imgs (list): List of image frames for each stream.
- fps (list): List of FPS for each stream.
- frames (list): List of total frames for each stream.
- threads (list): List of threads for each stream.
- shape (list): List of shapes for each stream.
- caps (list): List of cv2.VideoCapture objects for each stream.
+ imgs (List[List[np.ndarray]]): List of image frames for each stream.
+ fps (List[float]): List of FPS for each stream.
+ frames (List[int]): List of total frames for each stream.
+ threads (List[Thread]): List of threads for each stream.
+ shape (List[Tuple[int, int, int]]): List of shapes for each stream.
+ caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
bs (int): Batch size for processing.
Methods:
- __init__: Initialize the stream loader.
update: Read stream frames in daemon thread.
close: Close stream loader and release resources.
__iter__: Returns an iterator object for the class.
__next__: Returns source paths, transformed, and original images for processing.
__len__: Return the length of the sources object.
- Example:
- ```bash
- yolo predict source='rtsp://example.com/media.mp4'
- ```
+ Examples:
+ >>> stream_loader = LoadStreams("rtsp://example.com/stream1.mp4")
+ >>> for sources, imgs, _ in stream_loader:
+ ... # Process the images
+ ... pass
+ >>> stream_loader.close()
+
+ Notes:
+ - The class uses threading to efficiently load frames from multiple streams simultaneously.
+ - It automatically handles YouTube links, converting them to the best available stream URL.
+ - The class implements a buffer system to manage frame storage and retrieval.
"""
def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
- """Initialize instance variables and check for consistent input stream shapes."""
+ """Initialize stream loader for multiple video sources, supporting various stream types."""
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
self.buffer = buffer # buffer input streams
self.running = True # running flag for Thread
@@ -114,7 +141,7 @@ def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
LOGGER.info("") # newline
def update(self, i, cap, stream):
- """Read stream `i` frames in daemon thread."""
+ """Read stream frames in daemon thread and update image buffer."""
n, f = 0, self.frames[i] # frame number, frame array
while self.running and cap.isOpened() and n < (f - 1):
if len(self.imgs[i]) < 30: # keep a <=30-image buffer
@@ -134,7 +161,7 @@ def update(self, i, cap, stream):
time.sleep(0.01) # wait until the buffer is empty
def close(self):
- """Close stream loader and release resources."""
+ """Terminates stream loader, stops threads, and releases video capture resources."""
self.running = False # stop flag for Thread
for thread in self.threads:
if thread.is_alive():
@@ -152,7 +179,7 @@ def __iter__(self):
return self
def __next__(self):
- """Returns source paths, transformed and original images for processing."""
+ """Returns the next batch of frames from multiple video streams for processing."""
self.count += 1
images = []
@@ -179,16 +206,16 @@ def __next__(self):
return self.sources, images, [""] * self.bs
def __len__(self):
- """Return the length of the sources object."""
+ """Return the number of video streams in the LoadStreams object."""
return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years
class LoadScreenshots:
"""
- YOLOv8 screenshot dataloader.
+ Ultralytics screenshot dataloader for capturing and processing screen images.
- This class manages the loading of screenshot images for processing with YOLOv8.
- Suitable for use with `yolo predict source=screen`.
+ This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
+ `yolo predict source=screen`.
Attributes:
source (str): The source input indicating which screen to capture.
@@ -201,15 +228,21 @@ class LoadScreenshots:
frame (int): Counter for captured frames.
sct (mss.mss): Screen capture object from `mss` library.
bs (int): Batch size, set to 1.
- monitor (dict): Monitor configuration details.
+ fps (int): Frames per second, set to 30.
+ monitor (Dict[str, int]): Monitor configuration details.
Methods:
__iter__: Returns an iterator object.
__next__: Captures the next screenshot and returns it.
+
+ Examples:
+ >>> loader = LoadScreenshots("0 100 100 640 480") # screen 0, top-left (100,100), 640x480
+ >>> for source, im, im0s, vid_cap, s in loader:
+ ... print(f"Captured frame: {im.shape}")
"""
def __init__(self, source):
- """Source = [screen_number left top width height] (pixels)."""
+ """Initialize screenshot capture with specified screen and region parameters."""
check_requirements("mss")
import mss # noqa
@@ -236,11 +269,11 @@ def __init__(self, source):
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
def __iter__(self):
- """Returns an iterator of the object."""
+ """Yields the next screenshot image from the specified screen or region for processing."""
return self
def __next__(self):
- """Screen capture with 'mss' to get raw pixels from the screen as np array."""
+ """Captures and returns the next screenshot as a numpy array using the mss library."""
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
@@ -250,29 +283,45 @@ def __next__(self):
class LoadImagesAndVideos:
"""
- YOLOv8 image/video dataloader.
+ A class for loading and processing images and videos for YOLO object detection.
- This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
- various formats, including single image files, video files, and lists of image and video paths.
+ This class manages the loading and pre-processing of image and video data from various sources, including
+ single image files, video files, and lists of image and video paths.
Attributes:
- files (list): List of image and video file paths.
+ files (List[str]): List of image and video file paths.
nf (int): Total number of files (images and videos).
- video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
+ video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False).
mode (str): Current mode, 'image' or 'video'.
- vid_stride (int): Stride for video frame-rate, defaults to 1.
- bs (int): Batch size, set to 1 for this class.
+ vid_stride (int): Stride for video frame-rate.
+ bs (int): Batch size.
cap (cv2.VideoCapture): Video capture object for OpenCV.
frame (int): Frame counter for video.
frames (int): Total number of frames in the video.
- count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+ count (int): Counter for iteration, initialized at 0 during __iter__().
+ ni (int): Number of images.
Methods:
- _new_video(path): Create a new cv2.VideoCapture object for a given video path.
+ __init__: Initialize the LoadImagesAndVideos object.
+ __iter__: Returns an iterator object for VideoStream or ImageFolder.
+ __next__: Returns the next batch of images or video frames along with their paths and metadata.
+ _new_video: Creates a new video capture object for the given path.
+ __len__: Returns the number of batches in the object.
+
+ Examples:
+ >>> loader = LoadImagesAndVideos("path/to/data", batch=32, vid_stride=1)
+ >>> for paths, imgs, info in loader:
+ ... # Process batch of images or video frames
+ ... pass
+
+ Notes:
+ - Supports various image formats including HEIC.
+ - Handles both local files and directories.
+ - Can read from a text file containing paths to images and videos.
"""
def __init__(self, path, batch=1, vid_stride=1):
- """Initialize the Dataloader and raise FileNotFoundError if file not found."""
+ """Initialize dataloader for images and videos, supporting various input formats."""
parent = None
if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
parent = Path(path).parent
@@ -305,7 +354,7 @@ def __init__(self, path, batch=1, vid_stride=1):
self.nf = ni + nv # number of files
self.ni = ni # number of images
self.video_flag = [False] * ni + [True] * nv
- self.mode = "image"
+ self.mode = "video" if ni == 0 else "image" # default to video if no images
self.vid_stride = vid_stride # video frame-rate stride
self.bs = batch
if any(videos):
@@ -316,12 +365,12 @@ def __init__(self, path, batch=1, vid_stride=1):
raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")
def __iter__(self):
- """Returns an iterator object for VideoStream or ImageFolder."""
+ """Iterates through image/video files, yielding source paths, images, and metadata."""
self.count = 0
return self
def __next__(self):
- """Returns the next batch of images or video frames along with their paths and metadata."""
+ """Returns the next batch of images or video frames with their paths and metadata."""
paths, imgs, info = [], [], []
while len(imgs) < self.bs:
if self.count >= self.nf: # end of file list
@@ -336,6 +385,7 @@ def __next__(self):
if not self.cap or not self.cap.isOpened():
self._new_video(path)
+ success = False
for _ in range(self.vid_stride):
success = self.cap.grab()
if not success:
@@ -359,8 +409,19 @@ def __next__(self):
if self.count < self.nf:
self._new_video(self.files[self.count])
else:
+ # Handle image files (including HEIC)
self.mode = "image"
- im0 = cv2.imread(path) # BGR
+ if path.split(".")[-1].lower() == "heic":
+ # Load HEIC image using Pillow with pillow-heif
+ check_requirements("pillow-heif")
+
+ from pillow_heif import register_heif_opener
+
+ register_heif_opener() # Register HEIF opener with Pillow
+ with Image.open(path) as img:
+ im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # convert image to BGR nparray
+ else:
+ im0 = imread(path) # BGR
if im0 is None:
LOGGER.warning(f"WARNING ⚠️ Image Read Error {path}")
else:
@@ -374,7 +435,7 @@ def __next__(self):
return paths, imgs, info
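The HEIC branch added to __next__ above defers to pillow-heif; a standalone sketch of the same conversion, assuming the pillow-heif package is installed and "photo.heic" is a placeholder path:

# Standalone sketch of the HEIC path above (pillow-heif assumed installed; path is a placeholder).
import cv2
import numpy as np
from PIL import Image
from pillow_heif import register_heif_opener

register_heif_opener()  # lets PIL open *.heic files
with Image.open("photo.heic") as img:
    im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)  # RGB -> BGR to match the OpenCV convention used here
print(im0.shape)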
def _new_video(self, path):
- """Creates a new video capture object for the given path."""
+ """Creates a new video capture object for the given path and initializes video-related attributes."""
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
@@ -383,40 +444,50 @@ def _new_video(self, path):
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
def __len__(self):
- """Returns the number of batches in the object."""
- return math.ceil(self.nf / self.bs) # number of files
+ """Returns the number of files (images and videos) in the dataset."""
+ return math.ceil(self.nf / self.bs) # number of batches
class LoadPilAndNumpy:
"""
Load images from PIL and Numpy arrays for batch processing.
- This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
- It performs basic validation and format conversion to ensure that the images are in the required format for
- downstream processing.
+ This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
+ validation and format conversion to ensure that the images are in the required format for downstream processing.
Attributes:
- paths (list): List of image paths or autogenerated filenames.
- im0 (list): List of images stored as Numpy arrays.
- mode (str): Type of data being processed, defaults to 'image'.
+ paths (List[str]): List of image paths or autogenerated filenames.
+ im0 (List[np.ndarray]): List of images stored as Numpy arrays.
+ mode (str): Type of data being processed, set to 'image'.
bs (int): Batch size, equivalent to the length of `im0`.
Methods:
- _single_check(im): Validate and format a single image to a Numpy array.
+ _single_check: Validate and format a single image to a Numpy array.
+
+ Examples:
+ >>> from PIL import Image
+ >>> import numpy as np
+ >>> pil_img = Image.new("RGB", (100, 100))
+ >>> np_img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
+ >>> loader = LoadPilAndNumpy([pil_img, np_img])
+ >>> paths, images, _ = next(iter(loader))
+ >>> print(f"Loaded {len(images)} images")
+ Loaded 2 images
"""
def __init__(self, im0):
- """Initialize PIL and Numpy Dataloader."""
+ """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format."""
if not isinstance(im0, list):
im0 = [im0]
- self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
+ # use `image{i}.jpg` when Image.filename returns an empty path.
+ self.paths = [getattr(im, "filename", "") or f"image{i}.jpg" for i, im in enumerate(im0)]
self.im0 = [self._single_check(im) for im in im0]
self.mode = "image"
self.bs = len(self.im0)
@staticmethod
def _single_check(im):
- """Validate and format an image to numpy array."""
+ """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
if isinstance(im, Image.Image):
if im.mode != "RGB":
@@ -426,41 +497,48 @@ def _single_check(im):
return im
def __len__(self):
- """Returns the length of the 'im0' attribute."""
+ """Returns the length of the 'im0' attribute, representing the number of loaded images."""
return len(self.im0)
def __next__(self):
- """Returns batch paths, images, processed images, None, ''."""
+ """Returns the next batch of images, paths, and metadata for processing."""
if self.count == 1: # loop only once as it's batch inference
raise StopIteration
self.count += 1
return self.paths, self.im0, [""] * self.bs
def __iter__(self):
- """Enables iteration for class LoadPilAndNumpy."""
+ """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
self.count = 0
return self
class LoadTensor:
"""
- Load images from torch.Tensor data.
+ A class for loading and processing tensor data for object detection tasks.
- This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
+ This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
+ further processing in object detection pipelines.
Attributes:
- im0 (torch.Tensor): The input tensor containing the image(s).
+ im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
bs (int): Batch size, inferred from the shape of `im0`.
- mode (str): Current mode, set to 'image'.
- paths (list): List of image paths or filenames.
- count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+ mode (str): Current processing mode, set to 'image'.
+ paths (List[str]): List of image paths or auto-generated filenames.
Methods:
- _single_check(im, stride): Validate and possibly modify the input tensor.
+ _single_check: Validates and formats an input tensor.
+
+ Examples:
+ >>> import torch
+ >>> tensor = torch.rand(1, 3, 640, 640)
+ >>> loader = LoadTensor(tensor)
+ >>> paths, images, info = next(iter(loader))
+ >>> print(f"Processed {len(images)} images")
"""
def __init__(self, im0) -> None:
- """Initialize Tensor Dataloader."""
+ """Initialize LoadTensor object for processing torch.Tensor image data."""
self.im0 = self._single_check(im0)
self.bs = self.im0.shape[0]
self.mode = "image"
@@ -468,7 +546,7 @@ def __init__(self, im0) -> None:
@staticmethod
def _single_check(im, stride=32):
- """Validate and format an image to torch.Tensor."""
+ """Validates and formats a single image tensor, ensuring correct shape and normalization."""
s = (
f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
@@ -490,24 +568,24 @@ def _single_check(im, stride=32):
return im
def __iter__(self):
- """Returns an iterator object."""
+ """Yields an iterator object for iterating through tensor image data."""
self.count = 0
return self
def __next__(self):
- """Return next item in the iterator."""
+ """Yields the next batch of tensor images and metadata for processing."""
if self.count == 1:
raise StopIteration
self.count += 1
return self.paths, self.im0, [""] * self.bs
def __len__(self):
- """Returns the batch size."""
+ """Returns the batch size of the tensor input."""
return self.bs
def autocast_list(source):
- """Merges a list of source of different types into a list of numpy arrays or PIL images."""
+ """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
files = []
for im in source:
if isinstance(im, (str, Path)): # filename or uri
@@ -527,21 +605,24 @@ def get_best_youtube_url(url, method="pytube"):
"""
Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
- This function uses the specified method to extract the video info from YouTube. It supports the following methods:
- - "pytube": Uses the pytube library to fetch the video streams.
- - "pafy": Uses the pafy library to fetch the video streams.
- - "yt-dlp": Uses the yt-dlp library to fetch the video streams.
-
- The function then finds the highest quality MP4 format that has a video codec but no audio codec, and returns the
- URL of this video stream.
-
Args:
url (str): The URL of the YouTube video.
- method (str): The method to use for extracting video info. Default is "pytube". Other options are "pafy" and
- "yt-dlp".
+ method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
+ Defaults to "pytube".
Returns:
- (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
+ (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
+
+ Examples:
+ >>> url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+ >>> best_url = get_best_youtube_url(url)
+ >>> print(best_url)
+ https://rr4---sn-q4flrnek.googlevideo.com/videoplayback?expire=...
+
+ Notes:
+ - Requires additional libraries based on the chosen method: pytubefix, pafy, or yt-dlp.
+ - The function prioritizes streams with at least 1080p resolution when available.
+ - For the "yt-dlp" method, it looks for formats with video codec, no audio, and *.mp4 extension.
"""
if method == "pytube":
# Switched from pytube to pytubefix to resolve https://github.com/pytube/pytube/issues/1954
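A small sketch of the behavior documented above: resolve a YouTube link to a direct stream URL and hand it to OpenCV (yt-dlp must be installed for this method; the URL is the docstring's example):

# Sketch: resolve a YouTube link per get_best_youtube_url above, then open it with OpenCV.
import cv2

from ultralytics.data.loaders import get_best_youtube_url

url = get_best_youtube_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ", method="yt-dlp")
cap = cv2.VideoCapture(url) if url else None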
diff --git a/ultralytics/data/scripts/download_weights.sh b/ultralytics/data/scripts/download_weights.sh
index 87db31fe1e6..f8a739f6d61 100755
--- a/ultralytics/data/scripts/download_weights.sh
+++ b/ultralytics/data/scripts/download_weights.sh
@@ -11,8 +11,8 @@
python - <<EOF
+ >>> label_map = {0: "cat", 1: "dog", 2: "bird"} # It should include all annotated classes details
+ >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
+ """
+ import matplotlib.pyplot as plt
+
+ from ultralytics.utils.plotting import colors
+
+ img = np.array(Image.open(image_path))
+ img_height, img_width = img.shape[:2]
+ annotations = []
+ with open(txt_path) as file:
+ for line in file:
+ class_id, x_center, y_center, width, height = map(float, line.split())
+ x = (x_center - width / 2) * img_width
+ y = (y_center - height / 2) * img_height
+ w = width * img_width
+ h = height * img_height
+ annotations.append((x, y, w, h, int(class_id)))
+ fig, ax = plt.subplots(1) # Plot the image and annotations
+ for x, y, w, h, label in annotations:
+ color = tuple(c / 255 for c in colors(label, True)) # Get and normalize the RGB color
+ rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor="none") # Create a rectangle
+ ax.add_patch(rect)
+ luminance = 0.2126 * color[0] + 0.7152 * color[1] + 0.0722 * color[2] # Formula for luminance
+ ax.text(x, y - 5, label_map[label], color="white" if luminance < 0.5 else "black", backgroundcolor=color)
+ ax.imshow(img)
+ plt.show()
+
+
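A usage sketch for the new visualize_image_annotations helper above; the paths are placeholders and the label file is expected to hold normalized YOLO rows (class x_center y_center width height):

# Placeholder paths; label_map must cover every class id that appears in the .txt file.
from ultralytics.data.utils import visualize_image_annotations

label_map = {0: "person", 1: "car"}
visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)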
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
"""
Convert a list of polygons to a binary mask of the specified image size.
@@ -216,7 +264,7 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
ms = []
for si in range(len(segments)):
mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1)
- ms.append(mask)
+ ms.append(mask.astype(masks.dtype))
areas.append(mask.sum())
areas = np.asarray(areas)
index = np.argsort(-areas)
@@ -401,7 +449,7 @@ def check_cls_dataset(dataset, split=""):
# Print to console
for k, v in {"train": train_set, "val": val_set, "test": test_set}.items():
- prefix = f'{colorstr(f"{k}:")} {v}...'
+ prefix = f"{colorstr(f'{k}:')} {v}..."
if v is None:
LOGGER.info(prefix)
else:
@@ -535,12 +583,12 @@ def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
path = Path(path).resolve()
LOGGER.info(f"Starting HUB dataset checks for {path}....")
- self.task = task # detect, segment, pose, classify
+ self.task = task # detect, segment, pose, classify, obb
if self.task == "classify":
unzip_dir = unzip_file(path)
data = check_cls_dataset(unzip_dir)
data["path"] = unzip_dir
- else: # detect, segment, pose
+ else: # detect, segment, pose, obb
_, data_dir, yaml_path = self._unzip(Path(path))
try:
# Load YAML with checks
@@ -552,7 +600,7 @@ def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
except Exception as e:
raise Exception("error/HUB/dataset_stats/init") from e
- self.hub_dir = Path(f'{data["path"]}-hub')
+ self.hub_dir = Path(f"{data['path']}-hub")
self.im_dir = self.hub_dir / "images"
self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())} # statistics dictionary
self.data = data
@@ -564,7 +612,7 @@ def _unzip(path):
return False, None, path
unzip_dir = unzip_file(path, path=path.parent)
assert unzip_dir.is_dir(), (
- f"Error unzipping {path}, {unzip_dir} not found. " f"path/to/abc.zip MUST unzip to path/to/abc/"
+ f"Error unzipping {path}, {unzip_dir} not found. path/to/abc.zip MUST unzip to path/to/abc/"
)
return True, str(unzip_dir), find_dataset_yaml(unzip_dir) # zipped, data_dir, yaml_path
@@ -602,7 +650,7 @@ def _round(labels):
# Get dataset statistics
if self.task == "classify":
- from torchvision.datasets import ImageFolder
+ from torchvision.datasets import ImageFolder # scope for faster 'import ultralytics'
dataset = ImageFolder(self.data[split])
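The HUBDatasetStats hunks above now cover the obb task as well; a usage sketch, assuming the class's get_json and process_images helpers (not shown in this hunk) keep their existing signatures:

# Sketch: dataset stats for a zipped dataset; get_json/process_images are assumed from the existing class API.
from ultralytics.data.utils import HUBDatasetStats

stats = HUBDatasetStats("path/to/coco8.zip", task="detect")  # detect, segment, pose, classify, or obb
stats.get_json(save=True)  # write the stats JSON under the *-hub directory
stats.process_images()  # compress images into the *-hub/images directory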
diff --git a/ultralytics/engine/__init__.py b/ultralytics/engine/__init__.py
index 9e68dc12245..77a19dcf0f8 100644
--- a/ultralytics/engine/__init__.py
+++ b/ultralytics/engine/__init__.py
@@ -1 +1 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 313cb7d076b..75c5d3d1337 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -1,52 +1,57 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
-Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit.
+Export a YOLO PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit.
Format | `format=argument` | Model
--- | --- | ---
-PyTorch | - | yolov8n.pt
-TorchScript | `torchscript` | yolov8n.torchscript
-ONNX | `onnx` | yolov8n.onnx
-OpenVINO | `openvino` | yolov8n_openvino_model/
-TensorRT | `engine` | yolov8n.engine
-CoreML | `coreml` | yolov8n.mlpackage
-TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/
-TensorFlow GraphDef | `pb` | yolov8n.pb
-TensorFlow Lite | `tflite` | yolov8n.tflite
-TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite
-TensorFlow.js | `tfjs` | yolov8n_web_model/
-PaddlePaddle | `paddle` | yolov8n_paddle_model/
-NCNN | `ncnn` | yolov8n_ncnn_model/
+PyTorch | - | yolo11n.pt
+TorchScript | `torchscript` | yolo11n.torchscript
+ONNX | `onnx` | yolo11n.onnx
+OpenVINO | `openvino` | yolo11n_openvino_model/
+TensorRT | `engine` | yolo11n.engine
+CoreML | `coreml` | yolo11n.mlpackage
+TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/
+TensorFlow GraphDef | `pb` | yolo11n.pb
+TensorFlow Lite | `tflite` | yolo11n.tflite
+TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite
+TensorFlow.js | `tfjs` | yolo11n_web_model/
+PaddlePaddle | `paddle` | yolo11n_paddle_model/
+MNN | `mnn` | yolo11n.mnn
+NCNN | `ncnn` | yolo11n_ncnn_model/
+IMX | `imx` | yolo11n_imx_model/
+RKNN | `rknn` | yolo11n_rknn_model/
Requirements:
$ pip install "ultralytics[export]"
Python:
from ultralytics import YOLO
- model = YOLO('yolov8n.pt')
+ model = YOLO('yolo11n.pt')
results = model.export(format='onnx')
CLI:
- $ yolo mode=export model=yolov8n.pt format=onnx
+ $ yolo mode=export model=yolo11n.pt format=onnx
Inference:
- $ yolo predict model=yolov8n.pt # PyTorch
- yolov8n.torchscript # TorchScript
- yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
- yolov8n_openvino_model # OpenVINO
- yolov8n.engine # TensorRT
- yolov8n.mlpackage # CoreML (macOS-only)
- yolov8n_saved_model # TensorFlow SavedModel
- yolov8n.pb # TensorFlow GraphDef
- yolov8n.tflite # TensorFlow Lite
- yolov8n_edgetpu.tflite # TensorFlow Edge TPU
- yolov8n_paddle_model # PaddlePaddle
- yolov8n_ncnn_model # NCNN
+ $ yolo predict model=yolo11n.pt # PyTorch
+ yolo11n.torchscript # TorchScript
+ yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
+ yolo11n_openvino_model # OpenVINO
+ yolo11n.engine # TensorRT
+ yolo11n.mlpackage # CoreML (macOS-only)
+ yolo11n_saved_model # TensorFlow SavedModel
+ yolo11n.pb # TensorFlow GraphDef
+ yolo11n.tflite # TensorFlow Lite
+ yolo11n_edgetpu.tflite # TensorFlow Edge TPU
+ yolo11n_paddle_model # PaddlePaddle
+ yolo11n.mnn # MNN
+ yolo11n_ncnn_model # NCNN
+ yolo11n_imx_model # IMX
TensorFlow.js:
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
$ npm install
- $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model
+ $ ln -s ../../yolo11n_web_model public/yolo11n_web_model
$ npm start
"""
@@ -72,15 +77,17 @@
from ultralytics.nn.autobackend import check_class_names, default_class_names
from ultralytics.nn.modules import C2f, Detect, Pose, Segment, RTDETRDecoder
from ultralytics.nn.modules import Regress6
-from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel
+from ultralytics.nn.tasks import ClassificationModel, DetectionModel, SegmentationModel, WorldModel
from ultralytics.utils import (
ARM64,
DEFAULT_CFG,
+ IS_COLAB,
IS_JETSON,
LINUX,
LOGGER,
MACOS,
PYTHON_VERSION,
+ RKNN_CHIPS,
ROOT,
WINDOWS,
__version__,
@@ -89,31 +96,64 @@
get_default_args,
yaml_save,
)
-from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requirements, check_version
+from ultralytics.utils.checks import (
+ check_imgsz,
+ check_is_path_safe,
+ check_requirements,
+ check_version,
+ is_sudo_available,
+)
from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download
from ultralytics.utils.files import file_size, spaces_in_path
-from ultralytics.utils.ops import Profile
-from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, smart_inference_mode
+from ultralytics.utils.ops import Profile, nms_rotated, xywh2xyxy
+from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device
def export_formats():
"""Ultralytics YOLO export formats."""
x = [
- ["PyTorch", "-", ".pt", True, True],
- ["TorchScript", "torchscript", ".torchscript", True, True],
- ["ONNX", "onnx", ".onnx", True, True],
- ["OpenVINO", "openvino", "_openvino_model", True, False],
- ["TensorRT", "engine", ".engine", False, True],
- ["CoreML", "coreml", ".mlpackage", True, False],
- ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True],
- ["TensorFlow GraphDef", "pb", ".pb", True, True],
- ["TensorFlow Lite", "tflite", ".tflite", True, False],
- ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False],
- ["TensorFlow.js", "tfjs", "_web_model", True, False],
- ["PaddlePaddle", "paddle", "_paddle_model", True, True],
- ["NCNN", "ncnn", "_ncnn_model", True, True],
+ ["PyTorch", "-", ".pt", True, True, []],
+ ["TorchScript", "torchscript", ".torchscript", True, True, ["batch", "optimize", "nms"]],
+ ["ONNX", "onnx", ".onnx", True, True, ["batch", "dynamic", "half", "opset", "simplify", "nms"]],
+ ["OpenVINO", "openvino", "_openvino_model", True, False, ["batch", "dynamic", "half", "int8", "nms"]],
+ ["TensorRT", "engine", ".engine", False, True, ["batch", "dynamic", "half", "int8", "simplify", "nms"]],
+ ["CoreML", "coreml", ".mlpackage", True, False, ["batch", "half", "int8", "nms"]],
+ ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True, ["batch", "int8", "keras", "nms"]],
+ ["TensorFlow GraphDef", "pb", ".pb", True, True, ["batch"]],
+ ["TensorFlow Lite", "tflite", ".tflite", True, False, ["batch", "half", "int8", "nms"]],
+ ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False, []],
+ ["TensorFlow.js", "tfjs", "_web_model", True, False, ["batch", "half", "int8", "nms"]],
+ ["PaddlePaddle", "paddle", "_paddle_model", True, True, ["batch"]],
+ ["MNN", "mnn", ".mnn", True, True, ["batch", "half", "int8"]],
+ ["NCNN", "ncnn", "_ncnn_model", True, True, ["batch", "half"]],
+ ["IMX", "imx", "_imx_model", True, True, ["int8"]],
+ ["RKNN", "rknn", "_rknn_model", False, False, ["batch", "name"]],
]
- return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x)))
+ return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU", "Arguments"], zip(*x)))
+
+
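The new "Arguments" column added above records which export args each format accepts; a small sketch of reading the table, using only what export_formats() returns:

# Sketch: inspect per-format argument support from the table returned by export_formats() above.
from ultralytics.engine.exporter import export_formats

fmts = export_formats()
for name, arg, args in zip(fmts["Format"], fmts["Argument"], fmts["Arguments"]):
    print(f"{name:22s} format={arg:<12s} supports: {', '.join(args) or '-'}")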
+def validate_args(format, passed_args, valid_args):
+ """
+ Validates arguments based on format.
+
+ Args:
+ format (str): The export format.
+ passed_args (Namespace): The arguments used during export.
+        valid_args (list): List of valid arguments for the format.
+
+ Raises:
+ AssertionError: If an argument that's not supported by the export format is used, or if format doesn't have the supported arguments listed.
+ """
+ # Only check valid usage of these args
+ export_args = ["half", "int8", "dynamic", "keras", "nms", "batch"]
+
+    assert valid_args is not None, f"ERROR ❌ valid arguments for '{format}' not listed."
+ custom = {"batch": 1, "data": None, "device": None} # exporter defaults
+ default_args = get_cfg(DEFAULT_CFG, custom)
+ for arg in export_args:
+ not_default = getattr(passed_args, arg, None) != getattr(default_args, arg, None)
+ if not_default:
+            assert arg in valid_args, f"ERROR ❌ argument '{arg}' is not supported for format='{format}'"
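validate_args only checks the fixed export_args list and only when a value differs from the config default; a behavior sketch using a hypothetical namespace in place of the exporter's parsed args:

# Hypothetical stand-in for the exporter's args; only half/int8/dynamic/keras/nms/batch are inspected.
from types import SimpleNamespace

args = SimpleNamespace(half=True, int8=False, dynamic=False, keras=False, nms=False, batch=1)
validate_args("onnx", args, ["batch", "dynamic", "half", "opset", "simplify", "nms"])  # passes: 'half' is supported
validate_args("pb", args, ["batch"])  # raises AssertionError: 'half' is not supported for format='pb'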
def gd_outputs(gd):
@@ -126,7 +166,7 @@ def gd_outputs(gd):
def try_export(inner_func):
- """YOLOv8 export decorator, i.e. @try_export."""
+ """YOLO export decorator, i.e. @try_export."""
inner_args = get_default_args(inner_func)
def outer_func(*args, **kwargs):
@@ -169,7 +209,6 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
self.callbacks = _callbacks or callbacks.get_default_callbacks()
callbacks.add_integration_callbacks(self)
- @smart_inference_mode()
def __call__(self, model=None) -> str:
"""Returns list of exported files/dirs after running callbacks."""
self.run_callbacks("on_export_start")
@@ -179,20 +218,43 @@ def __call__(self, model=None) -> str:
fmt = "engine"
if fmt in {"mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"}: # 'coreml' aliases
fmt = "coreml"
- fmts = tuple(export_formats()["Argument"][1:]) # available export formats
+ fmts_dict = export_formats()
+ fmts = tuple(fmts_dict["Argument"][1:]) # available export formats
+ if fmt not in fmts:
+ import difflib
+
+ # Get the closest match if format is invalid
+ matches = difflib.get_close_matches(fmt, fmts, n=1, cutoff=0.6) # 60% similarity required to match
+ if not matches:
+ raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
+ LOGGER.warning(f"WARNING โ ๏ธ Invalid export format='{fmt}', updating to format='{matches[0]}'")
+ fmt = matches[0]
flags = [x == fmt for x in fmts]
if sum(flags) != 1:
raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
- jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags # export booleans
+ (jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, mnn, ncnn, imx, rknn) = (
+ flags # export booleans
+ )
+
is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))
# Device
+ dla = None
if fmt == "engine" and self.args.device is None:
LOGGER.warning("WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0")
self.args.device = "0"
+ if fmt == "engine" and "dla" in str(self.args.device): # convert int/list to str first
+ dla = self.args.device.split(":")[-1]
+ self.args.device = "0" # update device to "0"
+            assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1', but got {self.args.device}."
self.device = select_device("cpu" if self.args.device is None else self.args.device)
- # Checks
+ # Argument compatibility checks
+ fmt_keys = fmts_dict["Arguments"][flags.index(True) + 1]
+ validate_args(fmt, self.args, fmt_keys)
+ if imx and not self.args.int8:
+            LOGGER.warning("WARNING ⚠️ IMX only supports int8 export, setting int8=True.")
+ self.args.int8 = True
if not hasattr(model, "names"):
model.names = default_class_names()
model.names = check_class_names(model.names)
@@ -209,6 +271,24 @@ def __call__(self, model=None) -> str:
if self.args.optimize:
assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False"
assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'"
+ if rknn:
+ if not self.args.name:
+ LOGGER.warning(
+                    "WARNING ⚠️ Rockchip RKNN export requires a missing 'name' arg for processor type. Using default name='rk3588'."
+ )
+ self.args.name = "rk3588"
+ self.args.name = self.args.name.lower()
+ assert self.args.name in RKNN_CHIPS, (
+ f"Invalid processor name '{self.args.name}' for Rockchip RKNN export. Valid names are {RKNN_CHIPS}."
+ )
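Given the check above, RKNN export needs a valid Rockchip processor name (falling back to 'rk3588'); a sketch of the intended call from the model API:

# Sketch: RKNN export as wired up above; 'rk3588' is the default processor name when none is passed.
from ultralytics import YOLO

YOLO("yolo11n.pt").export(format="rknn", name="rk3588")  # writes yolo11n_rknn_model/ per the export_formats() table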
+ if self.args.int8 and tflite:
+ assert not getattr(model, "end2end", False), "TFLite INT8 export not supported for end2end models."
+ if self.args.nms:
+ assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models."
+ if getattr(model, "end2end", False):
+                LOGGER.warning("WARNING ⚠️ 'nms=True' is not available for end2end models. Forcing 'nms=False'.")
+ self.args.nms = False
+ self.args.conf = self.args.conf or 0.25 # set conf default value for nms export
if edgetpu:
if not LINUX:
raise SystemError("Edge TPU export only supported on Linux. See https://coral.ai/docs/edgetpu/compiler")
@@ -230,12 +310,14 @@ def __call__(self, model=None) -> str:
"(torchscript, onnx, openvino, engine, coreml) formats. "
"See https://docs.ultralytics.com/models/yolo-world for details."
)
+ model.clip_model = None # openvino int8 export error: https://github.com/ultralytics/ultralytics/pull/18445
if self.args.int8 and not self.args.data:
self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")] # assign default data
LOGGER.warning(
"WARNING โ ๏ธ INT8 export requires a missing 'data' arg for calibration. "
f"Using default 'data={self.args.data}'."
)
+
# Input
im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
file = Path(
@@ -251,7 +333,14 @@ def __call__(self, model=None) -> str:
model.eval()
model.float()
model = model.fuse()
+
+ if imx:
+ from ultralytics.utils.torch_utils import FXModel
+
+ model = FXModel(model)
for m in model.modules():
+ if isinstance(m, Classify):
+ m.export = True
if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB
m.dynamic = self.args.dynamic
m.export = True
@@ -270,9 +359,19 @@ def __call__(self, model=None) -> str:
elif isinstance(m, Regress6):
m.export = True
+ if isinstance(m, Detect) and imx:
+ from ultralytics.utils.tal import make_anchors
+
+ m.anchors, m.strides = (
+ x.transpose(0, 1)
+ for x in make_anchors(
+ torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5
+ )
+ )
+
y = None
- for _ in range(2):
- y = model(im) # dry runs
+ for _ in range(2): # dry runs
+ y = NMSModel(model, self.args)(im) if self.args.nms and not coreml else model(im)
if self.args.half and onnx and self.device.type != "cpu":
im, model = im.half(), model.half() # to FP16
@@ -292,7 +391,7 @@ def __call__(self, model=None) -> str:
)
self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO")
data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else ""
- description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}'
+ description = f"Ultralytics {self.pretty_name} model {f'trained on {data}' if data else ''}"
self.metadata = {
"description": description,
"author": "Ultralytics",
@@ -305,6 +404,7 @@ def __call__(self, model=None) -> str:
"batch": self.args.batch,
"imgsz": self.imgsz,
"names": model.names,
+ "args": {k: v for k, v in self.args if k in fmt_keys},
} # model metadata
if model.task == "pose":
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
@@ -314,7 +414,7 @@ def __call__(self, model=None) -> str:
LOGGER.info(
f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
- f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)'
+ f"output shape(s) {self.output_shape} ({file_size(file):.1f} MB)"
)
# Exports
@@ -322,7 +422,7 @@ def __call__(self, model=None) -> str:
if jit or ncnn: # TorchScript
f[0], _ = self.export_torchscript()
if engine: # TensorRT required before ONNX
- f[1], _ = self.export_engine()
+ f[1], _ = self.export_engine(dla=dla)
if onnx: # ONNX
f[2], _ = self.export_onnx()
if xml: # OpenVINO
@@ -342,8 +442,14 @@ def __call__(self, model=None) -> str:
f[9], _ = self.export_tfjs()
if paddle: # PaddlePaddle
f[10], _ = self.export_paddle()
+ if mnn: # MNN
+ f[11], _ = self.export_mnn()
if ncnn: # NCNN
- f[11], _ = self.export_ncnn()
+ f[12], _ = self.export_ncnn()
+ if imx:
+ f[13], _ = self.export_imx()
+ if rknn:
+ f[14], _ = self.export_rknn()
# Finish
f = [str(x) for x in f if x] # filter out '' and None
@@ -360,11 +466,11 @@ def __call__(self, model=None) -> str:
predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else ""
q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization
LOGGER.info(
- f'\nExport complete ({time.time() - t:.1f}s)'
+ f"\nExport complete ({time.time() - t:.1f}s)"
f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
- f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}'
- f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}'
- f'\nVisualize: https://netron.app'
+ f"\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}"
+ f"\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}"
+ f"\nVisualize: https://netron.app"
)
self.run_callbacks("on_export_end")
@@ -391,17 +497,21 @@ def get_int8_calibration_dataloader(self, prefix=""):
batch_size=batch,
)
n = len(dataset)
- if n < 300:
+ if n < self.args.batch:
+ raise ValueError(
+ f"The calibration dataset ({n} images) must have at least as many images as the batch size ('batch={self.args.batch}')."
+ )
+ elif n < 300:
LOGGER.warning(f"{prefix} WARNING ⚠️ >300 images recommended for INT8 calibration, found {n} images.")
return build_dataloader(dataset, batch=batch, workers=0) # required for batch loading
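With the new check above, the INT8 calibration dataset must contain at least `batch` images (300+ recommended); a sketch of an export that satisfies the constraint, with coco8.yaml as a small example dataset:

# Sketch: INT8 TensorRT export; the calibration split must hold >= batch images (300+ recommended by the warning above).
from ultralytics import YOLO

YOLO("yolo11n.pt").export(format="engine", int8=True, data="coco8.yaml", batch=2)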
@try_export
def export_torchscript(self, prefix=colorstr("TorchScript:")):
- """YOLOv8 TorchScript model export."""
+ """YOLO TorchScript model export."""
LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...")
f = self.file.with_suffix(".torchscript")
- ts = torch.jit.trace(self.model, self.im, strict=False)
+ ts = torch.jit.trace(NMSModel(self.model, self.args) if self.args.nms else self.model, self.im, strict=False)
extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap()
if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
LOGGER.info(f"{prefix} optimizing for mobile...")
@@ -414,29 +524,39 @@ def export_torchscript(self, prefix=colorstr("TorchScript:")):
@try_export
def export_onnx(self, prefix=colorstr("ONNX:")):
- """YOLOv8 ONNX export."""
+ """YOLO ONNX export."""
requirements = ["onnx>=1.12.0"]
if self.args.simplify:
- requirements += ["onnxslim==0.1.34", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
+ requirements += ["onnxslim", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
check_requirements(requirements)
import onnx # noqa
opset_version = self.args.opset or get_latest_opset()
LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...")
f = str(self.file.with_suffix(".onnx"))
-
output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"]
dynamic = self.args.dynamic
if dynamic:
+ self.model.cpu() # dynamic=True only compatible with cpu
dynamic = {"images": {0: "batch", 2: "height", 3: "width"}} # shape(1,3,640,640)
if isinstance(self.model, SegmentationModel):
dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 116, 8400)
dynamic["output1"] = {0: "batch", 2: "mask_height", 3: "mask_width"} # shape(1,32,160,160)
elif isinstance(self.model, DetectionModel):
dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 84, 8400)
+ if self.args.nms: # only batch size is dynamic with NMS
+ dynamic["output0"].pop(2)
+ if self.args.nms and self.model.task == "obb":
+ self.args.opset = opset_version # for NMSModel
+ # OBB error https://github.com/pytorch/pytorch/issues/110859#issuecomment-1757841865
+ try:
+ torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, opset_version)
+ except RuntimeError: # it will fail if it's already registered
+ pass
+ check_requirements("onnxslim>=0.1.46") # Older versions has bug with OBB
torch.onnx.export(
- self.model.cpu() if dynamic else self.model, # dynamic=True only compatible with cpu
+ NMSModel(self.model, self.args) if self.args.nms else self.model,
self.im.cpu() if dynamic else self.im,
f,
verbose=False,
@@ -471,21 +591,21 @@ def export_onnx(self, prefix=colorstr("ONNX:")):
@try_export
def export_openvino(self, prefix=colorstr("OpenVINO:")):
- """YOLOv8 OpenVINO export."""
- check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}') # fix OpenVINO issue on ARM64
+ """YOLO OpenVINO export."""
+ check_requirements("openvino>=2024.5.0")
import openvino as ov
LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
assert TORCH_1_13, f"OpenVINO export requires torch>=1.13.0 but torch=={torch.__version__} is installed"
ov_model = ov.convert_model(
- self.model,
+ NMSModel(self.model, self.args) if self.args.nms else self.model,
input=None if self.args.dynamic else [self.im.shape],
example_input=self.im,
)
def serialize(ov_model, file):
"""Set RT info, serialize and save metadata YAML."""
- ov_model.set_rt_info("YOLOv8", ["model_info", "model_type"])
+ ov_model.set_rt_info("YOLO", ["model_info", "model_type"])
ov_model.set_rt_info(True, ["model_info", "reverse_input_channels"])
ov_model.set_rt_info(114, ["model_info", "pad_value"])
ov_model.set_rt_info([255.0], ["model_info", "scale_values"])
@@ -501,7 +621,7 @@ def serialize(ov_model, file):
if self.args.int8:
fq = str(self.file).replace(self.file.suffix, f"_int8_openvino_model{os.sep}")
fq_ov = str(Path(fq) / self.file.with_suffix(".xml").name)
- check_requirements("nncf>=2.8.0")
+ check_requirements("nncf>=2.14.0")
import nncf
def transform_fn(data_item) -> np.ndarray:
@@ -544,8 +664,8 @@ def transform_fn(data_item) -> np.ndarray:
@try_export
def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
- """YOLOv8 Paddle export."""
- check_requirements(("paddlepaddle", "x2paddle"))
+ """YOLO Paddle export."""
+ check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle", "x2paddle"))
import x2paddle # noqa
from x2paddle.convert import pytorch2paddle # noqa
@@ -556,9 +676,34 @@ def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f, None
+ @try_export
+ def export_mnn(self, prefix=colorstr("MNN:")):
+ """YOLOv8 MNN export using MNN https://github.com/alibaba/MNN."""
+ f_onnx, _ = self.export_onnx() # get onnx model first
+
+ check_requirements("MNN>=2.9.6")
+ import MNN # noqa
+ from MNN.tools import mnnconvert
+
+ # Setup and checks
+ LOGGER.info(f"\n{prefix} starting export with MNN {MNN.version()}...")
+ assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}"
+ f = str(self.file.with_suffix(".mnn")) # MNN model file
+ args = ["", "-f", "ONNX", "--modelFile", f_onnx, "--MNNModel", f, "--bizCode", json.dumps(self.metadata)]
+ if self.args.int8:
+ args.extend(("--weightQuantBits", "8"))
+ if self.args.half:
+ args.append("--fp16")
+ mnnconvert.convert(args)
+ # Remove scratch file generated during model conversion optimization
+ convert_scratch = Path(self.file.parent / ".__convert_external_data.bin")
+ if convert_scratch.exists():
+ convert_scratch.unlink()
+ return f, None
+
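A minimal usage sketch for the MNN exporter, assuming MNN>=2.9.6 is installed; int8 maps to --weightQuantBits 8 and half to --fp16 as wired above:

    from ultralytics import YOLO

    YOLO("yolo11n.pt").export(format="mnn", half=True)  # produces yolo11n.mnn via mnnconvert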
@try_export
def export_ncnn(self, prefix=colorstr("NCNN:")):
- """YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx."""
+ """YOLO NCNN export using PNNX https://github.com/pnnx/pnnx."""
check_requirements("ncnn")
import ncnn # noqa
@@ -591,16 +736,16 @@ def export_ncnn(self, prefix=colorstr("NCNN:")):
shutil.rmtree(unzip_dir) # delete unzip dir
ncnn_args = [
- f'ncnnparam={f / "model.ncnn.param"}',
- f'ncnnbin={f / "model.ncnn.bin"}',
- f'ncnnpy={f / "model_ncnn.py"}',
+ f"ncnnparam={f / 'model.ncnn.param'}",
+ f"ncnnbin={f / 'model.ncnn.bin'}",
+ f"ncnnpy={f / 'model_ncnn.py'}",
]
pnnx_args = [
- f'pnnxparam={f / "model.pnnx.param"}',
- f'pnnxbin={f / "model.pnnx.bin"}',
- f'pnnxpy={f / "model_pnnx.py"}',
- f'pnnxonnx={f / "model.pnnx.onnx"}',
+ f"pnnxparam={f / 'model.pnnx.param'}",
+ f"pnnxbin={f / 'model.pnnx.bin'}",
+ f"pnnxpy={f / 'model_pnnx.py'}",
+ f"pnnxonnx={f / 'model.pnnx.onnx'}",
]
cmd = [
@@ -626,7 +771,7 @@ def export_ncnn(self, prefix=colorstr("NCNN:")):
@try_export
def export_coreml(self, prefix=colorstr("CoreML:")):
- """YOLOv8 CoreML export."""
+ """YOLO CoreML export."""
mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested
check_requirements("coremltools>=6.0,<=6.2" if mlmodel else "coremltools>=7.0")
import coremltools as ct # noqa
@@ -637,9 +782,6 @@ def export_coreml(self, prefix=colorstr("CoreML:")):
f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
if f.is_dir():
shutil.rmtree(f)
- if self.args.nms and getattr(self.model, "end2end", False):
- LOGGER.warning(f"{prefix} WARNING โ ๏ธ 'nms=True' is not available for end2end models. Forcing 'nms=False'.")
- self.args.nms = False
bias = [0.0, 0.0, 0.0]
scale = 1 / 255
@@ -651,7 +793,7 @@ def export_coreml(self, prefix=colorstr("CoreML:")):
model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model
else:
if self.args.nms:
- LOGGER.warning(f"{prefix} WARNING โ ๏ธ 'nms=True' is only available for Detect models like 'yolov8n.pt'.")
+ LOGGER.warning(f"{prefix} WARNING โ ๏ธ 'nms=True' is only available for Detect models like 'yolo11n.pt'.")
# TODO CoreML Segment and Pose model pipelining
model = self.model
@@ -702,8 +844,8 @@ def export_coreml(self, prefix=colorstr("CoreML:")):
return f, ct_model
@try_export
- def export_engine(self, prefix=colorstr("TensorRT:")):
- """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
+ def export_engine(self, dla=None, prefix=colorstr("TensorRT:")):
+ """YOLO TensorRT export https://developer.nvidia.com/tensorrt."""
assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
f_onnx, _ = self.export_onnx() # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016
@@ -711,10 +853,10 @@ def export_engine(self, prefix=colorstr("TensorRT:")):
import tensorrt as trt # noqa
except ImportError:
if LINUX:
- check_requirements("tensorrt>7.0.0,<=10.1.0")
+ check_requirements("tensorrt>7.0.0,!=10.1.0")
import tensorrt as trt # noqa
check_version(trt.__version__, ">=7.0.0", hard=True)
- check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
+ check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
# Setup and checks
LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
@@ -728,15 +870,29 @@ def export_engine(self, prefix=colorstr("TensorRT:")):
# Engine builder
builder = trt.Builder(logger)
config = builder.create_builder_config()
- workspace = int(self.args.workspace * (1 << 30))
- if is_trt10:
+ workspace = int(self.args.workspace * (1 << 30)) if self.args.workspace is not None else 0
+ if is_trt10 and workspace > 0:
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace)
- else: # TensorRT versions 7, 8
+ elif workspace > 0: # TensorRT versions 7, 8
config.max_workspace_size = workspace
flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flag)
half = builder.platform_has_fast_fp16 and self.args.half
int8 = builder.platform_has_fast_int8 and self.args.int8
+
+ # Optionally switch to DLA if enabled
+ if dla is not None:
+ if not IS_JETSON:
+ raise ValueError("DLA is only available on NVIDIA Jetson devices")
+ LOGGER.info(f"{prefix} enabling DLA on core {dla}...")
+ if not self.args.half and not self.args.int8:
+ raise ValueError(
+ "DLA requires either 'half=True' (FP16) or 'int8=True' (INT8) to be enabled. Please enable one of them and try again."
+ )
+ config.default_device_type = trt.DeviceType.DLA
+ config.DLA_core = int(dla)
+ config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
+
# Read ONNX file
parser = trt.OnnxParser(network, logger)
if not parser.parse_from_file(f_onnx):
@@ -756,7 +912,7 @@ def export_engine(self, prefix=colorstr("TensorRT:")):
LOGGER.warning(f"{prefix} WARNING โ ๏ธ 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
profile = builder.create_optimization_profile()
min_shape = (1, shape[1], 32, 32) # minimum input shape
- max_shape = (*shape[:2], *(max(1, self.args.workspace) * d for d in shape[2:])) # max input shape
+ max_shape = (*shape[:2], *(int(max(1, workspace) * d) for d in shape[2:])) # max input shape
for inp in inputs:
profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape)
config.add_optimization_profile(profile)
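A hedged sketch of enabling DLA from the high-level API on a Jetson device, assuming the core index reaches export_engine(dla=...) through a device string such as 'dla:0'; FP16 or INT8 must be enabled per the check above:

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    # assumption: 'dla:0' selects DLA core 0; GPU_FALLBACK keeps unsupported layers on the GPU
    model.export(format="engine", device="dla:0", half=True)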
@@ -837,7 +993,7 @@ def write_calibration_cache(self, cache) -> None:
@try_export
def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
- """YOLOv8 TensorFlow SavedModel export."""
+ """YOLO TensorFlow SavedModel export."""
cuda = torch.cuda.is_available()
try:
import tensorflow as tf # noqa
@@ -853,7 +1009,7 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
"sng4onnx>=1.0.1", # required by 'onnx2tf' package
"onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
"onnx>=1.12.0",
- "onnx2tf>1.17.5,<=1.22.3",
+ "onnx2tf>1.17.5,<=1.26.3",
"onnxslim>=0.1.31",
"tflite_support<=0.4.3" if IS_JETSON else "tflite_support", # fix ImportError 'GLIBCXX_3.4.29'
"flatbuffers>=23.5.26,<100", # update old 'flatbuffers' included inside tensorflow package
@@ -890,23 +1046,22 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
io_quant_dtype = "int8"
if self.args.int8:
tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file
- verbosity = "info"
io_quant_dtype = "uint8" if self.args.uint8_io_dtype else "int8"
if self.args.data:
f.mkdir()
- images = [batch["img"].permute(0, 2, 3, 1) for batch in self.get_int8_calibration_dataloader(prefix)]
- images = torch.cat(images, 0).float()
+ images = [batch["img"] for batch in self.get_int8_calibration_dataloader(prefix)]
+ images = torch.nn.functional.interpolate(torch.cat(images, 0).float(), size=self.imgsz).permute(
+ 0, 2, 3, 1
+ )
np.save(str(tmp_file), images.numpy().astype(np.float32)) # BHWC
np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]]
- else:
- verbosity = "error"
LOGGER.info(f"{prefix} starting TFLite export with onnx2tf {onnx2tf.__version__}...")
- onnx2tf.convert(
+ keras_model = onnx2tf.convert(
input_onnx_file_path=f_onnx,
output_folder_path=str(f),
not_use_onnxsim=True,
- verbosity=verbosity,
+ verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873
output_integer_quantized_tflite=self.args.int8,
quant_type="per-tensor", # "per-tensor" (faster) or "per-channel" (slower but more accurate)
custom_input_op_name_np_data_path=np_data,
@@ -932,11 +1087,11 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
else:
if not self.args.separate_outputs:
self._add_tflite_metadata(file)
- return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model
+ return str(f), keras_model # or keras_model = tf.saved_model.load(f, tags=None, options=None)
@try_export
def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")):
- """YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
+ """YOLO TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
import tensorflow as tf # noqa
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
@@ -952,7 +1107,7 @@ def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")):
@try_export
def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")):
- """YOLOv8 TensorFlow Lite export."""
+ """YOLO TensorFlow Lite export."""
# BUG https://github.com/ultralytics/ultralytics/issues/13436
import tensorflow as tf # noqa
@@ -968,7 +1123,7 @@ def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorF
@try_export
def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
- """YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
+ """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
LOGGER.warning(f"{prefix} WARNING โ ๏ธ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")
cmd = "edgetpu_compiler --version"
@@ -976,7 +1131,6 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
assert LINUX, f"export only supported on Linux. See {help_url}"
if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0:
LOGGER.info(f"\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}")
- sudo = subprocess.run("sudo --version >/dev/null", shell=True).returncode == 0 # sudo installed on system
for c in (
"curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -",
'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | '
@@ -984,13 +1138,21 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
"sudo apt-get update",
"sudo apt-get install edgetpu-compiler",
):
- subprocess.run(c if sudo else c.replace("sudo ", ""), shell=True, check=True)
+ subprocess.run(c if is_sudo_available() else c.replace("sudo ", ""), shell=True, check=True)
ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]
LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model
- cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"'
+ cmd = (
+ "edgetpu_compiler "
+ f'--out_dir "{Path(f).parent}" '
+ "--show_operations "
+ "--search_delegate "
+ "--delegate_search_step 30 "
+ "--timeout_sec 180 "
+ f'"{tflite_model}"'
+ )
LOGGER.info(f"{prefix} running '{cmd}'")
subprocess.run(cmd, shell=True)
if not self.args.separate_outputs:
@@ -999,7 +1161,7 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
@try_export
def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
- """YOLOv8 TensorFlow.js export."""
+ """YOLO TensorFlow.js export."""
check_requirements("tensorflowjs")
if ARM64:
# Fix error: `np.object` was a deprecated alias for the builtin `object` when exporting to TF.js on ARM64
@@ -1033,6 +1195,171 @@ def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f, None
+ @try_export
+ def export_rknn(self, prefix=colorstr("RKNN:")):
+ """YOLO RKNN model export."""
+ LOGGER.info(f"\n{prefix} starting export with rknn-toolkit2...")
+
+ check_requirements("rknn-toolkit2")
+ if IS_COLAB:
+ # Prevent 'exit' from closing the notebook https://github.com/airockchip/rknn-toolkit2/issues/259
+ import builtins
+
+ builtins.exit = lambda: None
+
+ from rknn.api import RKNN
+
+ f, _ = self.export_onnx()
+ export_path = Path(f"{Path(f).stem}_rknn_model")
+ export_path.mkdir(exist_ok=True)
+
+ rknn = RKNN(verbose=False)
+ rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=self.args.name)
+ rknn.load_onnx(model=f)
+ rknn.build(do_quantization=False) # TODO: Add quantization support
+ f = f.replace(".onnx", f"-{self.args.name}.rknn")
+ rknn.export_rknn(f"{export_path / f}")
+ yaml_save(export_path / "metadata.yaml", self.metadata)
+ return export_path, None
+
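An illustrative RKNN export, assuming self.args.name carries the Rockchip target platform string consumed by rknn.config(target_platform=...):

    from ultralytics import YOLO

    YOLO("yolo11n.pt").export(format="rknn", name="rk3588")  # writes yolo11n-rk3588.rknn under *_rknn_model/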
+ @try_export
+ def export_imx(self, prefix=colorstr("IMX:")):
+ """YOLO IMX export."""
+ gptq = False
+ assert LINUX, (
+ "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
+ )
+ if getattr(self.model, "end2end", False):
+ raise ValueError("IMX export is not supported for end2end models.")
+ if "C2f" not in self.model.__str__():
+ raise ValueError("IMX export is only supported for YOLOv8n detection models")
+ check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0"))
+ check_requirements("imx500-converter[pt]==3.14.3") # Separate requirements for imx500-converter
+
+ import model_compression_toolkit as mct
+ import onnx
+ from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms
+
+ LOGGER.info(f"\n{prefix} starting export with model_compression_toolkit {mct.__version__}...")
+
+ try:
+ out = subprocess.run(
+ ["java", "--version"], check=True, capture_output=True
+ ) # Java 17 is required for imx500-converter
+ if "openjdk 17" not in str(out.stdout):
+ raise FileNotFoundError
+ except FileNotFoundError:
+ c = ["apt", "install", "-y", "openjdk-17-jdk", "openjdk-17-jre"]
+ if is_sudo_available():
+ c.insert(0, "sudo")
+ subprocess.run(c, check=True)
+
+ def representative_dataset_gen(dataloader=self.get_int8_calibration_dataloader(prefix)):
+ for batch in dataloader:
+ img = batch["img"]
+ img = img / 255.0
+ yield [img]
+
+ tpc = mct.get_target_platform_capabilities(
+ fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1"
+ )
+
+ config = mct.core.CoreConfig(
+ mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10),
+ quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True),
+ )
+
+ resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76)
+
+ quant_model = (
+ mct.gptq.pytorch_gradient_post_training_quantization( # Perform Gradient-Based Post Training Quantization
+ model=self.model,
+ representative_data_gen=representative_dataset_gen,
+ target_resource_utilization=resource_utilization,
+ gptq_config=mct.gptq.get_pytorch_gptq_config(n_epochs=1000, use_hessian_based_weights=False),
+ core_config=config,
+ target_platform_capabilities=tpc,
+ )[0]
+ if gptq
+ else mct.ptq.pytorch_post_training_quantization( # Perform post training quantization
+ in_module=self.model,
+ representative_data_gen=representative_dataset_gen,
+ target_resource_utilization=resource_utilization,
+ core_config=config,
+ target_platform_capabilities=tpc,
+ )[0]
+ )
+
+ class NMSWrapper(torch.nn.Module):
+ def __init__(
+ self,
+ model: torch.nn.Module,
+ score_threshold: float = 0.001,
+ iou_threshold: float = 0.7,
+ max_detections: int = 300,
+ ):
+ """
+ Wrap a PyTorch module with the multiclass_nms layer from sony_custom_layers.
+
+ Args:
+ model (nn.Module): Model instance.
+ score_threshold (float): Score threshold for non-maximum suppression.
+ iou_threshold (float): Intersection over union threshold for non-maximum suppression.
+ max_detections (int): The maximum number of detections to return.
+ """
+ super().__init__()
+ self.model = model
+ self.score_threshold = score_threshold
+ self.iou_threshold = iou_threshold
+ self.max_detections = max_detections
+
+ def forward(self, images):
+ # model inference
+ outputs = self.model(images)
+
+ boxes = outputs[0]
+ scores = outputs[1]
+ nms = multiclass_nms(
+ boxes=boxes,
+ scores=scores,
+ score_threshold=self.score_threshold,
+ iou_threshold=self.iou_threshold,
+ max_detections=self.max_detections,
+ )
+ return nms
+
+ quant_model = NMSWrapper(
+ model=quant_model,
+ score_threshold=self.args.conf or 0.001,
+ iou_threshold=self.args.iou,
+ max_detections=self.args.max_det,
+ ).to(self.device)
+
+ f = Path(str(self.file).replace(self.file.suffix, "_imx_model"))
+ f.mkdir(exist_ok=True)
+ onnx_model = f / Path(str(self.file.name).replace(self.file.suffix, "_imx.onnx")) # quantized ONNX path inside the IMX export directory
+ mct.exporter.pytorch_export_model(
+ model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen
+ )
+
+ model_onnx = onnx.load(onnx_model) # load onnx model
+ for k, v in self.metadata.items():
+ meta = model_onnx.metadata_props.add()
+ meta.key, meta.value = k, str(v)
+
+ onnx.save(model_onnx, onnx_model)
+
+ subprocess.run(
+ ["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"],
+ check=True,
+ )
+
+ # Labels file needed for IMX models.
+ with open(f / "labels.txt", "w") as file:
+ file.writelines([f"{name}\n" for _, name in self.model.names.items()])
+
+ return f, None
+
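A usage sketch for the IMX path under its stated constraints (Linux, a YOLOv8n C2f detection model, no end2end head), assuming the format key is "imx" to match the _imx_model suffix above; conf, iou and max_det feed the NMSWrapper:

    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")  # IMX export currently requires a YOLOv8n detection model
    model.export(format="imx", conf=0.25, iou=0.7, max_det=300)  # runs MCT quantization then imxconv-pt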
def _add_tflite_metadata(self, file):
"""Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
import flatbuffers
@@ -1098,7 +1425,7 @@ def _add_tflite_metadata(self, file):
tmp_file.unlink()
def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
- """YOLOv8 CoreML pipeline."""
+ """YOLO CoreML pipeline."""
import coremltools as ct # noqa
LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...")
@@ -1162,8 +1489,8 @@ def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipe
nms.coordinatesOutputFeatureName = "coordinates"
nms.iouThresholdInputFeatureName = "iouThreshold"
nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
- nms.iouThreshold = 0.45
- nms.confidenceThreshold = 0.25
+ nms.iouThreshold = self.args.iou
+ nms.confidenceThreshold = self.args.conf
nms.pickTop.perClass = True
nms.stringClassLabels.vector.extend(names.values())
nms_model = ct.models.MLModel(nms_spec)
@@ -1231,3 +1558,103 @@ def forward(self, x):
"""Normalize predictions of object detection model with input size-dependent factors."""
xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
+
+
+class NMSModel(torch.nn.Module):
+ """Model wrapper with embedded NMS for Detect, Segment, Pose and OBB."""
+
+ def __init__(self, model, args):
+ """
+ Initialize the NMSModel.
+
+ Args:
+ model (torch.nn.Module): The model to wrap with NMS postprocessing.
+ args (Namespace): The export arguments.
+ """
+ super().__init__()
+ self.model = model
+ self.args = args
+ self.obb = model.task == "obb"
+ self.is_tf = self.args.format in frozenset({"saved_model", "tflite", "tfjs"})
+
+ def forward(self, x):
+ """
+ Performs inference with NMS post-processing. Supports Detect, Segment, OBB and Pose.
+
+ Args:
+ x (torch.Tensor): The preprocessed tensor with shape (N, 3, H, W).
+
+ Returns:
+ out (torch.Tensor): The post-processed results with shape (N, max_det, 4 + 2 + extra_shape).
+ """
+ from functools import partial
+
+ from torchvision.ops import nms
+
+ preds = self.model(x)
+ pred = preds[0] if isinstance(preds, tuple) else preds
+ pred = pred.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
+ extra_shape = pred.shape[-1] - (4 + self.model.nc) # extras from Segment, OBB, Pose
+ boxes, scores, extras = pred.split([4, self.model.nc, extra_shape], dim=2)
+ scores, classes = scores.max(dim=-1)
+ self.args.max_det = min(pred.shape[1], self.args.max_det) # in case num_anchors < max_det
+ # (N, max_det, 4 coords + 1 class score + 1 class label + extra_shape).
+ out = torch.zeros(
+ boxes.shape[0],
+ self.args.max_det,
+ boxes.shape[-1] + 2 + extra_shape,
+ device=boxes.device,
+ dtype=boxes.dtype,
+ )
+ for i, (box, cls, score, extra) in enumerate(zip(boxes, classes, scores, extras)):
+ mask = score > self.args.conf
+ if self.is_tf:
+ # TFLite GatherND error if mask is empty
+ score *= mask
+ # Use an explicit length, otherwise a reshape error occurs; hardcoded to `self.args.max_det * 5`
+ mask = score.topk(min(self.args.max_det * 5, score.shape[0])).indices
+ box, score, cls, extra = box[mask], score[mask], cls[mask], extra[mask]
+ if not self.obb:
+ box = xywh2xyxy(box)
+ if self.is_tf:
+ # TFLite bug can return fewer boxes
+ box = torch.nn.functional.pad(box, (0, 0, 0, mask.shape[0] - box.shape[0]))
+ nmsbox = box.clone()
+ # `8` is the minimum value found experimentally to give correct NMS results for OBB
+ multiplier = 8 if self.obb else 1
+ # Normalize boxes for NMS since large class-offset values cause issues with int8 quantization
+ if self.args.format == "tflite": # TFLite is already normalized
+ nmsbox *= multiplier
+ else:
+ nmsbox = multiplier * nmsbox / torch.tensor(x.shape[2:], device=box.device, dtype=box.dtype).max()
+ if not self.args.agnostic_nms: # class-specific NMS
+ end = 2 if self.obb else 4
+ # Fully explicit expansion, otherwise a reshape error occurs
+ # A large max_wh causes issues when quantizing
+ cls_offset = cls.reshape(-1, 1).expand(nmsbox.shape[0], end)
+ offbox = nmsbox[:, :end] + cls_offset * multiplier
+ nmsbox = torch.cat((offbox, nmsbox[:, end:]), dim=-1)
+ nms_fn = (
+ partial(
+ nms_rotated,
+ use_triu=not (
+ self.is_tf
+ or (self.args.opset or 14) < 14
+ or (self.args.format == "openvino" and self.args.int8) # OpenVINO int8 error with triu
+ ),
+ )
+ if self.obb
+ else nms
+ )
+ keep = nms_fn(
+ torch.cat([nmsbox, extra], dim=-1) if self.obb else nmsbox,
+ score,
+ self.args.iou,
+ )[: self.args.max_det]
+ dets = torch.cat(
+ [box[keep], score[keep].view(-1, 1), cls[keep].view(-1, 1).to(out.dtype), extra[keep]], dim=-1
+ )
+ # Zero-pad to max_det size to avoid reshape error
+ pad = (0, 0, 0, self.args.max_det - dets.shape[0])
+ out[i] = torch.nn.functional.pad(dets, pad)
+ return (out, preds[1]) if self.model.task == "segment" else out
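A short sketch of how NMSModel is reached: passing nms=True wraps the base model before torch.onnx.export or ov.convert_model, so the exported graph already returns fixed-size (N, max_det, 6 + extra) detections:

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    model.export(format="onnx", nms=True, conf=0.25, iou=0.7, max_det=300)  # ONNX graph with embedded NMS
    # Conceptually equivalent to exporting NMSModel(pytorch_model, export_args) directly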
diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py
index 519c3f905ed..21ef3fb44aa 100644
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -1,8 +1,8 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import inspect
from pathlib import Path
-from typing import List, Union
+from typing import Any, Dict, List, Union
import numpy as np
import torch
@@ -11,7 +11,7 @@
from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
from ultralytics.engine.results import Results
from ultralytics.hub import HUB_WEB_ROOT, HUBTrainingSession
-from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load
+from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, yaml_model_load
from ultralytics.utils import (
ARGV,
ASSETS,
@@ -26,7 +26,7 @@
)
-class Model(nn.Module):
+class Model(torch.nn.Module):
"""
A base class for implementing YOLO models, unifying APIs across different model types.
@@ -37,7 +37,7 @@ class Model(nn.Module):
Attributes:
callbacks (Dict): A dictionary of callback functions for various events during model operations.
predictor (BasePredictor): The predictor object used for making predictions.
- model (nn.Module): The underlying PyTorch model.
+ model (torch.nn.Module): The underlying PyTorch model.
trainer (BaseTrainer): The trainer object used for training the model.
ckpt (Dict): The checkpoint data if the model is loaded from a *.pt file.
cfg (str): The configuration of the model if loaded from a *.yaml file.
@@ -72,16 +72,16 @@ class Model(nn.Module):
Examples:
>>> from ultralytics import YOLO
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model.predict("image.jpg")
- >>> model.train(data="coco128.yaml", epochs=3)
+ >>> model.train(data="coco8.yaml", epochs=3)
>>> metrics = model.val()
>>> model.export(format="onnx")
"""
def __init__(
self,
- model: Union[str, Path] = "yolov8n.pt",
+ model: Union[str, Path] = "yolo11n.pt",
task: str = None,
verbose: bool = False,
) -> None:
@@ -106,7 +106,7 @@ def __init__(
ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model = Model("path/to/model.yaml", task="detect")
>>> model = Model("hub_model", verbose=True)
"""
@@ -115,7 +115,7 @@ def __init__(
self.predictor = None # reuse predictor
self.model = None # model object
self.trainer = None # trainer object
- self.ckpt = None # if loaded from *.pt
+ self.ckpt = {} # if loaded from *.pt
self.cfg = None # if loaded from *.yaml
self.ckpt_path = None
self.overrides = {} # overrides for trainer object
@@ -136,6 +136,7 @@ def __init__(
# Check if Triton Server model
elif self.is_triton_model(model):
self.model_name = self.model = model
+ self.overrides["task"] = task or "detect" # set `task=detect` if not explicitly set
return
# Load or create new YOLO model
@@ -144,11 +145,14 @@ def __init__(
else:
self._load(model, task=task)
+ # Delete super().training for accessing self.model.training
+ del self.training
+
def __call__(
self,
source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
- **kwargs,
+ **kwargs: Any,
) -> list:
"""
Alias for the predict method, enabling the model instance to be callable for predictions.
@@ -161,14 +165,14 @@ def __call__(
the image(s) to make predictions on. Can be a file path, URL, PIL image, numpy array, PyTorch
tensor, or a list/tuple of these.
stream (bool): If True, treat the input source as a continuous stream for predictions.
- **kwargs (Any): Additional keyword arguments to configure the prediction process.
+ **kwargs: Additional keyword arguments to configure the prediction process.
Returns:
(List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a
Results object.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model("https://ultralytics.com/images/bus.jpg")
>>> for r in results:
... print(f"Detected {len(r)} objects in image")
@@ -190,9 +194,9 @@ def is_triton_model(model: str) -> bool:
(bool): True if the model string is a valid Triton Server URL, False otherwise.
Examples:
- >>> Model.is_triton_model("http://localhost:8000/v2/models/yolov8n")
+ >>> Model.is_triton_model("http://localhost:8000/v2/models/yolo11n")
True
- >>> Model.is_triton_model("yolov8n.pt")
+ >>> Model.is_triton_model("yolo11n.pt")
False
"""
from urllib.parse import urlsplit
@@ -217,7 +221,7 @@ def is_hub_model(model: str) -> bool:
Examples:
>>> Model.is_hub_model("https://hub.ultralytics.com/models/MODEL")
True
- >>> Model.is_hub_model("yolov8n.pt")
+ >>> Model.is_hub_model("yolo11n.pt")
False
"""
return model.startswith(f"{HUB_WEB_ROOT}/models/")
@@ -243,7 +247,7 @@ class from the task map.
Examples:
>>> model = Model()
- >>> model._new("yolov8n.yaml", task="detect", verbose=True)
+ >>> model._new("yolo11n.yaml", task="detect", verbose=True)
"""
cfg_dict = yaml_model_load(cfg)
self.cfg = cfg
@@ -274,12 +278,12 @@ def _load(self, weights: str, task=None) -> None:
Examples:
>>> model = Model()
- >>> model._load("yolov8n.pt")
+ >>> model._load("yolo11n.pt")
>>> model._load("path/to/weights.pth", task="detect")
"""
if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
weights = checks.check_file(weights, download_dir=SETTINGS["weights_dir"]) # download and return local file
- weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolov8n -> yolov8n.pt
+ weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolo11n -> yolo11n.pt
if Path(weights).suffix == ".pt":
self.model, self.ckpt = attempt_load_one_weight(weights)
@@ -307,19 +311,19 @@ def _check_is_pytorch_model(self) -> None:
information about supported model formats and operations.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model._check_is_pytorch_model() # No error raised
- >>> model = Model("yolov8n.onnx")
+ >>> model = Model("yolo11n.onnx")
>>> model._check_is_pytorch_model() # Raises TypeError
"""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt"
- pt_module = isinstance(self.model, nn.Module)
+ pt_module = isinstance(self.model, torch.nn.Module)
if not (pt_module or pt_str):
raise TypeError(
f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. "
f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
- f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
+ f"i.e. 'yolo predict model=yolo11n.onnx'.\nTo run CUDA or MPS inference please pass the device "
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'"
)
@@ -338,7 +342,7 @@ def reset_weights(self) -> "Model":
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model.reset_weights()
"""
self._check_is_pytorch_model()
@@ -349,7 +353,7 @@ def reset_weights(self) -> "Model":
p.requires_grad = True
return self
- def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model":
+ def load(self, weights: Union[str, Path] = "yolo11n.pt") -> "Model":
"""
Loads parameters from the specified weights file into the model.
@@ -367,7 +371,7 @@ def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model":
Examples:
>>> model = Model()
- >>> model.load("yolov8n.pt")
+ >>> model.load("yolo11n.pt")
>>> model.load(Path("path/to/weights.pt"))
"""
self._check_is_pytorch_model()
@@ -377,7 +381,7 @@ def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model":
self.model.load(weights)
return self
- def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> None:
+ def save(self, filename: Union[str, Path] = "saved_model.pt") -> None:
"""
Saves the current model state to a file.
@@ -386,13 +390,12 @@ def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) ->
Args:
filename (Union[str, Path]): The name of the file to save the model to.
- use_dill (bool): Whether to try using dill for serialization if available.
Raises:
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model.save("my_model.pt")
"""
self._check_is_pytorch_model()
@@ -402,13 +405,13 @@ def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) ->
from ultralytics import __version__
updates = {
- "model": deepcopy(self.model).half() if isinstance(self.model, nn.Module) else self.model,
+ "model": deepcopy(self.model).half() if isinstance(self.model, torch.nn.Module) else self.model,
"date": datetime.now().isoformat(),
"version": __version__,
"license": "AGPL-3.0 License (https://ultralytics.com/license)",
"docs": "https://docs.ultralytics.com",
}
- torch.save({**self.ckpt, **updates}, filename, use_dill=use_dill)
+ torch.save({**self.ckpt, **updates}, filename)
def info(self, detailed: bool = False, verbose: bool = True):
"""
@@ -429,7 +432,7 @@ def info(self, detailed: bool = False, verbose: bool = True):
TypeError: If the model is not a PyTorch model.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model.info() # Prints model summary
>>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list
"""
@@ -449,10 +452,10 @@ def fuse(self):
performs both convolution and normalization in one step.
Raises:
- TypeError: If the model is not a PyTorch nn.Module.
+ TypeError: If the model is not a PyTorch torch.nn.Module.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model.fuse()
>>> # Model is now fused and ready for optimized inference
"""
@@ -463,7 +466,7 @@ def embed(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
- **kwargs,
+ **kwargs: Any,
) -> list:
"""
Generates image embeddings based on the provided source.
@@ -475,7 +478,7 @@ def embed(
source (str | Path | int | List | Tuple | np.ndarray | torch.Tensor): The source of the image for
generating embeddings. Can be a file path, URL, PIL image, numpy array, etc.
stream (bool): If True, predictions are streamed.
- **kwargs (Any): Additional keyword arguments for configuring the embedding process.
+ **kwargs: Additional keyword arguments for configuring the embedding process.
Returns:
(List[torch.Tensor]): A list containing the image embeddings.
@@ -484,7 +487,7 @@ def embed(
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> image = "https://ultralytics.com/images/bus.jpg"
>>> embeddings = model.embed(image)
>>> print(embeddings[0].shape)
@@ -498,7 +501,7 @@ def predict(
source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
predictor=None,
- **kwargs,
+ **kwargs: Any,
) -> List[Results]:
"""
Performs predictions on the given image source using the YOLO model.
@@ -514,14 +517,14 @@ def predict(
stream (bool): If True, treats the input source as a continuous stream for predictions.
predictor (BasePredictor | None): An instance of a custom predictor class for making predictions.
If None, the method uses a default predictor.
- **kwargs (Any): Additional keyword arguments for configuring the prediction process.
+ **kwargs: Additional keyword arguments for configuring the prediction process.
Returns:
(List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a
Results object.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model.predict(source="path/to/image.jpg", conf=0.25)
>>> for r in results:
... print(r.boxes.data) # print detection bounding boxes
@@ -544,7 +547,7 @@ def predict(
prompts = args.pop("prompts", None) # for SAM-type models
if not self.predictor:
- self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks)
+ self.predictor = (predictor or self._smart_load("predictor"))(overrides=args, _callbacks=self.callbacks)
self.predictor.setup_model(model=self.model, verbose=is_cli)
else: # only update args if predictor is already setup
self.predictor.args = get_cfg(self.predictor.args, args)
@@ -559,7 +562,7 @@ def track(
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
persist: bool = False,
- **kwargs,
+ **kwargs: Any,
) -> List[Results]:
"""
Conducts object tracking on the specified input source using the registered trackers.
@@ -573,7 +576,7 @@ def track(
tracking. Can be a file path, URL, or video stream.
stream (bool): If True, treats the input source as a continuous video stream. Defaults to False.
persist (bool): If True, persists trackers between different calls to this method. Defaults to False.
- **kwargs (Any): Additional keyword arguments for configuring the tracking process.
+ **kwargs: Additional keyword arguments for configuring the tracking process.
Returns:
(List[ultralytics.engine.results.Results]): A list of tracking results, each a Results object.
@@ -582,7 +585,7 @@ def track(
AttributeError: If the predictor does not have registered trackers.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model.track(source="path/to/video.mp4", show=True)
>>> for r in results:
... print(r.boxes.id) # print tracking IDs
@@ -604,7 +607,7 @@ def track(
def val(
self,
validator=None,
- **kwargs,
+ **kwargs: Any,
):
"""
Validates the model using a specified dataset and validation configuration.
@@ -616,7 +619,7 @@ def val(
Args:
validator (ultralytics.engine.validator.BaseValidator | None): An instance of a custom validator class for
validating the model.
- **kwargs (Any): Arbitrary keyword arguments for customizing the validation process.
+ **kwargs: Arbitrary keyword arguments for customizing the validation process.
Returns:
(ultralytics.utils.metrics.DetMetrics): Validation metrics obtained from the validation process.
@@ -625,8 +628,8 @@ def val(
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = YOLO("yolov8n.pt")
- >>> results = model.val(data="coco128.yaml", imgsz=640)
+ >>> model = YOLO("yolo11n.pt")
+ >>> results = model.val(data="coco8.yaml", imgsz=640)
>>> print(results.box.map) # Print mAP50-95
"""
custom = {"rect": True} # method defaults
@@ -639,7 +642,7 @@ def val(
def benchmark(
self,
- **kwargs,
+ **kwargs: Any,
):
"""
Benchmarks the model across various export formats to evaluate performance.
@@ -650,7 +653,7 @@ def benchmark(
defaults, and any additional user-provided keyword arguments.
Args:
- **kwargs (Any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with
+ **kwargs: Arbitrary keyword arguments to customize the benchmarking process. These are combined with
default configurations, model-specific arguments, and method defaults. Common options include:
- data (str): Path to the dataset for benchmarking.
- imgsz (int | List[int]): Image size for benchmarking.
@@ -658,6 +661,7 @@ def benchmark(
- int8 (bool): Whether to use int8 precision mode.
- device (str): Device to run the benchmark on (e.g., 'cpu', 'cuda').
- verbose (bool): Whether to print detailed benchmark information.
+ - format (str): Export format name to benchmark only that specific format.
Returns:
(Dict): A dictionary containing the results of the benchmarking process, including metrics for
@@ -667,7 +671,7 @@ def benchmark(
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model.benchmark(data="coco8.yaml", imgsz=640, half=True)
>>> print(results)
"""
@@ -686,11 +690,12 @@ def benchmark(
verbose=kwargs.get("verbose"),
separate_outputs=args["separate_outputs"],
export_hw_optimized=args["export_hw_optimized"],
+ format=kwargs.get("format", "")
)
def export(
self,
- **kwargs,
+ **kwargs: Any,
) -> str:
"""
Exports the model to a different format suitable for deployment.
@@ -700,7 +705,7 @@ def export(
defaults, and any additional arguments provided.
Args:
- **kwargs (Dict): Arbitrary keyword arguments to customize the export process. These are combined with
+ **kwargs: Arbitrary keyword arguments to customize the export process. These are combined with
the model's overrides and method defaults. Common arguments include:
format (str): Export format (e.g., 'onnx', 'engine', 'coreml').
half (bool): Export model in half-precision.
@@ -719,7 +724,7 @@ def export(
RuntimeError: If the export process fails due to errors.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> model.export(format="onnx", dynamic=True, simplify=True)
'path/to/exported/model.onnx'
"""
@@ -739,7 +744,7 @@ def export(
def train(
self,
trainer=None,
- **kwargs,
+ **kwargs: Any,
):
"""
Trains the model using the specified dataset and training configuration.
@@ -754,7 +759,7 @@ def train(
Args:
trainer (BaseTrainer | None): Custom trainer instance for model training. If None, uses default.
- **kwargs (Any): Arbitrary keyword arguments for training configuration. Common options include:
+ **kwargs: Arbitrary keyword arguments for training configuration. Common options include:
data (str): Path to dataset configuration file.
epochs (int): Number of training epochs.
batch_size (int): Batch size for training.
@@ -774,8 +779,8 @@ def train(
ModuleNotFoundError: If the HUB SDK is not installed.
Examples:
- >>> model = YOLO("yolov8n.pt")
- >>> results = model.train(data="coco128.yaml", epochs=3)
+ >>> model = YOLO("yolo11n.pt")
+ >>> results = model.train(data="coco8.yaml", epochs=3)
"""
self._check_is_pytorch_model()
if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model
@@ -806,7 +811,7 @@ def train(
# Update model and cfg after training
if RANK in {-1, 0}:
ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
- self.model, _ = attempt_load_one_weight(ckpt)
+ self.model, self.ckpt = attempt_load_one_weight(ckpt)
self.overrides = self.model.args
self.metrics = getattr(self.trainer.validator, "metrics", None) # TODO: no metrics returned by DDP
return self.metrics
@@ -815,8 +820,8 @@ def tune(
self,
use_ray=False,
iterations=10,
- *args,
- **kwargs,
+ *args: Any,
+ **kwargs: Any,
):
"""
Conducts hyperparameter tuning for the model, with an option to use Ray Tune.
@@ -829,8 +834,8 @@ def tune(
Args:
use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False.
iterations (int): The number of tuning iterations to perform. Defaults to 10.
- *args (List): Variable length argument list for additional arguments.
- **kwargs (Dict): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.
+ *args: Variable length argument list for additional arguments.
+ **kwargs: Arbitrary keyword arguments. These are combined with the model's overrides and defaults.
Returns:
(Dict): A dictionary containing the results of the hyperparameter search.
@@ -839,7 +844,7 @@ def tune(
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model.tune(use_ray=True, iterations=20)
>>> print(results)
"""
@@ -874,7 +879,7 @@ def _apply(self, fn) -> "Model":
AssertionError: If the model is not a PyTorch model.
Examples:
- >>> model = Model("yolov8n.pt")
+ >>> model = Model("yolo11n.pt")
>>> model = model._apply(lambda t: t.cuda()) # Move model to GPU
"""
self._check_is_pytorch_model()
@@ -884,7 +889,7 @@ def _apply(self, fn) -> "Model":
return self
@property
- def names(self) -> list:
+ def names(self) -> Dict[int, str]:
"""
Retrieves the class names associated with the loaded model.
@@ -899,7 +904,7 @@ def names(self) -> list:
AttributeError: If the model or predictor does not have a 'names' attribute.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> print(model.names)
{0: 'person', 1: 'bicycle', 2: 'car', ...}
"""
@@ -918,23 +923,23 @@ def device(self) -> torch.device:
Retrieves the device on which the model's parameters are allocated.
This property determines the device (CPU or GPU) where the model's parameters are currently stored. It is
- applicable only to models that are instances of nn.Module.
+ applicable only to models that are instances of torch.nn.Module.
Returns:
(torch.device): The device (CPU/GPU) of the model.
Raises:
- AttributeError: If the model is not a PyTorch nn.Module instance.
+ AttributeError: If the model is not a torch.nn.Module instance.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> print(model.device)
device(type='cuda', index=0) # if CUDA is available
>>> model = model.to("cpu")
>>> print(model.device)
device(type='cpu')
"""
- return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None
+ return next(self.model.parameters()).device if isinstance(self.model, torch.nn.Module) else None
@property
def transforms(self):
@@ -949,7 +954,7 @@ def transforms(self):
(object | None): The transform object of the model if available, otherwise None.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> transforms = model.transforms
>>> if transforms:
... print(f"Model transforms: {transforms}")
@@ -978,9 +983,9 @@ def add_callback(self, event: str, func) -> None:
Examples:
>>> def on_train_start(trainer):
... print("Training is starting!")
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> model.add_callback("on_train_start", on_train_start)
- >>> model.train(data="coco128.yaml", epochs=1)
+ >>> model.train(data="coco8.yaml", epochs=1)
"""
self.callbacks[event].append(func)
@@ -997,7 +1002,7 @@ def clear_callback(self, event: str) -> None:
recognized by the Ultralytics callback system.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> model.add_callback("on_train_start", lambda: print("Training started"))
>>> model.clear_callback("on_train_start")
>>> # All callbacks for 'on_train_start' are now removed
@@ -1027,7 +1032,7 @@ def reset_callbacks(self) -> None:
modifications, ensuring consistent behavior across different runs or experiments.
Examples:
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> model.add_callback("on_train_start", custom_function)
>>> model.reset_callbacks()
# All callbacks are now reset to their default functions
@@ -1129,3 +1134,44 @@ def task_map(self) -> dict:
description of the expected behavior and structure.
"""
raise NotImplementedError("Please provide task map for your model!")
+
+ def eval(self):
+ """
+ Sets the model to evaluation mode.
+
+ This method changes the model's mode to evaluation, which affects layers like dropout and batch normalization
+ that behave differently during training and evaluation.
+
+ Returns:
+ (Model): The model instance with evaluation mode set.
+
+ Examples:
+ >>> model = YOLO("yolo11n.pt")
+ >>> model.eval()
+ """
+ self.model.eval()
+ return self
+
+ def __getattr__(self, name):
+ """
+ Enables accessing model attributes directly through the Model class.
+
+ This method provides a way to access attributes of the underlying model directly through the Model class
+ instance. It first checks if the requested attribute is 'model', in which case it returns the model from
+ the module dictionary. Otherwise, it delegates the attribute lookup to the underlying model.
+
+ Args:
+ name (str): The name of the attribute to retrieve.
+
+ Returns:
+ (Any): The requested attribute value.
+
+ Raises:
+ AttributeError: If the requested attribute does not exist in the model.
+
+ Examples:
+ >>> model = YOLO("yolo11n.pt")
+ >>> print(model.stride)
+ >>> print(model.task)
+ """
+ return self._modules["model"] if name == "model" else getattr(self.model, name)
diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py
index 94ec86476a5..afe301faf34 100644
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@@ -1,9 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
- $ yolo mode=predict model=yolov8n.pt source=0 # webcam
+ $ yolo mode=predict model=yolo11n.pt source=0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
@@ -15,18 +15,21 @@
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP stream
Usage - formats:
- $ yolo mode=predict model=yolov8n.pt # PyTorch
- yolov8n.torchscript # TorchScript
- yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
- yolov8n_openvino_model # OpenVINO
- yolov8n.engine # TensorRT
- yolov8n.mlpackage # CoreML (macOS-only)
- yolov8n_saved_model # TensorFlow SavedModel
- yolov8n.pb # TensorFlow GraphDef
- yolov8n.tflite # TensorFlow Lite
- yolov8n_edgetpu.tflite # TensorFlow Edge TPU
- yolov8n_paddle_model # PaddlePaddle
- yolov8n_ncnn_model # NCNN
+ $ yolo mode=predict model=yolo11n.pt # PyTorch
+ yolo11n.torchscript # TorchScript
+ yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
+ yolo11n_openvino_model # OpenVINO
+ yolo11n.engine # TensorRT
+ yolo11n.mlpackage # CoreML (macOS-only)
+ yolo11n_saved_model # TensorFlow SavedModel
+ yolo11n.pb # TensorFlow GraphDef
+ yolo11n.tflite # TensorFlow Lite
+ yolo11n_edgetpu.tflite # TensorFlow Edge TPU
+ yolo11n_paddle_model # PaddlePaddle
+ yolo11n.mnn # MNN
+ yolo11n_ncnn_model # NCNN
+ yolo11n_imx_model # Sony IMX
+ yolo11n_rknn_model # Rockchip RKNN
"""
import platform
@@ -153,7 +156,11 @@ def pre_transform(self, im):
(list): A list of transformed images.
"""
same_shapes = len({x.shape for x in im}) == 1
- letterbox = LetterBox(self.imgsz, auto=self.args.rect and same_shapes and self.model.pt, stride=self.model.stride)
+ letterbox = LetterBox(
+ self.imgsz,
+ auto=self.args.rect and same_shapes and (self.model.pt or (getattr(self.model, "dynamic", False) and not self.model.imx)),
+ stride=self.model.stride,
+ )
return [letterbox(image=x) for x in im]
def postprocess(self, preds, img, orig_imgs):
@@ -365,7 +372,7 @@ def save_predicted_images(self, save_path="", frame=0):
# Save videos and streams
if self.dataset.mode in {"stream", "video"}:
fps = self.dataset.fps if self.dataset.mode == "video" else 30
- frames_path = f'{save_path.split(".", 1)[0]}_frames/'
+ frames_path = f"{save_path.split('.', 1)[0]}_frames/"
if save_path not in self.vid_writer: # new video
if self.args.save_frames:
Path(frames_path).mkdir(parents=True, exist_ok=True)
@@ -384,7 +391,7 @@ def save_predicted_images(self, save_path="", frame=0):
# Save images
else:
- cv2.imwrite(save_path, im)
+ cv2.imwrite(str(Path(save_path).with_suffix(".jpg")), im) # save to JPG for best support
def show(self, p=""):
"""Display an image in a window using the OpenCV imshow function."""
diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py
index 737711be15d..5c0ea442b95 100644
--- a/ultralytics/engine/results.py
+++ b/ultralytics/engine/results.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Ultralytics Results, Boxes and Masks classes for handling inference results.
@@ -307,7 +307,7 @@ def __len__(self):
if v is not None:
return len(v)
- def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None):
+ def update(self, boxes=None, masks=None, probs=None, obb=None, keypoints=None, regress=None):
"""
Updates the Results object with new detection data.
@@ -320,6 +320,7 @@ def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None):
masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
+ keypoints (torch.Tensor | None): A tensor of shape (N, 17, 3) containing keypoints.
Examples:
>>> results = model("image.jpg")
@@ -336,6 +337,8 @@ def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None):
self.obb = OBB(obb, self.orig_shape)
if regress is not None:
self.regress = regress
+ if keypoints is not None:
+ self.keypoints = Keypoints(keypoints, self.orig_shape)
def _apply(self, fn, *args, **kwargs):
"""
@@ -496,8 +499,8 @@ def plot(
Examples:
>>> results = model("image.jpg")
>>> for result in results:
- ... im = result.plot()
- ... im.show()
+ >>> im = result.plot()
+ >>> im.show()
"""
assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
if img is None and isinstance(self.orig_img, torch.Tensor):
@@ -541,9 +544,9 @@ def plot(
# Plot Detect results
if pred_boxes is not None and show_boxes:
for i, d in enumerate(reversed(pred_boxes)):
- c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
+ c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ("" if id is None else f"id:{id} ") + names[c]
- label = (f"{name} {conf:.2f}" if conf else name) if labels else None
+ label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
annotator.box_label(
box,
@@ -609,7 +612,7 @@ def show(self, *args, **kwargs):
>>> results = model("path/to/image.jpg")
>>> results[0].show() # Display the first result
>>> for result in results:
- ... result.show() # Display all results
+ >>> result.show() # Display all results
"""
self.plot(show=True, *args, **kwargs)
@@ -629,10 +632,10 @@ def save(self, filename=None, *args, **kwargs):
Examples:
>>> results = model("path/to/image.jpg")
>>> for result in results:
- ... result.save("annotated_image.jpg")
+ >>> result.save("annotated_image.jpg")
>>> # Or with custom plot arguments
>>> for result in results:
- ... result.save("annotated_image.jpg", conf=False, line_width=2)
+ >>> result.save("annotated_image.jpg", conf=False, line_width=2)
"""
if not filename:
filename = f"results_{Path(self.path).name}"
@@ -653,7 +656,7 @@ def verbose(self):
Examples:
>>> results = model("path/to/image.jpg")
>>> for result in results:
- ... print(result.verbose())
+ >>> print(result.verbose())
2 persons, 1 car, 3 traffic lights,
dog 0.92, cat 0.78, horse 0.64,
@@ -664,15 +667,13 @@ def verbose(self):
"""
log_string = ""
probs = self.probs
- boxes = self.boxes
- regress = self.regress
if len(self) == 0:
return log_string if probs is not None else f"{log_string}(no detections), "
if probs is not None:
log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
- if regress is not None:
+ if regress := self.regress:
log_string += f"{', '.join(f'{self.names[j]} {regress.data[j]:.2f}' for j in regress.value)}, "
- if boxes:
+ if boxes := self.boxes:
for c in boxes.cls.unique():
n = (boxes.cls == c).sum() # detections per class
log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
@@ -691,10 +692,10 @@ def save_txt(self, txt_file, save_conf=False):
Examples:
>>> from ultralytics import YOLO
- >>> model = YOLO("yolov8n.pt")
+ >>> model = YOLO("yolo11n.pt")
>>> results = model("path/to/image.jpg")
>>> for result in results:
- ... result.save_txt("output.txt")
+ >>> result.save_txt("output.txt")
Notes:
- The file will contain one line per detection or classification with the following structure:
@@ -757,7 +758,7 @@ def save_crop(self, save_dir, file_name=Path("im.jpg")):
Examples:
>>> results = model("path/to/image.jpg")
>>> for result in results:
- ... result.save_crop(save_dir="path/to/crops", file_name="detection")
+ >>> result.save_crop(save_dir="path/to/crops", file_name="detection")
"""
if self.probs is not None:
LOGGER.warning("WARNING โ ๏ธ Classify task do not support `save_crop`.")
@@ -772,7 +773,7 @@ def save_crop(self, save_dir, file_name=Path("im.jpg")):
save_one_box(
d.xyxy,
self.orig_img.copy(),
- file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
+ file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"),
BGR=True,
)
@@ -796,8 +797,9 @@ def summary(self, normalize=False, decimals=5):
Examples:
>>> results = model("image.jpg")
- >>> summary = results[0].summary()
- >>> print(summary)
+ >>> for result in results:
+ >>> summary = result.summary()
+ >>> print(summary)
"""
# Create list of detection dictionaries
results = []
@@ -859,10 +861,11 @@ def to_df(self, normalize=False, decimals=5):
Examples:
>>> results = model("path/to/image.jpg")
- >>> df_result = results[0].to_df()
- >>> print(df_result)
+ >>> for result in results:
+ >>> df_result = result.to_df()
+ >>> print(df_result)
"""
- import pandas as pd
+ import pandas as pd # scope for faster 'import ultralytics'
return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals))
@@ -887,8 +890,9 @@ def to_csv(self, normalize=False, decimals=5, *args, **kwargs):
Examples:
>>> results = model("path/to/image.jpg")
- >>> csv_result = results[0].to_csv()
- >>> print(csv_result)
+ >>> for result in results:
+ >>> csv_result = result.to_csv()
+ >>> print(csv_result)
"""
return self.to_df(normalize=normalize, decimals=decimals).to_csv(*args, **kwargs)
@@ -912,8 +916,9 @@ def to_xml(self, normalize=False, decimals=5, *args, **kwargs):
Examples:
>>> results = model("path/to/image.jpg")
- >>> xml_result = results[0].to_xml()
- >>> print(xml_result)
+ >>> for result in results:
+ >>> xml_result = result.to_xml()
+ >>> print(xml_result)
"""
check_requirements("lxml")
df = self.to_df(normalize=normalize, decimals=decimals)
@@ -942,8 +947,9 @@ def to_json(self, normalize=False, decimals=5):
Examples:
>>> results = model("path/to/image.jpg")
- >>> json_result = results[0].to_json()
- >>> print(json_result)
+ >>> for result in results:
+ >>> json_result = result.to_json()
+ >>> print(json_result)
Notes:
- For classification tasks, the JSON will contain class probabilities instead of bounding boxes.
@@ -957,6 +963,75 @@ def to_json(self, normalize=False, decimals=5):
return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
+ def to_sql(self, table_name="results", normalize=False, decimals=5, db_path="results.db"):
+ """
+ Converts detection results to an SQL-compatible format.
+
+ This method serializes the detection results into a format compatible with SQL databases.
+ It includes information about detected objects such as bounding boxes, class names, confidence scores,
+ and optionally segmentation masks, keypoints or oriented bounding boxes.
+
+ Args:
+ table_name (str): Name of the SQL table where the data will be inserted. Defaults to "results".
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
+ decimals (int): Number of decimal places to round the bounding box values to. Defaults to 5.
+ db_path (str): Path to the SQLite database file. Defaults to "results.db".
+
+ Examples:
+ >>> results = model("path/to/image.jpg")
+ >>> for result in results:
+ >>> result.to_sql()
+ """
+ import json
+ import sqlite3
+
+ # Convert results to a list of dictionaries
+ data = self.summary(normalize=normalize, decimals=decimals)
+ if not data:
+ LOGGER.warning("⚠️ No results to save to SQL. Results dict is empty")
+ return
+
+ # Connect to the SQLite database
+ conn = sqlite3.connect(db_path)
+ cursor = conn.cursor()
+
+ # Create table if it doesn't exist
+ columns = (
+ "id INTEGER PRIMARY KEY AUTOINCREMENT, class_name TEXT, confidence REAL, "
+ "box TEXT, masks TEXT, kpts TEXT, obb TEXT"
+ )
+ cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})")
+
+ # Insert data into the table
+ for i, item in enumerate(data):
+ detect, obb = None, None # necessary to reinit these variables inside for loop to avoid duplication
+ class_name = item.get("name")
+ box = item.get("box", {})
+ # Serialize the box as JSON for 'detect' and 'obb' based on key presence
+ if all(key in box for key in ["x1", "y1", "x2", "y2"]) and not any(key in box for key in ["x3", "x4"]):
+ detect = json.dumps(box)
+ if all(key in box for key in ["x1", "y1", "x2", "y2", "x3", "x4"]):
+ obb = json.dumps(box)
+
+ cursor.execute(
+ f"INSERT INTO {table_name} (class_name, confidence, box, masks, kpts, obb) VALUES (?, ?, ?, ?, ?, ?)",
+ (
+ class_name,
+ item.get("confidence"),
+ detect,
+ json.dumps(item.get("segments", {}).get("x", [])),
+ json.dumps(item.get("keypoints", {}).get("x", [])),
+ obb,
+ ),
+ )
+
+ # Commit and close the connection
+ conn.commit()
+ conn.close()
+
+ LOGGER.info(f"✅ Detection results successfully written to SQL table '{table_name}' in database '{db_path}'.")
+
class Boxes(BaseTensor):
"""
@@ -1741,7 +1816,7 @@ def xyxy(self):
Examples:
>>> import torch
>>> from ultralytics import YOLO
- >>> model = YOLO("yolov8n-obb.pt")
+ >>> model = YOLO("yolo11n-obb.pt")
>>> results = model("path/to/image.jpg")
>>> for result in results:
... obb = result.obb
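The `to_sql()` method added above writes one row per detection into an SQLite table. As a minimal sketch of consuming that output (assuming the default `table_name="results"` and `db_path="results.db"` from the signature above, and that the database already exists), the stored JSON columns can be decoded like this:

```python
import json
import sqlite3

# Sketch: read back rows written by Results.to_sql() with its default arguments.
# Column names follow the CREATE TABLE statement above; box/masks/kpts/obb are JSON strings.
conn = sqlite3.connect("results.db")
rows = conn.execute("SELECT class_name, confidence, box, masks, kpts, obb FROM results").fetchall()
for class_name, confidence, box, masks, kpts, obb in rows:
    box = json.loads(box) if box else None  # axis-aligned box dict, e.g. {"x1": ..., "y1": ...}
    print(class_name, confidence, box)
conn.close()
```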
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 51611f7c65e..c47409118c9 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -1,9 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Train a model on a dataset.
Usage:
- $ yolo mode=train model=yolov8n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
+ $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
"""
import gc
@@ -12,7 +12,7 @@
import subprocess
import time
import warnings
-from copy import deepcopy
+from copy import copy, deepcopy
from datetime import datetime, timedelta
from pathlib import Path
@@ -119,7 +119,7 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
self.save_period = self.args.save_period
self.batch_size = self.args.batch
- self.epochs = self.args.epochs
+ self.epochs = self.args.epochs or 100 # in case users accidentally pass epochs=None with timed training
self.start_epoch = 0
if RANK == -1:
print_args(vars(self.args))
@@ -129,7 +129,7 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading
# Model and Dataset
- self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolov8n -> yolov8n.pt
+ self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolo11n -> yolo11n.pt
with torch_distributed_zero_first(LOCAL_RANK): # avoid auto-downloading dataset multiple times
self.trainset, self.testset = self.get_dataset()
self.ema = None
@@ -197,7 +197,7 @@ def train(self):
# Command
cmd, file = generate_ddp_command(world_size, self)
try:
- LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}')
+ LOGGER.info(f"{colorstr('DDP:')} debug command {' '.join(cmd)}")
subprocess.run(cmd, check=True)
except Exception as e:
raise e
@@ -280,12 +280,7 @@ def _setup_train(self, world_size):
# Batch size
if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size
- self.args.batch = self.batch_size = check_train_batch_size(
- model=self.model,
- imgsz=self.args.imgsz,
- amp=self.amp,
- batch=self.batch_size,
- )
+ self.args.batch = self.batch_size = self.auto_batch()
# Dataloaders
batch_size = self.batch_size // max(world_size, 1)
@@ -335,10 +330,10 @@ def _do_train(self, world_size=1):
self.train_time_start = time.time()
self.run_callbacks("on_train_start")
LOGGER.info(
- f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
- f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
+ f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n"
+ f"Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n"
f"Logging results to {colorstr('bold', self.save_dir)}\n"
- f'Starting training for ' + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
+ f"Starting training for " + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
)
if self.args.close_mosaic:
base_idx = (self.epochs - self.args.close_mosaic) * nb
@@ -470,10 +465,8 @@ def _do_train(self, world_size=1):
if RANK in {-1, 0}:
# Do final val with best.pt
- LOGGER.info(
- f"\n{epoch - self.start_epoch + 1} epochs completed in "
- f"{(time.time() - self.train_time_start) / 3600:.3f} hours."
- )
+ seconds = time.time() - self.train_time_start
+ LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.")
self.final_eval()
if self.args.plots:
self.plot_metrics()
@@ -481,6 +474,16 @@ def _do_train(self, world_size=1):
self._clear_memory()
self.run_callbacks("teardown")
+ def auto_batch(self, max_num_obj=0):
+ """Get batch size by calculating memory occupation of model."""
+ return check_train_batch_size(
+ model=self.model,
+ imgsz=self.args.imgsz,
+ amp=self.amp,
+ batch=self.batch_size,
+ max_num_obj=max_num_obj,
+ ) # returns batch size
+
def _get_memory(self):
"""Get accelerator memory utilization in GB."""
if self.device.type == "mps":
@@ -505,7 +508,7 @@ def read_results_csv(self):
"""Read results.csv into a dict using pandas."""
import pandas as pd # scope for faster 'import ultralytics'
- return {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
+ return pd.read_csv(self.csv).to_dict(orient="list")
def save_model(self):
"""Save model training checkpoints with additional metadata."""
@@ -657,10 +660,11 @@ def plot_training_labels(self):
def save_metrics(self, metrics):
"""Saves training metrics to a CSV file."""
keys, vals = list(metrics.keys()), list(metrics.values())
- n = len(metrics) + 1 # number of cols
- s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n") # header
+ n = len(metrics) + 2 # number of cols
+ s = "" if self.csv.exists() else (("%s," * n % tuple(["epoch", "time"] + keys)).rstrip(",") + "\n") # header
+ t = time.time() - self.train_time_start
with open(self.csv, "a") as f:
- f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n")
+ f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n")
def plot_metrics(self):
"""Plot and display metrics visually."""
@@ -752,7 +756,7 @@ def _close_dataloader_mosaic(self):
self.train_loader.dataset.mosaic = False
if hasattr(self.train_loader.dataset, "close_mosaic"):
LOGGER.info("Closing dataloader mosaic")
- self.train_loader.dataset.close_mosaic(hyp=self.args)
+ self.train_loader.dataset.close_mosaic(hyp=copy(self.args))
def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
"""
@@ -780,7 +784,7 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5
f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
)
- nc = getattr(model, "nc", 10) # number of classes
+ nc = self.data.get("nc", 10) # number of classes
lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
@@ -795,6 +799,8 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5
else: # weight (with decay)
g[0].append(param)
+ optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
+ name = {x.lower(): x for x in optimizers}.get(name.lower())
if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
elif name == "RMSProp":
@@ -803,15 +809,14 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5
optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
else:
raise NotImplementedError(
- f"Optimizer '{name}' not found in list of available optimizers "
- f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]."
- "To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics."
+ f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
+ "Request support for additional optimizers at https://github.com/ultralytics/ultralytics."
)
optimizer.add_param_group({"params": g[0], "weight_decay": decay}) # add g0 with weight_decay
optimizer.add_param_group({"params": g[1], "weight_decay": 0.0}) # add g1 (BatchNorm2d weights)
LOGGER.info(
f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
- f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)'
+ f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
)
return optimizer
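With the `save_metrics()` change above, `results.csv` now carries a `time` column (seconds elapsed since training start) directly after `epoch`, and values are written without column padding. A minimal sketch of reading the new layout; the run path below is an assumption:

```python
import pandas as pd

# Sketch: inspect the updated results.csv layout with the added "time" column.
df = pd.read_csv("runs/detect/train/results.csv")  # path is an assumption; adjust to your run
print(df.columns.tolist())           # ['epoch', 'time', ...metric keys...]
print(df[["epoch", "time"]].tail())  # cumulative seconds per logged epoch
```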
diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py
index 2f42eb603df..6cc5eb273a2 100644
--- a/ultralytics/engine/tuner.py
+++ b/ultralytics/engine/tuner.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection, instance
segmentation, image classification, pose estimation, and multi-object tracking.
@@ -8,11 +8,11 @@
where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.
Example:
- Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
```
"""
@@ -50,11 +50,11 @@ class Tuner:
Executes the hyperparameter evolution across multiple iterations.
Example:
- Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
```
@@ -62,7 +62,7 @@ class Tuner:
```python
from ultralytics import YOLO
- model = YOLO("yolov8n.pt")
+ model = YOLO("yolo11n.pt")
model.tune(space={key1: val1, key2: val2}) # custom search space dictionary
```
"""
@@ -101,7 +101,8 @@ def __init__(self, args=DEFAULT_CFG, _callbacks=None):
"copy_paste": (0.0, 1.0), # segment copy-paste (probability)
}
self.args = get_cfg(overrides=args)
- self.tune_dir = get_save_dir(self.args, name="tune")
+ self.tune_dir = get_save_dir(self.args, name=self.args.name or "tune")
+ self.args.name = None # reset to not affect training directory
self.tune_csv = self.tune_dir / "tune_results.csv"
self.callbacks = _callbacks or callbacks.get_default_callbacks()
self.prefix = colorstr("Tuner: ")
@@ -140,7 +141,7 @@ def _mutate(self, parent="single", n=5, mutation=0.8, sigma=0.2):
# Mutate
r = np.random # method
r.seed(int(time.time()))
- g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()]) # gains 0-1
+ g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1
ng = len(self.space)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
@@ -191,7 +192,7 @@ def __call__(self, model=None, iterations=10, cleanup=True):
try:
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
cmd = ["yolo", "train", *(f"{k}={v}" for k, v in train_args.items())]
- return_code = subprocess.run(cmd, check=True).returncode
+ return_code = subprocess.run(" ".join(cmd), check=True, shell=True).returncode
ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
metrics = torch.load(ckpt_file)["train_metrics"]
assert return_code == 0, "training failed"
@@ -224,12 +225,12 @@ def __call__(self, model=None, iterations=10, cleanup=True):
# Save and print tune results
header = (
- f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
- f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
- f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
- f'{self.prefix}Best fitness metrics are {best_metrics}\n'
- f'{self.prefix}Best fitness model is {best_save_dir}\n'
- f'{self.prefix}Best fitness hyperparameters are printed below.\n'
+ f"{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n"
+ f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n"
+ f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n"
+ f"{self.prefix}Best fitness metrics are {best_metrics}\n"
+ f"{self.prefix}Best fitness model is {best_save_dir}\n"
+ f"{self.prefix}Best fitness hyperparameters are printed below.\n"
)
LOGGER.info("\n" + header)
data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
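The `_mutate()` change above iterates `self.space.values()` directly, since only the optional third element of each `(min, max[, gain])` entry is used. A toy sketch of that gain lookup, with made-up space entries rather than the actual Tuner search space:

```python
import numpy as np

# Toy sketch: per-hyperparameter mutation gains from (min, max[, gain]) entries.
space = {"lr0": (1e-5, 1e-1), "lrf": (0.01, 1.0), "degrees": (0.0, 45.0, 0.5)}
g = np.array([v[2] if len(v) == 3 else 1.0 for v in space.values()])  # gains 0-1
print({k: float(v) for k, v in zip(space, g)})  # {'lr0': 1.0, 'lrf': 1.0, 'degrees': 0.5}
```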
diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py
index 6221f15b090..dad8b49d238 100644
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@@ -1,23 +1,26 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Check a model's accuracy on a test or val split of a dataset.
Usage:
- $ yolo mode=val model=yolov8n.pt data=coco8.yaml imgsz=640
+ $ yolo mode=val model=yolo11n.pt data=coco8.yaml imgsz=640
Usage - formats:
- $ yolo mode=val model=yolov8n.pt # PyTorch
- yolov8n.torchscript # TorchScript
- yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
- yolov8n_openvino_model # OpenVINO
- yolov8n.engine # TensorRT
- yolov8n.mlpackage # CoreML (macOS-only)
- yolov8n_saved_model # TensorFlow SavedModel
- yolov8n.pb # TensorFlow GraphDef
- yolov8n.tflite # TensorFlow Lite
- yolov8n_edgetpu.tflite # TensorFlow Edge TPU
- yolov8n_paddle_model # PaddlePaddle
- yolov8n_ncnn_model # NCNN
+ $ yolo mode=val model=yolo11n.pt # PyTorch
+ yolo11n.torchscript # TorchScript
+ yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
+ yolo11n_openvino_model # OpenVINO
+ yolo11n.engine # TensorRT
+ yolo11n.mlpackage # CoreML (macOS-only)
+ yolo11n_saved_model # TensorFlow SavedModel
+ yolo11n.pb # TensorFlow GraphDef
+ yolo11n.tflite # TensorFlow Lite
+ yolo11n_edgetpu.tflite # TensorFlow Edge TPU
+ yolo11n_paddle_model # PaddlePaddle
+ yolo11n.mnn # MNN
+ yolo11n_ncnn_model # NCNN
+ yolo11n_imx_model # Sony IMX
+ yolo11n_rknn_model # Rockchip RKNN
"""
import json
@@ -121,6 +124,8 @@ def __call__(self, trainer=None, model=None):
self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
model.eval()
else:
+ if str(self.args.model).endswith(".yaml") and model is None:
+ LOGGER.warning("WARNING ⚠️ validating an untrained model YAML will result in 0 mAP.")
callbacks.add_integration_callbacks(self)
model = AutoBackend(
weights=model or self.args.model,
@@ -246,7 +251,7 @@ def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False):
cost_matrix = iou * (iou >= threshold)
if cost_matrix.any():
- labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True)
+ labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix)
valid = cost_matrix[labels_idx, detections_idx] > 0
if valid.any():
correct[detections_idx[valid], i] = True
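The expanded `Usage - formats` list above covers validating exported models. A hedged sketch of the equivalent Python flow; model and dataset names follow the examples used throughout this patch, and the `export()` call is assumed to return the exported file path:

```python
from ultralytics import YOLO

# Sketch: export a model to one of the listed formats, then validate the export.
model = YOLO("yolo11n.pt")
onnx_path = model.export(format="onnx")                      # e.g. "yolo11n.onnx"
metrics = YOLO(onnx_path).val(data="coco8.yaml", imgsz=640)  # validate the exported model
print(metrics.box.map)                                       # mAP50-95
```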
diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py
index 9c9c9dfa16a..74c0dfeda7b 100644
--- a/ultralytics/hub/__init__.py
+++ b/ultralytics/hub/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import requests
@@ -63,13 +63,13 @@ def login(api_key: str = None, save=True) -> bool:
return True
else:
# Failed to authenticate with HUB
- LOGGER.info(f"{PREFIX}Get API key from {api_key_url} and then run 'yolo hub login API_KEY'")
+ LOGGER.info(f"{PREFIX}Get API key from {api_key_url} and then run 'yolo login API_KEY'")
return False
def logout():
"""
- Log out of Ultralytics HUB by removing the API key from the settings file. To log in again, use 'yolo hub login'.
+ Log out of Ultralytics HUB by removing the API key from the settings file. To log in again, use 'yolo login'.
Example:
```python
@@ -79,7 +79,7 @@ def logout():
```
"""
SETTINGS["api_key"] = ""
- LOGGER.info(f"{PREFIX}logged out ✅. To log in again, use 'yolo hub login'.")
+ LOGGER.info(f"{PREFIX}logged out ✅. To log in again, use 'yolo login'.")
def reset_model(model_id=""):
diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py
index 3c7c6d3d25d..2e62739f31c 100644
--- a/ultralytics/hub/auth.py
+++ b/ultralytics/hub/auth.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import requests
@@ -68,7 +68,7 @@ def __init__(self, api_key="", verbose=False):
if verbose:
LOGGER.info(f"{PREFIX}New authentication successful ✅")
elif verbose:
- LOGGER.info(f"{PREFIX}Get API key from {API_KEY_URL} and then run 'yolo hub login API_KEY'")
+ LOGGER.info(f"{PREFIX}Get API key from {API_KEY_URL} and then run 'yolo login API_KEY'")
def request_api_key(self, max_attempts=3):
"""
diff --git a/ultralytics/hub/google/__init__.py b/ultralytics/hub/google/__init__.py
index 9090297a713..0acd2dd26da 100644
--- a/ultralytics/hub/google/__init__.py
+++ b/ultralytics/hub/google/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import concurrent.futures
import statistics
diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py
index 89b5ddfc1e9..37fba131359 100644
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import shutil
import threading
diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py
index 2fc956fb348..5f837588947 100644
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
import platform
@@ -170,7 +170,7 @@ def func(func_method, func_url, **func_kwargs):
class Events:
"""
A class for collecting anonymous event analytics. Event analytics are enabled when sync=True in settings and
- disabled when sync=False. Run 'yolo settings' to see and update settings YAML file.
+ disabled when sync=False. Run 'yolo settings' to see and update settings.
Attributes:
url (str): The URL to send anonymous events.
diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py
index aff620a9a92..ead1e923041 100644
--- a/ultralytics/models/__init__.py
+++ b/ultralytics/models/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .fastsam import FastSAM
from .nas import NAS
diff --git a/ultralytics/models/fastsam/__init__.py b/ultralytics/models/fastsam/__init__.py
index 7be2ba1edfa..8c224ac8f9e 100644
--- a/ultralytics/models/fastsam/__init__.py
+++ b/ultralytics/models/fastsam/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .model import FastSAM
from .predict import FastSAMPredictor
diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py
index 4e3f44f8378..f9deb7a12b9 100644
--- a/ultralytics/models/fastsam/model.py
+++ b/ultralytics/models/fastsam/model.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from pathlib import Path
diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py
index 9910237b0f3..0d019afb9e2 100644
--- a/ultralytics/models/fastsam/predict.py
+++ b/ultralytics/models/fastsam/predict.py
@@ -1,4 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
import torch
from PIL import Image
@@ -64,6 +65,9 @@ def prompt(self, results, bboxes=None, points=None, labels=None, texts=None):
if not isinstance(results, list):
results = [results]
for result in results:
+ if len(result) == 0:
+ prompt_results.append(result)
+ continue
masks = result.masks.data
if masks.shape[1:] != result.orig_shape:
masks = scale_masks(masks[None], result.orig_shape)[0]
@@ -84,9 +88,9 @@ def prompt(self, results, bboxes=None, points=None, labels=None, texts=None):
if labels is None:
labels = torch.ones(points.shape[0])
labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
- assert len(labels) == len(
- points
- ), f"Excepted `labels` got same size as `point`, but got {len(labels)} and {len(points)}"
+ assert len(labels) == len(points), (
+ f"Expected `labels` to have the same size as `points`, but got {len(labels)} and {len(points)}"
+ )
point_idx = (
torch.ones(len(result), dtype=torch.bool, device=self.device)
if labels.sum() == 0 # all negative points
diff --git a/ultralytics/models/fastsam/utils.py b/ultralytics/models/fastsam/utils.py
index 0f41b1ad4cb..a8e1aa172ba 100644
--- a/ultralytics/models/fastsam/utils.py
+++ b/ultralytics/models/fastsam/utils.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
diff --git a/ultralytics/models/fastsam/val.py b/ultralytics/models/fastsam/val.py
index 9014b27a767..aa130dbfc9a 100644
--- a/ultralytics/models/fastsam/val.py
+++ b/ultralytics/models/fastsam/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.models.yolo.segment import SegmentationValidator
from ultralytics.utils.metrics import SegmentMetrics
diff --git a/ultralytics/models/nas/__init__.py b/ultralytics/models/nas/__init__.py
index b095a050bad..c36c0a42f03 100644
--- a/ultralytics/models/nas/__init__.py
+++ b/ultralytics/models/nas/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .model import NAS
from .predict import NASPredictor
diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py
index bc0ccc5942d..10fd72b4e46 100644
--- a/ultralytics/models/nas/model.py
+++ b/ultralytics/models/nas/model.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
YOLO-NAS model interface.
diff --git a/ultralytics/models/nas/predict.py b/ultralytics/models/nas/predict.py
index 1978f395bc4..e140900e7ba 100644
--- a/ultralytics/models/nas/predict.py
+++ b/ultralytics/models/nas/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
diff --git a/ultralytics/models/nas/val.py b/ultralytics/models/nas/val.py
index c4fb73b699b..ca01e94e002 100644
--- a/ultralytics/models/nas/val.py
+++ b/ultralytics/models/nas/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
@@ -38,13 +38,7 @@ def postprocess(self, preds_in):
"""Apply Non-maximum suppression to prediction outputs."""
boxes = ops.xyxy2xywh(preds_in[0][0])
preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1)
- return ops.non_max_suppression(
+ return super().postprocess(
preds,
- self.args.conf,
- self.args.iou,
- labels=self.lb,
- multi_label=False,
- agnostic=self.args.single_cls or self.args.agnostic_nms,
- max_det=self.args.max_det,
max_time_img=0.5,
)
diff --git a/ultralytics/models/rtdetr/__init__.py b/ultralytics/models/rtdetr/__init__.py
index 172c74b45cc..a6d038d652c 100644
--- a/ultralytics/models/rtdetr/__init__.py
+++ b/ultralytics/models/rtdetr/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .model import RTDETR
from .predict import RTDETRPredictor
diff --git a/ultralytics/models/rtdetr/model.py b/ultralytics/models/rtdetr/model.py
index 440df1798fe..fa4123a8a24 100644
--- a/ultralytics/models/rtdetr/model.py
+++ b/ultralytics/models/rtdetr/model.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time
performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient
diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py
index 492254356d9..782cc2f640d 100644
--- a/ultralytics/models/rtdetr/predict.py
+++ b/ultralytics/models/rtdetr/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py
index cb11a727948..2fc30f9f39a 100644
--- a/ultralytics/models/rtdetr/train.py
+++ b/ultralytics/models/rtdetr/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from copy import copy
@@ -68,8 +68,11 @@ def build_dataset(self, img_path, mode="val", batch=None):
hyp=self.args,
rect=False,
cache=self.args.cache or None,
+ single_cls=self.args.single_cls or False,
prefix=colorstr(f"{mode}: "),
+ classes=self.args.classes,
data=self.data,
+ fraction=self.args.fraction if mode == "train" else 1.0,
)
def get_validator(self):
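The `build_dataset()` additions above thread `single_cls`, `classes`, and `fraction` through to the RT-DETR dataset, matching the corresponding train arguments. A hedged sketch of how those arguments would be passed from the user side; the weight name is illustrative:

```python
from ultralytics import RTDETR

# Sketch: train arguments that the build_dataset() changes above now respect.
model = RTDETR("rtdetr-l.pt")
model.train(
    data="coco8.yaml",
    epochs=10,
    fraction=0.5,     # train on 50% of the training split
    classes=[0],      # restrict training to class index 0
    single_cls=False,
)
```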
diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py
index 9122750c27b..761d20dfb26 100644
--- a/ultralytics/models/rtdetr/val.py
+++ b/ultralytics/models/rtdetr/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
diff --git a/ultralytics/models/sam/__init__.py b/ultralytics/models/sam/__init__.py
index a29f5cb3f3c..2d9de7b64ea 100644
--- a/ultralytics/models/sam/__init__.py
+++ b/ultralytics/models/sam/__init__.py
@@ -1,6 +1,6 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .model import SAM
-from .predict import Predictor, SAM2Predictor
+from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor
-__all__ = "SAM", "Predictor", "SAM2Predictor" # tuple or list
+__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor" # tuple or list
diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py
index 55db3e011cb..4abce4cd7db 100644
--- a/ultralytics/models/sam/amg.py
+++ b/ultralytics/models/sam/amg.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
from itertools import product
@@ -76,7 +76,7 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
def generate_crop_boxes(
im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
) -> Tuple[List[List[int]], List[int]]:
- """Generates crop boxes of varying sizes for multi-scale image processing, with layered overlapping regions."""
+ """Generates crop boxes of varying sizes for multiscale image processing, with layered overlapping regions."""
crop_boxes, layer_idxs = [], []
im_h, im_w = im_size
short_side = min(im_h, im_w)
diff --git a/ultralytics/models/sam/build.py b/ultralytics/models/sam/build.py
index 0e7ddedcf0f..47c9d5a345b 100644
--- a/ultralytics/models/sam/build.py
+++ b/ultralytics/models/sam/build.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
@@ -210,8 +210,6 @@ def _build_sam(
state_dict = torch.load(f)
sam.load_state_dict(state_dict)
sam.eval()
- # sam.load_state_dict(torch.load(checkpoint), strict=True)
- # sam.eval()
return sam
@@ -265,6 +263,7 @@ def _build_sam2(
memory_attention = MemoryAttention(d_model=256, pos_enc_at_input=True, num_layers=4, layer=MemoryAttentionLayer())
memory_encoder = MemoryEncoder(out_dim=64)
+ is_sam2_1 = checkpoint is not None and "sam2.1" in checkpoint
sam2 = SAM2Model(
image_encoder=image_encoder,
memory_attention=memory_attention,
@@ -290,6 +289,9 @@ def _build_sam2(
multimask_max_pt_num=1,
use_mlp_for_obj_ptr_proj=True,
compile_image_encoder=False,
+ no_obj_embed_spatial=is_sam2_1,
+ proj_tpos_enc_in_obj_ptrs=is_sam2_1,
+ use_signed_tpos_enc_to_obj_ptrs=is_sam2_1,
sam_mask_decoder_extra_args=dict(
dynamic_multimask_via_stability=True,
dynamic_multimask_stability_delta=0.05,
@@ -315,6 +317,10 @@ def _build_sam2(
"sam2_s.pt": build_sam2_s,
"sam2_b.pt": build_sam2_b,
"sam2_l.pt": build_sam2_l,
+ "sam2.1_t.pt": build_sam2_t,
+ "sam2.1_s.pt": build_sam2_s,
+ "sam2.1_b.pt": build_sam2_b,
+ "sam2.1_l.pt": build_sam2_l,
}
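With the `sam2.1_*.pt` entries added to the builder map above, SAM 2.1 checkpoints should resolve through the same `SAM` entry point used elsewhere in this patch. A hedged usage sketch; the prompt box and image path are illustrative:

```python
from ultralytics import SAM

# Sketch: load a SAM 2.1 checkpoint via the new builder-map entries and run a box prompt.
model = SAM("sam2.1_b.pt")
model.info()  # print model summary
results = model("path/to/image.jpg", bboxes=[100, 100, 200, 200])
```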
diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py
index e685dc4e4fd..d9fb501b795 100644
--- a/ultralytics/models/sam/model.py
+++ b/ultralytics/models/sam/model.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
SAM model interface.
@@ -148,7 +148,7 @@ def info(self, detailed=False, verbose=True):
verbose (bool): If True, prints the information to the console.
Returns:
- (Tuple): A tuple containing the model's information (string representations of the model).
+ (tuple): A tuple containing the model's information (string representations of the model).
Examples:
>>> sam = SAM("sam_b.pt")
diff --git a/ultralytics/models/sam/modules/__init__.py b/ultralytics/models/sam/modules/__init__.py
index 9e68dc12245..77a19dcf0f8 100644
--- a/ultralytics/models/sam/modules/__init__.py
+++ b/ultralytics/models/sam/modules/__init__.py
@@ -1 +1 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
diff --git a/ultralytics/models/sam/modules/blocks.py b/ultralytics/models/sam/modules/blocks.py
index 026443c69fa..9abcc4406e2 100644
--- a/ultralytics/models/sam/modules/blocks.py
+++ b/ultralytics/models/sam/modules/blocks.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import copy
import math
@@ -502,11 +502,11 @@ def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) -> torch.T
class MultiScaleAttention(nn.Module):
"""
- Implements multi-scale self-attention with optional query pooling for efficient feature extraction.
+ Implements multiscale self-attention with optional query pooling for efficient feature extraction.
- This class provides a flexible implementation of multi-scale attention, allowing for optional
+ This class provides a flexible implementation of multiscale attention, allowing for optional
downsampling of query features through pooling. It's designed to enhance the model's ability to
- capture multi-scale information in visual tasks.
+ capture multiscale information in visual tasks.
Attributes:
dim (int): Input dimension of the feature map.
@@ -518,7 +518,7 @@ class MultiScaleAttention(nn.Module):
proj (nn.Linear): Output projection.
Methods:
- forward: Applies multi-scale attention to the input tensor.
+ forward: Applies multiscale attention to the input tensor.
Examples:
>>> import torch
@@ -537,7 +537,7 @@ def __init__(
num_heads: int,
q_pool: nn.Module = None,
):
- """Initializes multi-scale attention with optional query pooling for efficient feature extraction."""
+ """Initializes multiscale attention with optional query pooling for efficient feature extraction."""
super().__init__()
self.dim = dim
@@ -552,7 +552,7 @@ def __init__(
self.proj = nn.Linear(dim_out, dim_out)
def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Applies multi-scale attention with optional query pooling to extract multi-scale features."""
+ """Applies multiscale attention with optional query pooling to extract multiscale features."""
B, H, W, _ = x.shape
# qkv with shape (B, H * W, 3, nHead, C)
qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1)
@@ -582,9 +582,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
class MultiScaleBlock(nn.Module):
"""
- A multi-scale attention block with window partitioning and query pooling for efficient vision transformers.
+ A multiscale attention block with window partitioning and query pooling for efficient vision transformers.
- This class implements a multi-scale attention mechanism with optional window partitioning and downsampling,
+ This class implements a multiscale attention mechanism with optional window partitioning and downsampling,
designed for use in vision transformer architectures.
Attributes:
@@ -601,7 +601,7 @@ class MultiScaleBlock(nn.Module):
proj (nn.Linear | None): Projection layer for dimension mismatch.
Methods:
- forward: Processes input tensor through the multi-scale block.
+ forward: Processes input tensor through the multiscale block.
Examples:
>>> block = MultiScaleBlock(dim=256, dim_out=512, num_heads=8, window_size=7)
@@ -623,7 +623,7 @@ def __init__(
act_layer: nn.Module = nn.GELU,
window_size: int = 0,
):
- """Initializes a multi-scale attention block with window partitioning and optional query pooling."""
+ """Initializes a multiscale attention block with window partitioning and optional query pooling."""
super().__init__()
if isinstance(norm_layer, str):
@@ -660,7 +660,7 @@ def __init__(
self.proj = nn.Linear(dim, dim_out)
def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Processes input through multi-scale attention and MLP, with optional windowing and downsampling."""
+ """Processes input through multiscale attention and MLP, with optional windowing and downsampling."""
shortcut = x # B, H, W, C
x = self.norm1(x)
diff --git a/ultralytics/models/sam/modules/decoders.py b/ultralytics/models/sam/modules/decoders.py
index 7c27ca176b5..ee9497f6c67 100644
--- a/ultralytics/models/sam/modules/decoders.py
+++ b/ultralytics/models/sam/modules/decoders.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import List, Optional, Tuple, Type
diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py
index 7fa7b405d33..a6e9fae887a 100644
--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import List, Optional, Tuple, Type
@@ -479,9 +479,9 @@ def __init__(
self.trunk = trunk
self.neck = neck
self.scalp = scalp
- assert (
- self.trunk.channel_list == self.neck.backbone_channel_list
- ), f"Channel dims of trunk {self.trunk.channel_list} and neck {self.neck.backbone_channel_list} do not match."
+ assert self.trunk.channel_list == self.neck.backbone_channel_list, (
+ f"Channel dims of trunk {self.trunk.channel_list} and neck {self.neck.backbone_channel_list} do not match."
+ )
def forward(self, sample: torch.Tensor):
"""Encodes input through patch embedding, positional embedding, transformer blocks, and neck module."""
diff --git a/ultralytics/models/sam/modules/memory_attention.py b/ultralytics/models/sam/modules/memory_attention.py
index b55b0730274..14998f37a92 100644
--- a/ultralytics/models/sam/modules/memory_attention.py
+++ b/ultralytics/models/sam/modules/memory_attention.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import copy
from typing import Optional
diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py
index 2728b0b4818..8f5c5b77466 100644
--- a/ultralytics/models/sam/modules/sam.py
+++ b/ultralytics/models/sam/modules/sam.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
@@ -36,8 +36,6 @@ class SAMModel(nn.Module):
image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
- pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1).
- pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1).
Methods:
__init__: Initializes the SAMModel with encoders, decoder, and normalization parameters.
@@ -161,18 +159,19 @@ def __init__(
use_multimask_token_for_obj_ptr: bool = False,
iou_prediction_use_sigmoid=False,
memory_temporal_stride_for_eval=1,
- add_all_frames_to_correct_as_cond=False,
non_overlap_masks_for_mem_enc=False,
use_obj_ptrs_in_encoder=False,
max_obj_ptrs_in_encoder=16,
add_tpos_enc_to_obj_ptrs=True,
proj_tpos_enc_in_obj_ptrs=False,
+ use_signed_tpos_enc_to_obj_ptrs=False,
only_obj_ptrs_in_the_past_for_eval=False,
pred_obj_scores: bool = False,
pred_obj_scores_mlp: bool = False,
fixed_no_obj_ptr: bool = False,
soft_no_obj_ptr: bool = False,
use_mlp_for_obj_ptr_proj: bool = False,
+ no_obj_embed_spatial: bool = False,
sam_mask_decoder_extra_args=None,
compile_image_encoder: bool = False,
):
@@ -205,8 +204,6 @@ def __init__(
use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
- add_all_frames_to_correct_as_cond (bool): Whether to append frames with correction clicks to conditioning
- frame list.
non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
memory encoder during evaluation.
use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
@@ -216,6 +213,9 @@ def __init__(
the encoder.
proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
encoding in object pointers.
+ use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance (instead of unsigned absolute distance)
+ in the temporal positional encoding of object pointers. Only relevant when both `use_obj_ptrs_in_encoder=True`
+ and `add_tpos_enc_to_obj_ptrs=True`.
only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
during evaluation.
pred_obj_scores (bool): Whether to predict if there is an object in the frame.
@@ -223,6 +223,7 @@ def __init__(
fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
+ no_obj_embed_spatial (bool): Whether to add a no-object embedding to spatial frames.
sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder.
compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
@@ -253,6 +254,7 @@ def __init__(
if proj_tpos_enc_in_obj_ptrs:
assert add_tpos_enc_to_obj_ptrs # these options need to be used together
self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs
+ self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs
self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval
# Part 2: memory attention to condition current frame's visual features
@@ -309,9 +311,12 @@ def __init__(
self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim))
trunc_normal_(self.no_obj_ptr, std=0.02)
self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj
+ self.no_obj_embed_spatial = None
+ if no_obj_embed_spatial:
+ self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim))
+ trunc_normal_(self.no_obj_embed_spatial, std=0.02)
self._build_sam_heads()
- self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond
self.max_cond_frames_in_attn = max_cond_frames_in_attn
# Model compilation
@@ -342,8 +347,7 @@ def _build_sam_heads(self):
self.sam_prompt_embed_dim = self.hidden_dim
self.sam_image_embedding_size = self.image_size // self.backbone_stride
- # build PromptEncoder and MaskDecoder from SAM
- # (their hyperparameters like `mask_in_chans=16` are from SAM code)
+ # Build PromptEncoder and MaskDecoder from SAM (hyperparameters like `mask_in_chans=16` are from SAM code)
self.sam_prompt_encoder = PromptEncoder(
embed_dim=self.sam_prompt_embed_dim,
image_embedding_size=(
@@ -418,12 +422,11 @@ def _forward_sam_heads(
low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
ious: Tensor of shape (B, M) with estimated IoU for each output mask.
- low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask.
- high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask.
+ low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
+ high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
- object_score_logits: Tensor of shape (B,) with object score logits.
-
- Where M is 3 if multimask_output=True, and 1 if multimask_output=False.
+ object_score_logits: Tensor of shape (B) with object score logits.
+ Where M is 3 if multimask_output=True, and 1 if multimask_output=False.
Examples:
>>> backbone_features = torch.rand(1, 256, 32, 32)
@@ -481,12 +484,7 @@ def _forward_sam_heads(
boxes=None,
masks=sam_mask_prompt,
)
- (
- low_res_multimasks,
- ious,
- sam_output_tokens,
- object_score_logits,
- ) = self.sam_mask_decoder(
+ low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder(
image_embeddings=backbone_features,
image_pe=self.sam_prompt_encoder.get_dense_pe(),
sparse_prompt_embeddings=sparse_embeddings,
@@ -498,13 +496,8 @@ def _forward_sam_heads(
if self.pred_obj_scores:
is_obj_appearing = object_score_logits > 0
- # Mask used for spatial memories is always a *hard* choice between obj and no obj,
- # consistent with the actual mask prediction
- low_res_multimasks = torch.where(
- is_obj_appearing[:, None, None],
- low_res_multimasks,
- NO_OBJ_SCORE,
- )
+ # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction
+ low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE)
# convert masks from possibly bfloat16 (or float16) to float32
# (older PyTorch versions before 2.1 don't support `interpolate` on bf16)
@@ -533,8 +526,6 @@ def _forward_sam_heads(
if self.pred_obj_scores:
# Allow *soft* no obj ptr, unlike for masks
if self.soft_no_obj_ptr:
- # Only hard possible with gt
- assert not self.teacher_force_obj_scores_for_mem
lambda_is_obj_appearing = object_score_logits.sigmoid()
else:
lambda_is_obj_appearing = is_obj_appearing.float()
@@ -612,7 +603,6 @@ def forward_image(self, img_batch: torch.Tensor):
def _prepare_backbone_features(self, backbone_out):
"""Prepares and flattens visual features from the image backbone output for further processing."""
- backbone_out = backbone_out.copy()
assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
@@ -647,11 +637,12 @@ def _prepare_memory_conditioned_features(
if self.num_maskmem == 0: # Disable memory and skip fusion
return current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
num_obj_ptr_tokens = 0
+ tpos_sign_mul = -1 if track_in_reverse else 1
# Step 1: condition the visual features of the current frame on previous memories
if not is_init_cond_frame:
# Retrieve the memories encoded with the maskmem backbone
to_cat_memory, to_cat_memory_pos_embed = [], []
- # Add conditioning frames's output first (all cond frames have t_pos=0 for
+ # Add conditioning frame's output first (all cond frames have t_pos=0 for
# when getting temporal positional embedding below)
assert len(output_dict["cond_frame_outputs"]) > 0
# Select a maximum number of temporally closest cond frames for cross attention
@@ -664,7 +655,7 @@ def _prepare_memory_conditioned_features(
# the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1
# We also allow taking the memory frame non-consecutively (with r>1), in which case
# we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame.
- r = self.memory_temporal_stride_for_eval
+ r = 1 if self.training else self.memory_temporal_stride_for_eval
for t_pos in range(1, self.num_maskmem):
t_rel = self.num_maskmem - t_pos # how many frames before current frame
if t_rel == 1:
@@ -693,11 +684,11 @@ def _prepare_memory_conditioned_features(
if prev is None:
continue # skip padding frames
# "maskmem_features" might have been offloaded to CPU in demo use cases,
- # so we load it back to GPU (it's a no-op if it's already on GPU).
- feats = prev["maskmem_features"].cuda(non_blocking=True)
+ # so we load it back to inference device (it's a no-op if it's already on device).
+ feats = prev["maskmem_features"].to(device=device, non_blocking=True)
to_cat_memory.append(feats.flatten(2).permute(2, 0, 1))
# Spatial positional encoding (it might have been offloaded to CPU in eval)
- maskmem_enc = prev["maskmem_pos_enc"][-1].cuda()
+ maskmem_enc = prev["maskmem_pos_enc"][-1].to(device=device)
maskmem_enc = maskmem_enc.flatten(2).permute(2, 0, 1)
# Temporal positional encoding
maskmem_enc = maskmem_enc + self.maskmem_tpos_enc[self.num_maskmem - t_pos - 1]
@@ -718,7 +709,14 @@ def _prepare_memory_conditioned_features(
ptr_cond_outputs = selected_cond_outputs
pos_and_ptrs = [
# Temporal pos encoding contains how far away each pointer is from current frame
- (abs(frame_idx - t), out["obj_ptr"])
+ (
+ (
+ (frame_idx - t) * tpos_sign_mul
+ if self.use_signed_tpos_enc_to_obj_ptrs
+ else abs(frame_idx - t)
+ ),
+ out["obj_ptr"],
+ )
for t, out in ptr_cond_outputs.items()
]
# Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame
@@ -787,6 +785,7 @@ def _encode_new_memory(
current_vision_feats,
feat_sizes,
pred_masks_high_res,
+ object_score_logits,
is_mask_from_pts,
):
"""Encodes frame features and masks into a new memory representation for video segmentation."""
@@ -812,17 +811,20 @@ def _encode_new_memory(
mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
if self.sigmoid_bias_for_mem_enc != 0.0:
mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
- maskmem_out = self.memory_encoder(
- pix_feat,
- mask_for_mem,
- skip_mask_sigmoid=True, # sigmoid already applied
- )
+ maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True) # sigmoid already applied
maskmem_features = maskmem_out["vision_features"]
maskmem_pos_enc = maskmem_out["vision_pos_enc"]
+ # add a no-object embedding to the spatial memory to indicate that the frame
+ # is predicted to be occluded (i.e. no object is appearing in the frame)
+ if self.no_obj_embed_spatial is not None:
+ is_obj_appearing = (object_score_logits > 0).float()
+ maskmem_features += (1 - is_obj_appearing[..., None, None]) * self.no_obj_embed_spatial[
+ ..., None, None
+ ].expand(*maskmem_features.shape)
return maskmem_features, maskmem_pos_enc
- def track_step(
+ def _track_step(
self,
frame_idx,
is_init_cond_frame,
@@ -833,15 +835,8 @@ def track_step(
mask_inputs,
output_dict,
num_frames,
- track_in_reverse=False, # tracking in reverse time order (for demo usage)
- # Whether to run the memory encoder on the predicted masks. Sometimes we might want
- # to skip the memory encoder with `run_mem_encoder=False`. For example,
- # in demo we might call `track_step` multiple times for each user click,
- # and only encode the memory when the user finalizes their clicks. And in ablation
- # settings like SAM training on static images, we don't need the memory encoder.
- run_mem_encoder=True,
- # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
- prev_sam_mask_logits=None,
+ track_in_reverse,
+ prev_sam_mask_logits,
):
"""Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
@@ -861,7 +856,7 @@ def track_step(
sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs)
else:
# fused the visual feature with previous memory features in the memory bank
- pix_feat_with_mem = self._prepare_memory_conditioned_features(
+ pix_feat = self._prepare_memory_conditioned_features(
frame_idx=frame_idx,
is_init_cond_frame=is_init_cond_frame,
current_vision_feats=current_vision_feats[-1:],
@@ -880,34 +875,34 @@ def track_step(
mask_inputs = prev_sam_mask_logits
multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
sam_outputs = self._forward_sam_heads(
- backbone_features=pix_feat_with_mem,
+ backbone_features=pix_feat,
point_inputs=point_inputs,
mask_inputs=mask_inputs,
high_res_features=high_res_features,
multimask_output=multimask_output,
)
- (
- _,
- _,
- _,
- low_res_masks,
- high_res_masks,
- obj_ptr,
- _,
- ) = sam_outputs
-
- current_out["pred_masks"] = low_res_masks
- current_out["pred_masks_high_res"] = high_res_masks
- current_out["obj_ptr"] = obj_ptr
+ return current_out, sam_outputs, high_res_features, pix_feat
- # Finally run the memory encoder on the predicted mask to encode
- # it into a new memory feature (that can be used in future frames)
+ def _encode_memory_in_output(
+ self,
+ current_vision_feats,
+ feat_sizes,
+ point_inputs,
+ run_mem_encoder,
+ high_res_masks,
+ object_score_logits,
+ current_out,
+ ):
+ """Finally run the memory encoder on the predicted mask to encode it into a new memory feature (that can be
+ used in future frames).
+ """
if run_mem_encoder and self.num_maskmem > 0:
high_res_masks_for_mem_enc = high_res_masks
maskmem_features, maskmem_pos_enc = self._encode_new_memory(
current_vision_feats=current_vision_feats,
feat_sizes=feat_sizes,
pred_masks_high_res=high_res_masks_for_mem_enc,
+ object_score_logits=object_score_logits,
is_mask_from_pts=(point_inputs is not None),
)
current_out["maskmem_features"] = maskmem_features
@@ -916,6 +911,62 @@ def track_step(
current_out["maskmem_features"] = None
current_out["maskmem_pos_enc"] = None
+ def track_step(
+ self,
+ frame_idx,
+ is_init_cond_frame,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ point_inputs,
+ mask_inputs,
+ output_dict,
+ num_frames,
+ track_in_reverse=False, # tracking in reverse time order (for demo usage)
+ # Whether to run the memory encoder on the predicted masks. Sometimes we might want
+ # to skip the memory encoder with `run_mem_encoder=False`. For example,
+ # in demo we might call `track_step` multiple times for each user click,
+ # and only encode the memory when the user finalizes their clicks. And in ablation
+ # settings like SAM training on static images, we don't need the memory encoder.
+ run_mem_encoder=True,
+ # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
+ prev_sam_mask_logits=None,
+ ):
+ """Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
+ current_out, sam_outputs, _, _ = self._track_step(
+ frame_idx,
+ is_init_cond_frame,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ point_inputs,
+ mask_inputs,
+ output_dict,
+ num_frames,
+ track_in_reverse,
+ prev_sam_mask_logits,
+ )
+ _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs
+
+ current_out["pred_masks"] = low_res_masks
+ current_out["pred_masks_high_res"] = high_res_masks
+ current_out["obj_ptr"] = obj_ptr
+ if not self.training:
+ # Only add this in inference (to avoid unused param in activation checkpointing;
+ # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
+ current_out["object_score_logits"] = object_score_logits
+
+ # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames)
+ self._encode_memory_in_output(
+ current_vision_feats,
+ feat_sizes,
+ point_inputs,
+ run_mem_encoder,
+ high_res_masks,
+ object_score_logits,
+ current_out,
+ )
+
return current_out
def _use_multimask(self, is_init_cond_frame, point_inputs):
@@ -927,8 +978,9 @@ def _use_multimask(self, is_init_cond_frame, point_inputs):
and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num)
)
- def _apply_non_overlapping_constraints(self, pred_masks):
- """Applies non-overlapping constraints to masks, keeping highest scoring object per location."""
+ @staticmethod
+ def _apply_non_overlapping_constraints(pred_masks):
+ """Applies non-overlapping constraints to masks, keeping the highest scoring object per location."""
batch_size = pred_masks.size(0)
if batch_size == 1:
return pred_masks
@@ -944,6 +996,10 @@ def _apply_non_overlapping_constraints(self, pred_masks):
pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
return pred_masks
+ def set_binarize(self, binarize=False):
+ """Set binarize for VideoPredictor."""
+ self.binarize_mask_from_pts_for_mem_enc = binarize
+
def set_imgsz(self, imgsz):
"""
Set image size to make model compatible with different image sizes.
diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py
index d036ab98745..1b181f7a06b 100644
--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# --------------------------------------------------------
# TinyViT Model Architecture
@@ -955,7 +955,8 @@ def _check_lr_scale(m):
self.apply(_check_lr_scale)
- def _init_weights(self, m):
+ @staticmethod
+ def _init_weights(m):
"""Initializes weights for linear and normalization layers in the TinyViT model."""
if isinstance(m, nn.Linear):
# NOTE: This initialization is needed only for training.
diff --git a/ultralytics/models/sam/modules/transformer.py b/ultralytics/models/sam/modules/transformer.py
index 5cc0b02f7f1..9b9c2bf6121 100644
--- a/ultralytics/models/sam/modules/transformer.py
+++ b/ultralytics/models/sam/modules/transformer.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
from typing import Tuple, Type
diff --git a/ultralytics/models/sam/modules/utils.py b/ultralytics/models/sam/modules/utils.py
index 8bfb13760d9..6751b87da2e 100644
--- a/ultralytics/models/sam/modules/utils.py
+++ b/ultralytics/models/sam/modules/utils.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Tuple
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index 686ef70c638..345fc7c98fe 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Generate predictions using the Segment Anything Model (SAM).
@@ -8,6 +8,8 @@
segmentation tasks.
"""
+from collections import OrderedDict
+
import numpy as np
import torch
import torch.nn.functional as F
@@ -16,7 +18,7 @@
from ultralytics.engine.predictor import BasePredictor
from ultralytics.engine.results import Results
from ultralytics.utils import DEFAULT_CFG, ops
-from ultralytics.utils.torch_utils import select_device
+from ultralytics.utils.torch_utils import select_device, smart_inference_mode
from .amg import (
batch_iterator,
@@ -71,8 +73,8 @@ class Predictor(BasePredictor):
>>> predictor = Predictor()
>>> predictor.setup_model(model_path="sam_model.pt")
>>> predictor.set_image("image.jpg")
- >>> masks, scores, boxes = predictor.generate()
- >>> results = predictor.postprocess((masks, scores, boxes), im, orig_img)
+ >>> bboxes = [[100, 100, 200, 200]]
+ >>> results = predictor(bboxes=bboxes)
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
@@ -89,13 +91,13 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
_callbacks (Dict | None): Dictionary of callback functions to customize behavior.
Examples:
- >>> predictor = Predictor(cfg=DEFAULT_CFG)
- >>> predictor = Predictor(overrides={"imgsz": 640})
- >>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback})
+ >>> predictor_example = Predictor(cfg=DEFAULT_CFG)
+ >>> predictor_example_with_imgsz = Predictor(overrides={"imgsz": 640})
+ >>> predictor_example_with_callback = Predictor(_callbacks={"on_predict_start": custom_callback})
"""
if overrides is None:
overrides = {}
- overrides.update(dict(task="segment", mode="predict"))
+ overrides.update(dict(task="segment", mode="predict", batch=1))
super().__init__(cfg, overrides, _callbacks)
self.args.retina_masks = True
self.im = None
@@ -114,7 +116,7 @@ def preprocess(self, im):
im (torch.Tensor | List[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays.
Returns:
- (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype.
+ im (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype.
Examples:
>>> predictor = Predictor()
@@ -181,21 +183,21 @@ def inference(self, im, bboxes=None, points=None, labels=None, masks=None, multi
**kwargs (Any): Additional keyword arguments.
Returns:
- (tuple): Contains the following three elements:
- - np.ndarray: The output masks in shape (C, H, W), where C is the number of generated masks.
- - np.ndarray: An array of length C containing quality scores predicted by the model for each mask.
- - np.ndarray: Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256.
+ (np.ndarray): The output masks in shape (C, H, W), where C is the number of generated masks.
+ (np.ndarray): An array of length C containing quality scores predicted by the model for each mask.
+ (np.ndarray): Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256.
Examples:
>>> predictor = Predictor()
>>> predictor.setup_model(model_path="sam_model.pt")
>>> predictor.set_image("image.jpg")
- >>> masks, scores, logits = predictor.inference(im, bboxes=[[0, 0, 100, 100]])
+ >>> results = predictor(bboxes=[[0, 0, 100, 100]])
"""
# Override prompts if any stored in self.prompts
bboxes = self.prompts.pop("bboxes", bboxes)
points = self.prompts.pop("points", points)
masks = self.prompts.pop("masks", masks)
+ labels = self.prompts.pop("labels", labels)
if all(i is None for i in [bboxes, points, masks]):
return self.generate(im, *args, **kwargs)
@@ -212,16 +214,17 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None
Args:
im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N,). 1 for foreground, 0 for background.
+ points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256.
multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
+ Raises:
+ AssertionError: If the number of points does not match the number of labels when labels are passed.
+
Returns:
- (tuple): Tuple containing:
- - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks.
- - np.ndarray: Quality scores predicted by the model for each mask, with length C.
- - np.ndarray: Low-resolution logits with shape (C, H, W) for subsequent inference, where H=W=256.
+ (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks.
+ (np.ndarray): Quality scores predicted by the model for each mask, with length C.
Examples:
>>> predictor = Predictor()
@@ -231,26 +234,7 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None
"""
features = self.get_im_features(im) if self.features is None else self.features
- src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:]
- r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
- # Transform input prompts
- if points is not None:
- points = torch.as_tensor(points, dtype=torch.float32, device=self.device)
- points = points[None] if points.ndim == 1 else points
- # Assuming labels are all positive if users don't pass labels.
- if labels is None:
- labels = np.ones(points.shape[0])
- labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
- points *= r
- # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1)
- points, labels = points[:, None, :], labels[:, None]
- if bboxes is not None:
- bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device)
- bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
- bboxes *= r
- if masks is not None:
- masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1)
-
+ bboxes, points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks)
points = (points, labels) if points is not None else None
# Embed prompts
sparse_embeddings, dense_embeddings = self.model.prompt_encoder(points=points, boxes=bboxes, masks=masks)
@@ -268,6 +252,48 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None
# `d` could be 1 or 3 depends on `multimask_output`.
return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
+ def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None):
+ """
+ Prepares and transforms the input prompts for processing based on the destination shape.
+
+ Args:
+ dst_shape (tuple): The target shape (height, width) for the prompts.
+ bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
+ masks (List | np.ndarray, Optional): Masks for the objects, where each mask is a 2D array.
+
+ Raises:
+ AssertionError: If the number of points does not match the number of labels when labels are passed.
+
+ Returns:
+ (tuple): A tuple containing transformed bounding boxes, points, labels, and masks.
+ """
+ src_shape = self.batch[1][0].shape[:2]
+ r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
+ # Transform input prompts
+ if points is not None:
+ points = torch.as_tensor(points, dtype=torch.float32, device=self.device)
+ points = points[None] if points.ndim == 1 else points
+ # Assuming labels are all positive if users don't pass labels.
+ if labels is None:
+ labels = np.ones(points.shape[:-1])
+ labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
+ assert points.shape[-2] == labels.shape[-1], (
+ f"Number of points {points.shape[-2]} should match number of labels {labels.shape[-1]}."
+ )
+ points *= r
+ if points.ndim == 2:
+ # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1)
+ points, labels = points[:, None, :], labels[:, None]
+ if bboxes is not None:
+ bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device)
+ bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
+ bboxes *= r
+ if masks is not None:
+ masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1)
+ return bboxes, points, labels, masks
+
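# Illustrative sketch (not part of the patch) of the prompt rescaling performed by
# `_prepare_prompts`: prompts given in original-image pixels are scaled by the same
# letterbox ratio used for the image, and 2D point arrays gain a per-object axis.
# The image sizes and prompt values below are assumptions for the example.
import torch

src_shape = (720, 1280)   # original image (H, W)
dst_shape = (1024, 1024)  # model input (H, W)
r = min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])

points = torch.as_tensor([[100.0, 200.0], [300.0, 400.0]])  # (N, 2) in source pixels
labels = torch.ones(points.shape[:-1], dtype=torch.int32)   # default: all foreground
points = points * r
points, labels = points[:, None, :], labels[:, None]        # (N, 1, 2), (N, 1)

bboxes = torch.as_tensor([[50.0, 60.0, 200.0, 220.0]]) * r  # (N, 4) scaled the same way
print(r, points.shape, labels.shape, bboxes.shape)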
def generate(
self,
im,
@@ -302,10 +328,9 @@ def generate(
crop_nms_thresh (float): IoU cutoff for NMS to remove duplicate masks between crops.
Returns:
- (Tuple[torch.Tensor, torch.Tensor, torch.Tensor]): A tuple containing:
- - pred_masks (torch.Tensor): Segmented masks with shape (N, H, W).
- - pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,).
- - pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4).
+ pred_masks (torch.Tensor): Segmented masks with shape (N, H, W).
+ pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,).
+ pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4).
Examples:
>>> predictor = Predictor()
@@ -381,7 +406,7 @@ def generate(
return pred_masks, pred_scores, pred_bboxes
- def setup_model(self, model, verbose=True):
+ def setup_model(self, model=None, verbose=True):
"""
Initializes the Segment Anything Model (SAM) for inference.
@@ -389,7 +414,7 @@ def setup_model(self, model, verbose=True):
parameters for image normalization and other Ultralytics compatibility settings.
Args:
- model (torch.nn.Module): A pre-trained SAM model. If None, a model will be built based on configuration.
+ model (torch.nn.Module | None): A pretrained SAM model. If None, a new model is built based on config.
verbose (bool): If True, prints selected device information.
Examples:
@@ -432,7 +457,7 @@ def postprocess(self, preds, img, orig_imgs):
orig_imgs (List[np.ndarray] | torch.Tensor): The original, unprocessed images.
Returns:
- (List[Results]): List of Results objects containing detection masks, bounding boxes, and other
+ results (List[Results]): List of Results objects containing detection masks, bounding boxes, and other
metadata for each processed image.
Examples:
@@ -451,7 +476,7 @@ def postprocess(self, preds, img, orig_imgs):
results = []
for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
if len(masks) == 0:
- masks = None
+ masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
else:
masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
masks = masks > self.model.mask_threshold # to bool
@@ -527,9 +552,9 @@ def set_image(self, image):
def get_im_features(self, im):
"""Extracts image features using the SAM model's image encoder for subsequent mask prediction."""
- assert (
- isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1]
- ), f"SAM models only support square image size, but got {self.imgsz}."
+ assert isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1], (
+ f"SAM models only support square image size, but got {self.imgsz}."
+ )
self.model.set_imgsz(self.imgsz)
return self.model.image_encoder(im)
@@ -559,9 +584,8 @@ def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
nms_thresh (float): IoU threshold for the NMS algorithm to remove duplicate boxes.
Returns:
- (tuple):
- - new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W).
- - keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.
+ new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W).
+ keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.
Examples:
>>> masks = torch.rand(5, 640, 640) > 0.5 # 5 random binary masks
@@ -622,8 +646,8 @@ class SAM2Predictor(Predictor):
>>> predictor = SAM2Predictor(cfg)
>>> predictor.set_image("path/to/image.jpg")
>>> bboxes = [[100, 100, 200, 200]]
- >>> masks, scores, _ = predictor.prompt_inference(predictor.im, bboxes=bboxes)
- >>> print(f"Predicted {len(masks)} masks with average score {scores.mean():.2f}")
+ >>> result = predictor(bboxes=bboxes)[0]
+ >>> print(f"Predicted {len(result.masks)} masks with average score {result.boxes.conf.mean():.2f}")
"""
_bb_feat_sizes = [
@@ -663,17 +687,15 @@ def prompt_inference(
img_idx (int): Index of the image in the batch to process.
Returns:
- (tuple): Tuple containing:
- - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks.
- - np.ndarray: Quality scores for each mask, with length C.
- - np.ndarray: Low-resolution logits with shape (C, 256, 256) for subsequent inference.
+ (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks.
+ (np.ndarray): Quality scores for each mask, with length C.
Examples:
>>> predictor = SAM2Predictor(cfg)
>>> image = torch.rand(1, 3, 640, 640)
>>> bboxes = [[100, 100, 200, 200]]
- >>> masks, scores, logits = predictor.prompt_inference(image, bboxes=bboxes)
- >>> print(f"Generated {masks.shape[0]} masks with average score {scores.mean():.2f}")
+ >>> result = predictor(image, bboxes=bboxes)[0]
+ >>> print(f"Generated {result.masks.shape[0]} masks with average score {result.boxes.conf.mean():.2f}")
Notes:
- The method supports batched inference for multiple objects when points or bboxes are provided.
@@ -685,34 +707,7 @@ def prompt_inference(
"""
features = self.get_im_features(im) if self.features is None else self.features
- src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:]
- r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
- # Transform input prompts
- if points is not None:
- points = torch.as_tensor(points, dtype=torch.float32, device=self.device)
- points = points[None] if points.ndim == 1 else points
- # Assuming labels are all positive if users don't pass labels.
- if labels is None:
- labels = torch.ones(points.shape[0])
- labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
- points *= r
- # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1)
- points, labels = points[:, None], labels[:, None]
- if bboxes is not None:
- bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device)
- bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
- bboxes = bboxes.view(-1, 2, 2) * r
- bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
- # NOTE: merge "boxes" and "points" into a single "points" input
- # (where boxes are added at the beginning) to model.sam_prompt_encoder
- if points is not None:
- points = torch.cat([bboxes, points], dim=1)
- labels = torch.cat([bbox_labels, labels], dim=1)
- else:
- points, labels = bboxes, bbox_labels
- if masks is not None:
- masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1)
-
+ points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks)
points = (points, labels) if points is not None else None
sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
@@ -736,6 +731,36 @@ def prompt_inference(
# `d` could be 1 or 3 depends on `multimask_output`.
return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
+ def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None):
+ """
+ Prepares and transforms the input prompts for processing based on the destination shape.
+
+ Args:
+ dst_shape (tuple): The target shape (height, width) for the prompts.
+ bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
+ masks (List | np.ndarray, Optional): Masks for the objects, where each mask is a 2D array.
+
+ Raises:
+ AssertionError: If the number of points does not match the number of labels when labels are passed.
+
+ Returns:
+ (tuple): A tuple containing transformed points, labels, and masks.
+ """
+ bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, bboxes, points, labels, masks)
+ if bboxes is not None:
+ bboxes = bboxes.view(-1, 2, 2)
+ bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
+ # NOTE: merge "boxes" and "points" into a single "points" input
+ # (where boxes are added at the beginning) to model.sam_prompt_encoder
+ if points is not None:
+ points = torch.cat([bboxes, points], dim=1)
+ labels = torch.cat([bbox_labels, labels], dim=1)
+ else:
+ points, labels = bboxes, bbox_labels
+ return points, labels, masks
+
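# Illustrative sketch (not part of the patch): how the SAM2 prompt preparation folds
# boxes into the point prompt. Each XYXY box becomes two corner points labelled 2 and 3
# and is prepended to any user click points before reaching the prompt encoder.
import torch

bboxes = torch.tensor([[50.0, 60.0, 200.0, 220.0]])  # (N, 4), already rescaled
points = torch.tensor([[[120.0, 130.0]]])            # (N, 1, 2) click prompts
labels = torch.tensor([[1]], dtype=torch.int32)      # (N, 1) foreground click

box_points = bboxes.view(-1, 2, 2)                   # (N, 2, 2) two corners per box
box_labels = torch.tensor([[2, 3]], dtype=torch.int32).expand(len(box_points), -1)

merged_points = torch.cat([box_points, points], dim=1)  # (N, 3, 2)
merged_labels = torch.cat([box_labels, labels], dim=1)  # (N, 3)
print(merged_points.shape, merged_labels.shape)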
def set_image(self, image):
"""
Preprocesses and sets a single image for inference using the SAM2 model.
@@ -770,9 +795,9 @@ def set_image(self, image):
def get_im_features(self, im):
"""Extracts image features from the SAM image encoder for subsequent processing."""
- assert (
- isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1]
- ), f"SAM 2 models only support square image size, but got {self.imgsz}."
+ assert isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1], (
+ f"SAM 2 models only support square image size, but got {self.imgsz}."
+ )
self.model.set_imgsz(self.imgsz)
self._bb_feat_sizes = [[x // (4 * i) for x in self.imgsz] for i in [1, 2, 4]]
@@ -785,3 +810,796 @@ def get_im_features(self, im):
for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
][::-1]
return {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
+
+
+class SAM2VideoPredictor(SAM2Predictor):
+ """
+ SAM2VideoPredictor to handle user interactions with videos and manage inference states.
+
+ This class extends the functionality of SAM2Predictor to support video processing and maintains
+ the state of inference operations. It includes configurations for managing non-overlapping masks,
+ clearing memory for non-conditional inputs, and setting up callbacks for prediction events.
+
+ Attributes:
+ inference_state (Dict): A dictionary to store the current state of inference operations.
+ non_overlap_masks (bool): A flag indicating whether masks should be non-overlapping.
+ clear_non_cond_mem_around_input (bool): A flag to control clearing non-conditional memory around inputs.
+ clear_non_cond_mem_for_multi_obj (bool): A flag to control clearing non-conditional memory for multi-object scenarios.
+ callbacks (Dict): A dictionary of callbacks for various prediction lifecycle events.
+
+ Args:
+ cfg (Dict, Optional): Configuration settings for the predictor. Defaults to DEFAULT_CFG.
+ overrides (Dict, Optional): Additional configuration overrides. Defaults to None.
+ _callbacks (List, Optional): Custom callbacks to be added. Defaults to None.
+
+ Note:
+ The `fill_hole_area` attribute is defined but not used in the current implementation.
+ """
+
+ # fill_hole_area = 8 # not used
+
+ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """
+ Initialize the predictor with configuration and optional overrides.
+
+ This constructor initializes the SAM2VideoPredictor with a given configuration, applies any
+ specified overrides, and sets up the inference state along with certain flags
+ that control the behavior of the predictor.
+
+ Args:
+ cfg (Dict): Configuration dictionary containing default settings.
+ overrides (Dict | None): Dictionary of values to override default configuration.
+ _callbacks (Dict | None): Dictionary of callback functions to customize behavior.
+
+ Examples:
+ >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG)
+ >>> predictor_example_with_imgsz = SAM2VideoPredictor(overrides={"imgsz": 640})
+ >>> predictor_example_with_callback = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback})
+ """
+ super().__init__(cfg, overrides, _callbacks)
+ self.inference_state = {}
+ self.non_overlap_masks = True
+ self.clear_non_cond_mem_around_input = False
+ self.clear_non_cond_mem_for_multi_obj = False
+ self.callbacks["on_predict_start"].append(self.init_state)
+
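# Possible usage sketch (not part of the patch), assuming the class is exported from
# ultralytics.models.sam and that the checkpoint name, video path, and prompt values
# below are placeholders chosen for illustration.
from ultralytics.models.sam import SAM2VideoPredictor

overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2_b.pt")
predictor = SAM2VideoPredictor(overrides=overrides)

# Prompt the first frame with a single foreground click; the resulting object is then
# tracked through the remaining frames of the video.
results = predictor(source="test.mp4", points=[920, 470], labels=[1])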
+ def get_model(self):
+ """
+ Retrieves and configures the model with binarization enabled.
+
+ Note:
+ This method overrides the base class implementation to set the binarize flag to True.
+ """
+ model = super().get_model()
+ model.set_binarize(True)
+ return model
+
+ def inference(self, im, bboxes=None, points=None, labels=None, masks=None):
+ """
+ Perform image segmentation inference based on the given input cues, using the currently loaded image. This
+ method leverages SAM's (Segment Anything Model) architecture consisting of image encoder, prompt encoder, and
+ mask decoder for real-time and promptable segmentation tasks.
+
+ Args:
+ im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
+ bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
+ points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+ masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.
+
+ Returns:
+ (np.ndarray): The output masks in shape CxHxW, where C is the number of generated masks.
+ (np.ndarray): An array of length C containing quality scores predicted by the model for each mask.
+ """
+ # Override prompts if any stored in self.prompts
+ bboxes = self.prompts.pop("bboxes", bboxes)
+ points = self.prompts.pop("points", points)
+ masks = self.prompts.pop("masks", masks)
+
+ frame = self.dataset.frame
+ self.inference_state["im"] = im
+ output_dict = self.inference_state["output_dict"]
+ if len(output_dict["cond_frame_outputs"]) == 0: # initialize prompts
+ points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks)
+ if points is not None:
+ for i in range(len(points)):
+ self.add_new_prompts(obj_id=i, points=points[[i]], labels=labels[[i]], frame_idx=frame)
+ elif masks is not None:
+ for i in range(len(masks)):
+ self.add_new_prompts(obj_id=i, masks=masks[[i]], frame_idx=frame)
+ self.propagate_in_video_preflight()
+
+ consolidated_frame_inds = self.inference_state["consolidated_frame_inds"]
+ batch_size = len(self.inference_state["obj_idx_to_id"])
+ if len(output_dict["cond_frame_outputs"]) == 0:
+ raise RuntimeError("No points are provided; please add points first")
+
+ if frame in consolidated_frame_inds["cond_frame_outputs"]:
+ storage_key = "cond_frame_outputs"
+ current_out = output_dict[storage_key][frame]
+ if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1):
+ # clear non-conditioning memory of the surrounding frames
+ self._clear_non_cond_mem_around_input(frame)
+ elif frame in consolidated_frame_inds["non_cond_frame_outputs"]:
+ storage_key = "non_cond_frame_outputs"
+ current_out = output_dict[storage_key][frame]
+ else:
+ storage_key = "non_cond_frame_outputs"
+ current_out = self._run_single_frame_inference(
+ output_dict=output_dict,
+ frame_idx=frame,
+ batch_size=batch_size,
+ is_init_cond_frame=False,
+ point_inputs=None,
+ mask_inputs=None,
+ reverse=False,
+ run_mem_encoder=True,
+ )
+ output_dict[storage_key][frame] = current_out
+ # Create slices of per-object outputs for subsequent interaction with each
+ # individual object after tracking.
+ self._add_output_per_object(frame, current_out, storage_key)
+ self.inference_state["frames_already_tracked"].append(frame)
+ pred_masks = current_out["pred_masks"].flatten(0, 1)
+ pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0] # filter blank masks
+
+ return pred_masks, torch.ones(len(pred_masks), dtype=pred_masks.dtype, device=pred_masks.device)
+
+ def postprocess(self, preds, img, orig_imgs):
+ """
+ Post-processes the predictions to apply non-overlapping constraints if required.
+
+ This method extends the post-processing functionality by applying non-overlapping constraints
+ to the predicted masks if the `non_overlap_masks` flag is set to True. This ensures that
+ the masks do not overlap, which can be useful for certain applications.
+
+ Args:
+ preds (Tuple[torch.Tensor]): The predictions from the model.
+ img (torch.Tensor): The processed image tensor.
+ orig_imgs (List[np.ndarray]): The original images before processing.
+
+ Returns:
+ results (list): The post-processed predictions.
+
+ Note:
+ If `non_overlap_masks` is True, the method applies constraints to ensure non-overlapping masks.
+ """
+ results = super().postprocess(preds, img, orig_imgs)
+ if self.non_overlap_masks:
+ for result in results:
+ if result.masks is None or len(result.masks) == 0:
+ continue
+ result.masks.data = self.model._apply_non_overlapping_constraints(result.masks.data.unsqueeze(0))[0]
+ return results
+
+ @smart_inference_mode()
+ def add_new_prompts(
+ self,
+ obj_id,
+ points=None,
+ labels=None,
+ masks=None,
+ frame_idx=0,
+ ):
+ """
+ Adds new points or masks to a specific frame for a given object ID.
+
+ This method updates the inference state with new prompts (points or masks) for a specified
+ object and frame index. It ensures that the prompts are either points or masks, but not both,
+ and updates the internal state accordingly. It also handles the generation of new segmentations
+ based on the provided prompts and the existing state.
+
+ Args:
+ obj_id (int): The ID of the object to which the prompts are associated.
+ points (torch.Tensor, Optional): The coordinates of the points of interest. Defaults to None.
+ labels (torch.Tensor, Optional): The labels corresponding to the points. Defaults to None.
+ masks (torch.Tensor, optional): Binary masks for the object. Defaults to None.
+ frame_idx (int, optional): The index of the frame to which the prompts are applied. Defaults to 0.
+
+ Returns:
+ (tuple): A tuple containing the flattened predicted masks and a tensor of ones indicating the number of objects.
+
+ Raises:
+ AssertionError: If both `masks` and `points` are provided, or neither is provided.
+
+ Note:
+ - Only one type of prompt (either points or masks) can be added per call.
+ - If the frame is being tracked for the first time, it is treated as an initial conditioning frame.
+ - The method handles the consolidation of outputs and resizing of masks to the original video resolution.
+ """
+ assert (masks is None) ^ (points is None), "'masks' and 'points' prompts are not compatible with each other."
+ obj_idx = self._obj_id_to_idx(obj_id)
+
+ point_inputs = None
+ pop_key = "point_inputs_per_obj"
+ if points is not None:
+ point_inputs = {"point_coords": points, "point_labels": labels}
+ self.inference_state["point_inputs_per_obj"][obj_idx][frame_idx] = point_inputs
+ pop_key = "mask_inputs_per_obj"
+ self.inference_state["mask_inputs_per_obj"][obj_idx][frame_idx] = masks
+ self.inference_state[pop_key][obj_idx].pop(frame_idx, None)
+ # If this frame hasn't been tracked before, we treat it as an initial conditioning
+ # frame, meaning that the input points are used to generate segments on this frame without
+ # using any memory from other frames, like in SAM. Otherwise (if it has been tracked),
+ # the input points will be used to correct the already tracked masks.
+ is_init_cond_frame = frame_idx not in self.inference_state["frames_already_tracked"]
+ obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx]
+ obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx]
+ # Add a frame to conditioning output if it's an initial conditioning frame or
+ # if the model sees all frames receiving clicks/mask as conditioning frames.
+ is_cond = is_init_cond_frame or self.model.add_all_frames_to_correct_as_cond
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+
+ # Get any previously predicted mask logits on this object and feed it along with
+ # the new clicks into the SAM mask decoder.
+ prev_sam_mask_logits = None
+ # lookup temporary output dict first, which contains the most recent output
+ # (if not found, then lookup conditioning and non-conditioning frame output)
+ if point_inputs is not None:
+ prev_out = (
+ obj_temp_output_dict[storage_key].get(frame_idx)
+ or obj_output_dict["cond_frame_outputs"].get(frame_idx)
+ or obj_output_dict["non_cond_frame_outputs"].get(frame_idx)
+ )
+
+ if prev_out is not None and prev_out.get("pred_masks") is not None:
+ prev_sam_mask_logits = prev_out["pred_masks"].to(device=self.device, non_blocking=True)
+ # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues.
+ prev_sam_mask_logits.clamp_(-32.0, 32.0)
+ current_out = self._run_single_frame_inference(
+ output_dict=obj_output_dict, # run on the slice of a single object
+ frame_idx=frame_idx,
+ batch_size=1, # run on the slice of a single object
+ is_init_cond_frame=is_init_cond_frame,
+ point_inputs=point_inputs,
+ mask_inputs=masks,
+ reverse=False,
+ # Skip the memory encoder when adding clicks or mask. We execute the memory encoder
+ # at the beginning of `propagate_in_video` (after the user finalizes their clicks). This
+ # allows us to enforce non-overlapping constraints on all objects before encoding
+ # them into memory.
+ run_mem_encoder=False,
+ prev_sam_mask_logits=prev_sam_mask_logits,
+ )
+ # Add the output to the output dict (to be used as future memory)
+ obj_temp_output_dict[storage_key][frame_idx] = current_out
+
+ # Resize the output mask to the original video resolution
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=False,
+ )
+ pred_masks = consolidated_out["pred_masks"].flatten(0, 1)
+ return pred_masks.flatten(0, 1), torch.ones(1, dtype=pred_masks.dtype, device=pred_masks.device)
+
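# Illustrative sketch (not part of the patch): callers typically slice prompts per object
# before handing them to `add_new_prompts`, and list indexing such as `points[[i]]` keeps
# the leading batch dimension so each object still gets a (1, num_points, 2) tensor.
import torch

points = torch.randn(3, 1, 2)                 # three objects, one click each
labels = torch.ones(3, 1, dtype=torch.int32)

for i in range(len(points)):
    per_obj_points, per_obj_labels = points[[i]], labels[[i]]
    print(i, per_obj_points.shape, per_obj_labels.shape)  # (1, 1, 2) and (1, 1)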
+ @smart_inference_mode()
+ def propagate_in_video_preflight(self):
+ """
+ Prepare inference_state and consolidate temporary outputs before tracking.
+
+ This method marks the start of tracking, disallowing the addition of new objects until the session is reset.
+ It consolidates temporary outputs from `temp_output_dict_per_obj` and merges them into `output_dict`.
+ Additionally, it clears non-conditioning memory around input frames and ensures that the state is consistent
+ with the provided inputs.
+ """
+ # Tracking has started and we don't allow adding new objects until session is reset.
+ self.inference_state["tracking_has_started"] = True
+ batch_size = len(self.inference_state["obj_idx_to_id"])
+
+ # Consolidate per-object temporary outputs in "temp_output_dict_per_obj" and
+ # add them into "output_dict".
+ temp_output_dict_per_obj = self.inference_state["temp_output_dict_per_obj"]
+ output_dict = self.inference_state["output_dict"]
+ # "consolidated_frame_inds" contains indices of those frames where consolidated
+ # temporary outputs have been added (either in this call or any previous calls
+ # to `propagate_in_video_preflight`).
+ consolidated_frame_inds = self.inference_state["consolidated_frame_inds"]
+ for is_cond in {False, True}:
+ # Separately consolidate conditioning and non-conditioning temp outputs
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+ # Find all the frames that contain temporary outputs for any objects
+ # (these should be the frames that have just received clicks or mask inputs
+ # via `add_new_points` or `add_new_mask`)
+ temp_frame_inds = set()
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
+ temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
+ consolidated_frame_inds[storage_key].update(temp_frame_inds)
+ # consolidate the temporary output across all objects on this frame
+ for frame_idx in temp_frame_inds:
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ frame_idx, is_cond=is_cond, run_mem_encoder=True
+ )
+ # merge them into "output_dict" and also create per-object slices
+ output_dict[storage_key][frame_idx] = consolidated_out
+ self._add_output_per_object(frame_idx, consolidated_out, storage_key)
+ if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1):
+ # clear non-conditioning memory of the surrounding frames
+ self._clear_non_cond_mem_around_input(frame_idx)
+
+ # clear temporary outputs in `temp_output_dict_per_obj`
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
+ obj_temp_output_dict[storage_key].clear()
+
+ # edge case: if an output is added to "cond_frame_outputs", we remove any prior
+ # output on the same frame in "non_cond_frame_outputs"
+ for frame_idx in output_dict["cond_frame_outputs"]:
+ output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
+ for obj_output_dict in self.inference_state["output_dict_per_obj"].values():
+ for frame_idx in obj_output_dict["cond_frame_outputs"]:
+ obj_output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
+ for frame_idx in consolidated_frame_inds["cond_frame_outputs"]:
+ assert frame_idx in output_dict["cond_frame_outputs"]
+ consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx)
+
+ # Make sure that the frame indices in "consolidated_frame_inds" are exactly those frames
+ # with either points or mask inputs (which should be true under a correct workflow).
+ all_consolidated_frame_inds = (
+ consolidated_frame_inds["cond_frame_outputs"] | consolidated_frame_inds["non_cond_frame_outputs"]
+ )
+ input_frames_inds = set()
+ for point_inputs_per_frame in self.inference_state["point_inputs_per_obj"].values():
+ input_frames_inds.update(point_inputs_per_frame.keys())
+ for mask_inputs_per_frame in self.inference_state["mask_inputs_per_obj"].values():
+ input_frames_inds.update(mask_inputs_per_frame.keys())
+ assert all_consolidated_frame_inds == input_frames_inds
+
+ @staticmethod
+ def init_state(predictor):
+ """
+ Initialize an inference state for the predictor.
+
+ This function sets up the initial state required for performing inference on video data.
+ It includes initializing various dictionaries and ordered dictionaries that will store
+ inputs, outputs, and other metadata relevant to the tracking process.
+
+ Args:
+ predictor (SAM2VideoPredictor): The predictor object for which to initialize the state.
+ """
+ if len(predictor.inference_state) > 0: # means initialized
+ return
+ assert predictor.dataset is not None
+ assert predictor.dataset.mode == "video"
+
+ inference_state = {
+ "num_frames": predictor.dataset.frames,
+ "point_inputs_per_obj": {}, # inputs points on each frame
+ "mask_inputs_per_obj": {}, # inputs mask on each frame
+ "constants": {}, # values that don't change across frames (so we only need to hold one copy of them)
+ # mapping between client-side object id and model-side object index
+ "obj_id_to_idx": OrderedDict(),
+ "obj_idx_to_id": OrderedDict(),
+ "obj_ids": [],
+ # A storage to hold the model's tracking results and states on each frame
+ "output_dict": {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ },
+ # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
+ "output_dict_per_obj": {},
+ # A temporary storage to hold new outputs when the user interacts with a frame
+ # to add clicks or mask (it's merged into "output_dict" before propagation starts)
+ "temp_output_dict_per_obj": {},
+ # Frames that already hold consolidated outputs from click or mask inputs
+ # (we directly use their consolidated outputs during tracking)
+ "consolidated_frame_inds": {
+ "cond_frame_outputs": set(), # set containing frame indices
+ "non_cond_frame_outputs": set(), # set containing frame indices
+ },
+ # metadata for each tracking frame (e.g. which direction it's tracked)
+ "tracking_has_started": False,
+ "frames_already_tracked": [],
+ }
+ predictor.inference_state = inference_state
+
+ def get_im_features(self, im, batch=1):
+ """
+ Extracts and processes image features using SAM2's image encoder for subsequent segmentation tasks.
+
+ Args:
+ im (torch.Tensor): The input image tensor.
+ batch (int, optional): The batch size for expanding features if there are multiple prompts. Defaults to 1.
+
+ Returns:
+ vis_feats (torch.Tensor): The visual features extracted from the image.
+ vis_pos_embed (torch.Tensor): The positional embeddings for the visual features.
+ feat_sizes (List(Tuple[int])): A list containing the sizes of the extracted features.
+
+ Note:
+ - If `batch` is greater than 1, the features are expanded to fit the batch size.
+ - The method leverages the model's `_prepare_backbone_features` method to prepare the backbone features.
+ """
+ backbone_out = self.model.forward_image(im)
+ if batch > 1: # expand features if there's more than one prompt
+ for i, feat in enumerate(backbone_out["backbone_fpn"]):
+ backbone_out["backbone_fpn"][i] = feat.expand(batch, -1, -1, -1)
+ for i, pos in enumerate(backbone_out["vision_pos_enc"]):
+ pos = pos.expand(batch, -1, -1, -1)
+ backbone_out["vision_pos_enc"][i] = pos
+ _, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out)
+ return vis_feats, vis_pos_embed, feat_sizes
+
+ def _obj_id_to_idx(self, obj_id):
+ """
+ Map client-side object id to model-side object index.
+
+ Args:
+ obj_id (int): The unique identifier of the object provided by the client side.
+
+ Returns:
+ obj_idx (int): The index of the object on the model side.
+
+ Raises:
+ RuntimeError: If an attempt is made to add a new object after tracking has started.
+
+ Note:
+ - The method updates or retrieves mappings between object IDs and indices stored in
+ `inference_state`.
+ - It ensures that new objects can only be added before tracking commences.
+ - It maintains two-way mappings between IDs and indices (`obj_id_to_idx` and `obj_idx_to_id`).
+ - Additional data structures are initialized for the new object to store inputs and outputs.
+ """
+ obj_idx = self.inference_state["obj_id_to_idx"].get(obj_id, None)
+ if obj_idx is not None:
+ return obj_idx
+
+ # This is a new object id not sent to the server before. We only allow adding
+ # new objects *before* the tracking starts.
+ allow_new_object = not self.inference_state["tracking_has_started"]
+ if allow_new_object:
+ # get the next object slot
+ obj_idx = len(self.inference_state["obj_id_to_idx"])
+ self.inference_state["obj_id_to_idx"][obj_id] = obj_idx
+ self.inference_state["obj_idx_to_id"][obj_idx] = obj_id
+ self.inference_state["obj_ids"] = list(self.inference_state["obj_id_to_idx"])
+ # set up input and output structures for this object
+ self.inference_state["point_inputs_per_obj"][obj_idx] = {}
+ self.inference_state["mask_inputs_per_obj"][obj_idx] = {}
+ self.inference_state["output_dict_per_obj"][obj_idx] = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ self.inference_state["temp_output_dict_per_obj"][obj_idx] = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ return obj_idx
+ else:
+ raise RuntimeError(
+ f"Cannot add new object id {obj_id} after tracking starts. "
+ f"All existing object ids: {self.inference_state['obj_ids']}. "
+ f"Please call 'reset_state' to restart from scratch."
+ )
+
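# Illustrative sketch (not part of the patch): the two-way object-id mapping maintained by
# `_obj_id_to_idx`, reproduced with plain OrderedDicts outside the predictor for clarity.
from collections import OrderedDict

obj_id_to_idx, obj_idx_to_id = OrderedDict(), OrderedDict()

def register(obj_id, tracking_has_started=False):
    """Map a client-side object id to the next free model-side slot, reusing known ids."""
    if obj_id in obj_id_to_idx:
        return obj_id_to_idx[obj_id]
    if tracking_has_started:
        raise RuntimeError(f"Cannot add new object id {obj_id} after tracking starts.")
    obj_idx = len(obj_id_to_idx)  # next free model-side slot
    obj_id_to_idx[obj_id] = obj_idx
    obj_idx_to_id[obj_idx] = obj_id
    return obj_idx

print(register(42), register(7), register(42))  # 0 1 0 -- repeated ids reuse their slot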
+ def _run_single_frame_inference(
+ self,
+ output_dict,
+ frame_idx,
+ batch_size,
+ is_init_cond_frame,
+ point_inputs,
+ mask_inputs,
+ reverse,
+ run_mem_encoder,
+ prev_sam_mask_logits=None,
+ ):
+ """
+ Run tracking on a single frame based on current inputs and previous memory.
+
+ Args:
+ output_dict (Dict): The dictionary containing the output states of the tracking process.
+ frame_idx (int): The index of the current frame.
+ batch_size (int): The batch size for processing the frame.
+ is_init_cond_frame (bool): Indicates if the current frame is an initial conditioning frame.
+ point_inputs (Dict, Optional): Input points and their labels. Defaults to None.
+ mask_inputs (torch.Tensor, Optional): Input binary masks. Defaults to None.
+ reverse (bool): Indicates if the tracking should be performed in reverse order.
+ run_mem_encoder (bool): Indicates if the memory encoder should be executed.
+ prev_sam_mask_logits (torch.Tensor, Optional): Previous mask logits for the current object. Defaults to None.
+
+ Returns:
+ current_out (dict): A dictionary containing the output of the tracking step, including updated features and predictions.
+
+ Raises:
+ AssertionError: If both `point_inputs` and `mask_inputs` are provided, or neither is provided.
+
+ Note:
+ - The method assumes that `point_inputs` and `mask_inputs` are mutually exclusive.
+ - The method retrieves image features using the `get_im_features` method.
+ - The `maskmem_pos_enc` is assumed to be constant across frames, hence only one copy is stored.
+ - The `fill_holes_in_mask_scores` function is commented out and currently unsupported due to CUDA extension requirements.
+ """
+ # Retrieve correct image features
+ current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features(
+ self.inference_state["im"], batch_size
+ )
+
+ # point and mask should not appear as input simultaneously on the same frame
+ assert point_inputs is None or mask_inputs is None
+ current_out = self.model.track_step(
+ frame_idx=frame_idx,
+ is_init_cond_frame=is_init_cond_frame,
+ current_vision_feats=current_vision_feats,
+ current_vision_pos_embeds=current_vision_pos_embeds,
+ feat_sizes=feat_sizes,
+ point_inputs=point_inputs,
+ mask_inputs=mask_inputs,
+ output_dict=output_dict,
+ num_frames=self.inference_state["num_frames"],
+ track_in_reverse=reverse,
+ run_mem_encoder=run_mem_encoder,
+ prev_sam_mask_logits=prev_sam_mask_logits,
+ )
+
+ maskmem_features = current_out["maskmem_features"]
+ if maskmem_features is not None:
+ current_out["maskmem_features"] = maskmem_features.to(
+ dtype=torch.float16, device=self.device, non_blocking=True
+ )
+ # NOTE: The `fill_holes_in_mask_scores` function is not supported since it requires CUDA extensions
+ # potentially fill holes in the predicted masks
+ # if self.fill_hole_area > 0:
+ # pred_masks = current_out["pred_masks"].to(self.device, non_blocking=True)
+ # pred_masks = fill_holes_in_mask_scores(pred_masks, self.fill_hole_area)
+
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
+ current_out["maskmem_pos_enc"] = self._get_maskmem_pos_enc(current_out["maskmem_pos_enc"])
+ return current_out
+
+ def _get_maskmem_pos_enc(self, out_maskmem_pos_enc):
+ """
+ Caches and manages the positional encoding for mask memory across frames and objects.
+
+ This method optimizes storage by caching the positional encoding (`maskmem_pos_enc`) for
+ mask memory, which is constant across frames and objects, thus reducing the amount of
+ redundant information stored during an inference session. It checks if the positional
+ encoding has already been cached; if not, it caches a slice of the provided encoding.
+ If the batch size is greater than one, it expands the cached positional encoding to match
+ the current batch size.
+
+ Args:
+ out_maskmem_pos_enc (List[torch.Tensor] or None): The positional encoding for mask memory.
+ Should be a list of tensors or None.
+
+ Returns:
+ out_maskmem_pos_enc (List[torch.Tensor]): The positional encoding for mask memory, either cached or expanded.
+
+ Note:
+ - The method assumes that `out_maskmem_pos_enc` is a list of tensors or None.
+ - Only a single object's slice is cached since the encoding is the same across objects.
+ - The method checks if the positional encoding has already been cached in the session's constants.
+ - If the batch size is greater than one, the cached encoding is expanded to fit the batch size.
+ """
+ model_constants = self.inference_state["constants"]
+ # "out_maskmem_pos_enc" should be either a list of tensors or None
+ if out_maskmem_pos_enc is not None:
+ if "maskmem_pos_enc" not in model_constants:
+ assert isinstance(out_maskmem_pos_enc, list)
+ # only take the slice for one object, since it's same across objects
+ maskmem_pos_enc = [x[:1].clone() for x in out_maskmem_pos_enc]
+ model_constants["maskmem_pos_enc"] = maskmem_pos_enc
+ else:
+ maskmem_pos_enc = model_constants["maskmem_pos_enc"]
+ # expand the cached maskmem_pos_enc to the actual batch size
+ batch_size = out_maskmem_pos_enc[0].size(0)
+ if batch_size > 1:
+ out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc]
+ return out_maskmem_pos_enc
+
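# Illustrative sketch (not part of the patch): the caching trick in `_get_maskmem_pos_enc`.
# The positional encoding is identical for every object, so only a single-object slice is
# stored and later expanded (a broadcast view, no copy) to the current batch size.
import torch

constants = {}  # stands in for inference_state["constants"]

def cache_and_expand(out_maskmem_pos_enc):
    if out_maskmem_pos_enc is None:
        return None
    if "maskmem_pos_enc" not in constants:
        constants["maskmem_pos_enc"] = [x[:1].clone() for x in out_maskmem_pos_enc]
    batch_size = out_maskmem_pos_enc[0].size(0)
    if batch_size > 1:  # expand the cached single-object slice to the current batch
        out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in constants["maskmem_pos_enc"]]
    return out_maskmem_pos_enc

enc = [torch.randn(3, 64, 8, 8)]        # batch of 3 objects
print(cache_and_expand(enc)[0].shape)   # torch.Size([3, 64, 8, 8]) from a cached [1, ...] slice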
+ def _consolidate_temp_output_across_obj(
+ self,
+ frame_idx,
+ is_cond=False,
+ run_mem_encoder=False,
+ ):
+ """
+ Consolidates per-object temporary outputs into a single output for all objects.
+
+ This method combines the temporary outputs for each object on a given frame into a unified
+ output. It fills in any missing objects either from the main output dictionary or leaves
+ placeholders if they do not exist in the main output. Optionally, it can re-run the memory
+ encoder after applying non-overlapping constraints to the object scores.
+
+ Args:
+ frame_idx (int): The index of the frame for which to consolidate outputs.
+ is_cond (bool, Optional): Indicates if the frame is considered a conditioning frame.
+ Defaults to False.
+ run_mem_encoder (bool, Optional): Specifies whether to run the memory encoder after
+ consolidating the outputs. Defaults to False.
+
+ Returns:
+ consolidated_out (dict): A consolidated output dictionary containing the combined results for all objects.
+
+ Note:
+ - The method initializes the consolidated output with placeholder values for missing objects.
+ - It searches for outputs in both the temporary and main output dictionaries.
+ - If `run_mem_encoder` is True, it applies non-overlapping constraints and re-runs the memory encoder.
+ - The `maskmem_features` and `maskmem_pos_enc` are only populated when `run_mem_encoder` is True.
+ """
+ batch_size = len(self.inference_state["obj_idx_to_id"])
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+
+ # Initialize `consolidated_out`. Its "maskmem_features" and "maskmem_pos_enc"
+ # will be added when rerunning the memory encoder after applying non-overlapping
+ # constraints to object scores. Its "pred_masks" are prefilled with a large
+ # negative value (NO_OBJ_SCORE) to represent missing objects.
+ consolidated_out = {
+ "maskmem_features": None,
+ "maskmem_pos_enc": None,
+ "pred_masks": torch.full(
+ size=(batch_size, 1, self.imgsz[0] // 4, self.imgsz[1] // 4),
+ fill_value=-1024.0,
+ dtype=torch.float32,
+ device=self.device,
+ ),
+ "obj_ptr": torch.full(
+ size=(batch_size, self.model.hidden_dim),
+ fill_value=-1024.0,
+ dtype=torch.float32,
+ device=self.device,
+ ),
+ "object_score_logits": torch.full(
+ size=(batch_size, 1),
+ # default to 10.0 for object_score_logits, i.e. assuming the object is
+ # present as sigmoid(10)=1, same as in `predict_masks` of `MaskDecoder`
+ fill_value=10.0,
+ dtype=torch.float32,
+ device=self.device,
+ ),
+ }
+ for obj_idx in range(batch_size):
+ obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx]
+ obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx]
+ out = (
+ obj_temp_output_dict[storage_key].get(frame_idx)
+ # If the object doesn't appear in "temp_output_dict_per_obj" on this frame,
+ # we fall back and look up its previous output in "output_dict_per_obj".
+ # We look up both "cond_frame_outputs" and "non_cond_frame_outputs" in
+ # "output_dict_per_obj" to find a previous output for this object.
+ or obj_output_dict["cond_frame_outputs"].get(frame_idx)
+ or obj_output_dict["non_cond_frame_outputs"].get(frame_idx)
+ )
+ # If the object doesn't appear in "output_dict_per_obj" either, we skip it
+ # and leave its mask scores to the default scores (i.e. the NO_OBJ_SCORE
+ # placeholder above) and set its object pointer to be a dummy pointer.
+ if out is None:
+ # Fill in dummy object pointers for those objects without any inputs or
+ # tracking outcomes on this frame (only do it under `run_mem_encoder=True`,
+ # i.e. when we need to build the memory for tracking).
+ if run_mem_encoder:
+ # fill object pointer with a dummy pointer (based on an empty mask)
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = self._get_empty_mask_ptr(frame_idx)
+ continue
+ # Add the temporary object output mask to consolidated output mask
+ consolidated_out["pred_masks"][obj_idx : obj_idx + 1] = out["pred_masks"]
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = out["obj_ptr"]
+
+ # Optionally, apply non-overlapping constraints on the consolidated scores and rerun the memory encoder
+ if run_mem_encoder:
+ high_res_masks = F.interpolate(
+ consolidated_out["pred_masks"],
+ size=self.imgsz,
+ mode="bilinear",
+ align_corners=False,
+ )
+ if self.model.non_overlap_masks_for_mem_enc:
+ high_res_masks = self.model._apply_non_overlapping_constraints(high_res_masks)
+ consolidated_out["maskmem_features"], consolidated_out["maskmem_pos_enc"] = self._run_memory_encoder(
+ batch_size=batch_size,
+ high_res_masks=high_res_masks,
+ is_mask_from_pts=True, # these frames are what the user interacted with
+ object_score_logits=consolidated_out["object_score_logits"],
+ )
+
+ return consolidated_out
+
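# Illustrative sketch (not part of the patch): the placeholder-filled consolidation buffer.
# Missing objects keep a large negative "no object" score (-1024.0) so they binarize to
# empty masks; objects that do have temporary outputs overwrite their own slice.
import torch

batch_size, h, w = 3, 8, 8
consolidated = torch.full((batch_size, 1, h, w), -1024.0)  # NO_OBJ_SCORE placeholder
temp_outputs = {0: torch.randn(1, 1, h, w), 2: torch.randn(1, 1, h, w)}  # object 1 has no output

for obj_idx in range(batch_size):
    out = temp_outputs.get(obj_idx)
    if out is None:
        continue  # keep the placeholder scores for this object
    consolidated[obj_idx : obj_idx + 1] = out

print((consolidated[1] == -1024.0).all().item())  # True: the untouched slot stays a placeholder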
+ def _get_empty_mask_ptr(self, frame_idx):
+ """
+ Get a dummy object pointer based on an empty mask on the current frame.
+
+ Args:
+ frame_idx (int): The index of the current frame for which to generate the dummy object pointer.
+
+ Returns:
+ (torch.Tensor): A tensor representing the dummy object pointer generated from the empty mask.
+ """
+ # Retrieve correct image features
+ current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features(self.inference_state["im"])
+
+ # Feed the empty mask and image feature above to get a dummy object pointer
+ current_out = self.model.track_step(
+ frame_idx=frame_idx,
+ is_init_cond_frame=True,
+ current_vision_feats=current_vision_feats,
+ current_vision_pos_embeds=current_vision_pos_embeds,
+ feat_sizes=feat_sizes,
+ point_inputs=None,
+ # A dummy (empty) mask with a single object
+ mask_inputs=torch.zeros((1, 1, *self.imgsz), dtype=torch.float32, device=self.device),
+ output_dict={},
+ num_frames=self.inference_state["num_frames"],
+ track_in_reverse=False,
+ run_mem_encoder=False,
+ prev_sam_mask_logits=None,
+ )
+ return current_out["obj_ptr"]
+
+ def _run_memory_encoder(self, batch_size, high_res_masks, object_score_logits, is_mask_from_pts):
+ """
+ Run the memory encoder on masks.
+
+ This is usually run after applying non-overlapping constraints to object scores. Since the scores changed, the
+ memory also needs to be recomputed with the memory encoder.
+
+ Args:
+ batch_size (int): The batch size for processing the frame.
+ high_res_masks (torch.Tensor): High-resolution masks for which to compute the memory.
+ object_score_logits (torch.Tensor): Logits representing the object scores.
+ is_mask_from_pts (bool): Indicates if the mask is derived from point interactions.
+
+ Returns:
+ (tuple[torch.Tensor, torch.Tensor]): A tuple containing the encoded mask features and positional encoding.
+ """
+ # Retrieve correct image features
+ current_vision_feats, _, feat_sizes = self.get_im_features(self.inference_state["im"], batch_size)
+ maskmem_features, maskmem_pos_enc = self.model._encode_new_memory(
+ current_vision_feats=current_vision_feats,
+ feat_sizes=feat_sizes,
+ pred_masks_high_res=high_res_masks,
+ is_mask_from_pts=is_mask_from_pts,
+ object_score_logits=object_score_logits,
+ )
+
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
+ maskmem_pos_enc = self._get_maskmem_pos_enc(maskmem_pos_enc)
+ return maskmem_features.to(dtype=torch.float16, device=self.device, non_blocking=True), maskmem_pos_enc
+
+ def _add_output_per_object(self, frame_idx, current_out, storage_key):
+ """
+ Split a multi-object output into per-object output slices and add them into `output_dict_per_obj`.
+
+ The resulting slices share the same tensor storage.
+
+ Args:
+ frame_idx (int): The index of the current frame.
+ current_out (Dict): The current output dictionary containing multi-object outputs.
+ storage_key (str): The key used to store the output in the per-object output dictionary.
+ """
+ maskmem_features = current_out["maskmem_features"]
+ assert maskmem_features is None or isinstance(maskmem_features, torch.Tensor)
+
+ maskmem_pos_enc = current_out["maskmem_pos_enc"]
+ assert maskmem_pos_enc is None or isinstance(maskmem_pos_enc, list)
+
+ for obj_idx, obj_output_dict in self.inference_state["output_dict_per_obj"].items():
+ obj_slice = slice(obj_idx, obj_idx + 1)
+ obj_out = {
+ "maskmem_features": None,
+ "maskmem_pos_enc": None,
+ "pred_masks": current_out["pred_masks"][obj_slice],
+ "obj_ptr": current_out["obj_ptr"][obj_slice],
+ }
+ if maskmem_features is not None:
+ obj_out["maskmem_features"] = maskmem_features[obj_slice]
+ if maskmem_pos_enc is not None:
+ obj_out["maskmem_pos_enc"] = [x[obj_slice] for x in maskmem_pos_enc]
+ obj_output_dict[storage_key][frame_idx] = obj_out
+
+ def _clear_non_cond_mem_around_input(self, frame_idx):
+ """
+ Remove the non-conditioning memory around the input frame.
+
+ When users provide correction clicks, the surrounding frames' non-conditioning memories can still contain outdated
+ object appearance information and could confuse the model. This method clears those non-conditioning memories
+ surrounding the interacted frame to avoid giving the model both old and new information about the object.
+
+ Args:
+ frame_idx (int): The index of the current frame where user interaction occurred.
+ """
+ r = self.model.memory_temporal_stride_for_eval
+ frame_idx_begin = frame_idx - r * self.model.num_maskmem
+ frame_idx_end = frame_idx + r * self.model.num_maskmem
+ for t in range(frame_idx_begin, frame_idx_end + 1):
+ self.inference_state["output_dict"]["non_cond_frame_outputs"].pop(t, None)
+ for obj_output_dict in self.inference_state["output_dict_per_obj"].values():
+ obj_output_dict["non_cond_frame_outputs"].pop(t, None)
diff --git a/ultralytics/models/utils/__init__.py b/ultralytics/models/utils/__init__.py
index 9e68dc12245..77a19dcf0f8 100644
--- a/ultralytics/models/utils/__init__.py
+++ b/ultralytics/models/utils/__init__.py
@@ -1 +1 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py
index a42b13413fd..42f437439b8 100644
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
import torch.nn as nn
@@ -243,12 +243,11 @@ def _get_loss(
if len(gt_bboxes):
gt_scores[idx] = bbox_iou(pred_bboxes.detach(), gt_bboxes, xywh=True).squeeze(-1)
- loss = {}
- loss.update(self._get_loss_class(pred_scores, targets, gt_scores, len(gt_bboxes), postfix))
- loss.update(self._get_loss_bbox(pred_bboxes, gt_bboxes, postfix))
- # if masks is not None and gt_mask is not None:
- # loss.update(self._get_loss_mask(masks, gt_mask, match_indices, postfix))
- return loss
+ return {
+ **self._get_loss_class(pred_scores, targets, gt_scores, len(gt_bboxes), postfix),
+ **self._get_loss_bbox(pred_bboxes, gt_bboxes, postfix),
+ # **(self._get_loss_mask(masks, gt_mask, match_indices, postfix) if masks is not None and gt_mask is not None else {})
+ }
def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs):
"""
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index 51dba006c8d..7133072ec14 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
import torch.nn as nn
@@ -172,7 +172,7 @@ def get_cdn_group(
bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn'
is less than or equal to 0, the function returns None for all elements in the tuple.
"""
- if (not training) or num_dn <= 0:
+ if (not training) or num_dn <= 0 or batch is None:
return None, None, None, None
gt_groups = batch["gt_groups"]
total_num = sum(gt_groups)
diff --git a/ultralytics/models/yolo/__init__.py b/ultralytics/models/yolo/__init__.py
index e31144d47a8..82450fd436f 100644
--- a/ultralytics/models/yolo/__init__.py
+++ b/ultralytics/models/yolo/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.models.yolo import classify, detect, obb, pose, segment, world
from ultralytics.models.yolo import regress
diff --git a/ultralytics/models/yolo/classify/__init__.py b/ultralytics/models/yolo/classify/__init__.py
index ca92f892de4..3a10629229f 100644
--- a/ultralytics/models/yolo/classify/__init__.py
+++ b/ultralytics/models/yolo/classify/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.models.yolo.classify.predict import ClassificationPredictor
from ultralytics.models.yolo.classify.train import ClassificationTrainer
diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py
index 596931a176a..cf5314585cb 100644
--- a/ultralytics/models/yolo/classify/predict.py
+++ b/ultralytics/models/yolo/classify/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import cv2
import torch
@@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.classify import ClassificationPredictor
- args = dict(model="yolov8n-cls.pt", source=ASSETS)
+ args = dict(model="yolo11n-cls.pt", source=ASSETS)
predictor = ClassificationPredictor(overrides=args)
predictor.predict_cli()
```
@@ -53,6 +53,7 @@ def postprocess(self, preds, img, orig_imgs):
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
+ preds = preds[0] if isinstance(preds, (list, tuple)) else preds
return [
Results(orig_img, path=img_path, names=self.model.names, probs=pred)
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
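A small sketch (toy tensors, hypothetical two-element output) of why the new guard takes `preds[0]`: some backends or exports return the class logits wrapped in a list/tuple alongside auxiliary outputs:

```python
import torch


def unwrap(preds):
    """Mirror of the guard added above."""
    return preds[0] if isinstance(preds, (list, tuple)) else preds


logits = torch.rand(2, 1000)                         # plain tensor output
assert unwrap(logits) is logits
assert unwrap([logits, torch.rand(2, 8)]) is logits  # wrapped (logits, aux) style output
```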
diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py
index e51349fa989..1eb638a4a40 100644
--- a/ultralytics/models/yolo/classify/train.py
+++ b/ultralytics/models/yolo/classify/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from copy import copy
@@ -8,7 +8,7 @@
from ultralytics.engine.trainer import BaseTrainer
from ultralytics.models import yolo
from ultralytics.nn.tasks import ClassificationModel
-from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK, colorstr
+from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
from ultralytics.utils.plotting import plot_images, plot_results
from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first
@@ -24,7 +24,7 @@ class ClassificationTrainer(BaseTrainer):
```python
from ultralytics.models.yolo.classify import ClassificationTrainer
- args = dict(model="yolov8n-cls.pt", data="imagenet10", epochs=3)
+ args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3)
trainer = ClassificationTrainer(overrides=args)
trainer.train()
```
@@ -141,7 +141,6 @@ def final_eval(self):
self.metrics = self.validator(model=f)
self.metrics.pop("fitness", None)
self.run_callbacks("on_fit_epoch_end")
- LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
def plot_training_samples(self, batch, ni):
"""Plots training samples with their annotations."""
diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py
index e54f04118a2..51aa01f4861 100644
--- a/ultralytics/models/yolo/classify/val.py
+++ b/ultralytics/models/yolo/classify/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
@@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator):
```python
from ultralytics.models.yolo.classify import ClassificationValidator
- args = dict(model="yolov8n-cls.pt", data="imagenet10")
+ args = dict(model="yolo11n-cls.pt", data="imagenet10")
validator = ClassificationValidator(args=args)
validator()
```
@@ -71,6 +71,10 @@ def finalize_metrics(self, *args, **kwargs):
self.metrics.confusion_matrix = self.confusion_matrix
self.metrics.save_dir = self.save_dir
+ def postprocess(self, preds):
+ """Preprocesses the classification predictions."""
+ return preds[0] if isinstance(preds, (list, tuple)) else preds
+
def get_stats(self):
"""Returns a dictionary of metrics obtained by processing targets and predictions."""
self.metrics.process(self.targets, self.pred)
diff --git a/ultralytics/models/yolo/detect/__init__.py b/ultralytics/models/yolo/detect/__init__.py
index 5f3e62c1862..caece94ae0c 100644
--- a/ultralytics/models/yolo/detect/__init__.py
+++ b/ultralytics/models/yolo/detect/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .predict import DetectionPredictor
from .train import DetectionTrainer
diff --git a/ultralytics/models/yolo/detect/predict.py b/ultralytics/models/yolo/detect/predict.py
index 6dbbb15e459..23c31f6a396 100644
--- a/ultralytics/models/yolo/detect/predict.py
+++ b/ultralytics/models/yolo/detect/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.engine.predictor import BasePredictor
from ultralytics.engine.results import Results
@@ -15,13 +15,13 @@ class DetectionPredictor(BasePredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.detect import DetectionPredictor
- args = dict(model="yolov8n.pt", source=ASSETS)
+ args = dict(model="yolo11n.pt", source=ASSETS)
predictor = DetectionPredictor(overrides=args)
predictor.predict_cli()
```
"""
- def postprocess(self, preds, img, orig_imgs):
+ def postprocess(self, preds, img, orig_imgs, **kwargs):
"""Post-processes predictions and returns a list of Results objects."""
if self.separate_outputs: # Quant friendly export with separated outputs
preds = decode_bbox(preds, img.shape, self.device)
@@ -30,16 +30,48 @@ def postprocess(self, preds, img, orig_imgs):
preds,
self.args.conf,
self.args.iou,
- agnostic=self.args.agnostic_nms,
+ self.args.classes,
+ self.args.agnostic_nms,
max_det=self.args.max_det,
- classes=self.args.classes,
+ nc=len(self.model.names),
+ end2end=getattr(self.model, "end2end", False),
+ rotated=self.args.task == "obb",
)
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
- results = []
- for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
- pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
- results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
- return results
+ return self.construct_results(preds, img, orig_imgs, **kwargs)
+
+ def construct_results(self, preds, img, orig_imgs):
+ """
+ Constructs a list of result objects from the predictions.
+
+ Args:
+ preds (List[torch.Tensor]): List of predicted bounding boxes and scores.
+ img (torch.Tensor): The image after preprocessing.
+ orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+
+ Returns:
+ (list): List of result objects containing the original images, image paths, class names, and bounding boxes.
+ """
+ return [
+ self.construct_result(pred, img, orig_img, img_path)
+ for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
+ ]
+
+ def construct_result(self, pred, img, orig_img, img_path):
+ """
+ Constructs the result object from the prediction.
+
+ Args:
+ pred (torch.Tensor): The predicted bounding boxes and scores.
+ img (torch.Tensor): The image after preprocessing.
+ orig_img (np.ndarray): The original image before preprocessing.
+ img_path (str): The path to the original image.
+
+ Returns:
+ (Results): The result object containing the original image, image path, class names, and bounding boxes.
+ """
+ pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+ return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6])
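Splitting `postprocess` into `construct_results`/`construct_result` makes result construction an override point, which the OBB, pose and segment predictors later in this patch rely on. A hypothetical subclass (sketch only, not part of the patch) could reuse the base box scaling while tightening the confidence floor:

```python
from ultralytics.models.yolo.detect import DetectionPredictor


class HighConfPredictor(DetectionPredictor):
    """Hypothetical subclass: filter rows before the base class builds the Results object."""

    def construct_result(self, pred, img, orig_img, img_path):
        # After NMS each row is (x1, y1, x2, y2, conf, cls, ...); column 4 is the confidence.
        pred = pred[pred[:, 4] >= 0.5]
        return super().construct_result(pred, img, orig_img, img_path)
```

It can then be driven exactly like the docstring example above, e.g. `HighConfPredictor(overrides=dict(model="yolo11n.pt", source=ASSETS)).predict_cli()`.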
diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py
index 5be24c946ed..eea16e73af1 100644
--- a/ultralytics/models/yolo/detect/train.py
+++ b/ultralytics/models/yolo/detect/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
import random
@@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer):
```python
from ultralytics.models.yolo.detect import DetectionTrainer
- args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3)
+ args = dict(model="yolo11n.pt", data="coco8.yaml", epochs=3)
trainer = DetectionTrainer(overrides=args)
trainer.train()
```
@@ -141,3 +141,10 @@ def plot_training_labels(self):
boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
cls = np.concatenate([lb["cls"] for lb in self.train_loader.dataset.labels], 0)
plot_labels(boxes, cls.squeeze(), names=self.data["names"], save_dir=self.save_dir, on_plot=self.on_plot)
+
+ def auto_batch(self):
+ """Get batch size by calculating memory occupation of model."""
+ train_dataset = self.build_dataset(self.trainset, mode="train", batch=16)
+ # 4 for mosaic augmentation
+ max_num_obj = max(len(label["cls"]) for label in train_dataset.labels) * 4
+ return super().auto_batch(max_num_obj)
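A worked sketch (toy label dicts, not a real dataset) of the `max_num_obj` estimate above: the densest image's label count, times 4 because mosaic can stack four images into one training sample:

```python
# Each dict mimics one image's labels, as in train_dataset.labels above.
labels = [{"cls": [0] * n} for n in (3, 12, 7)]

max_num_obj = max(len(label["cls"]) for label in labels) * 4  # 4 images per mosaic sample
print(max_num_obj)  # 48 -> upper bound on objects per sample passed to super().auto_batch()
```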
diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py
index f36290534f1..55274f92c75 100644
--- a/ultralytics/models/yolo/detect/val.py
+++ b/ultralytics/models/yolo/detect/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
from pathlib import Path
@@ -23,7 +23,7 @@ class DetectionValidator(BaseValidator):
```python
from ultralytics.models.yolo.detect import DetectionValidator
- args = dict(model="yolov8n.pt", data="coco8.yaml")
+ args = dict(model="yolo11n.pt", data="coco8.yaml")
validator = DetectionValidator(args=args)
validator()
```
@@ -75,10 +75,11 @@ def init_metrics(self, model):
and (val.endswith(f"{os.sep}val2017.txt") or val.endswith(f"{os.sep}test-dev2017.txt"))
) # is COCO
self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS
- self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(len(model.names)))
- self.args.save_json |= (self.is_coco or self.is_lvis) and not self.training # run on final val if training COCO
+ self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1, len(model.names) + 1))
+ self.args.save_json |= self.args.val and (self.is_coco or self.is_lvis) and not self.training # run final val
self.names = model.names
self.nc = len(model.names)
+ self.end2end = getattr(model, "end2end", False)
self.metrics.names = self.names
self.metrics.plot = self.args.plots
self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
@@ -100,9 +101,12 @@ def postprocess(self, preds, img_shape):
self.args.conf,
self.args.iou,
labels=self.lb,
+ nc=self.nc,
multi_label=True,
agnostic=self.args.single_cls or self.args.agnostic_nms,
max_det=self.args.max_det,
+ end2end=self.end2end,
+ rotated=self.args.task == "obb",
)
def _prepare_batch(self, si, batch):
@@ -159,8 +163,8 @@ def update_metrics(self, preds, batch):
# Evaluate
if nl:
stat["tp"] = self._process_batch(predn, bbox, cls)
- if self.args.plots:
- self.confusion_matrix.process_batch(predn, bbox, cls)
+ if self.args.plots:
+ self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])
@@ -172,7 +176,7 @@ def update_metrics(self, preds, batch):
predn,
self.args.save_conf,
pbatch["ori_shape"],
- self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt',
+ self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
)
def finalize_metrics(self, *args, **kwargs):
@@ -292,8 +296,7 @@ def pred_to_json(self, predn, filename):
self.jdict.append(
{
"image_id": image_id,
- "category_id": self.class_map[int(p[5])]
- + (1 if self.is_lvis else 0), # index starts from 1 if it's lvis
+ "category_id": self.class_map[int(p[5])],
"bbox": [round(x, 3) for x in b],
"score": round(p[4], 5),
}
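A quick sketch of the new 1-based mapping for non-COCO models (hypothetical 3-class model): model class indices start at 0, while COCO/LVIS-style JSON expects `category_id` starting at 1, so the per-dataset `+1` offset removed above is no longer needed:

```python
names = {0: "person", 1: "car", 2: "dog"}      # hypothetical custom classes
class_map = list(range(1, len(names) + 1))     # as set in init_metrics above

print({cls: class_map[cls] for cls in names})  # {0: 1, 1: 2, 2: 3}
```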
diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py
index 6b5269d500d..f34ac4b6867 100644
--- a/ultralytics/models/yolo/model.py
+++ b/ultralytics/models/yolo/model.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from pathlib import Path
@@ -12,7 +12,7 @@
class YOLO(Model):
"""YOLO (You Only Look Once) object detection model."""
- def __init__(self, model="yolov8n.pt", task=None, verbose=False):
+ def __init__(self, model="yolo11n.pt", task=None, verbose=False):
"""Initialize YOLO model, switching to YOLOWorld if model filename contains '-world'."""
path = Path(model)
if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}: # if YOLOWorld PyTorch model
diff --git a/ultralytics/models/yolo/obb/__init__.py b/ultralytics/models/yolo/obb/__init__.py
index f60349a79d1..61e3e3c6a82 100644
--- a/ultralytics/models/yolo/obb/__init__.py
+++ b/ultralytics/models/yolo/obb/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .predict import OBBPredictor
from .train import OBBTrainer
diff --git a/ultralytics/models/yolo/obb/predict.py b/ultralytics/models/yolo/obb/predict.py
index 19308cb6c44..ef6214d4213 100644
--- a/ultralytics/models/yolo/obb/predict.py
+++ b/ultralytics/models/yolo/obb/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
@@ -16,7 +16,7 @@ class OBBPredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.obb import OBBPredictor
- args = dict(model="yolov8n-obb.pt", source=ASSETS)
+ args = dict(model="yolo11n-obb.pt", source=ASSETS)
predictor = OBBPredictor(overrides=args)
predictor.predict_cli()
```
@@ -27,27 +27,20 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
super().__init__(cfg, overrides, _callbacks)
self.args.task = "obb"
- def postprocess(self, preds, img, orig_imgs):
- """Post-processes predictions and returns a list of Results objects."""
- preds = ops.non_max_suppression(
- preds,
- self.args.conf,
- self.args.iou,
- agnostic=self.args.agnostic_nms,
- max_det=self.args.max_det,
- nc=len(self.model.names),
- classes=self.args.classes,
- rotated=True,
- )
-
- if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
- orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
-
- results = []
- for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
- rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
- rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
- # xywh, r, conf, cls
- obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
- results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb))
- return results
+ def construct_result(self, pred, img, orig_img, img_path):
+ """
+ Constructs the result object from the prediction.
+
+ Args:
+ pred (torch.Tensor): The predicted bounding boxes, scores, and rotation angles.
+ img (torch.Tensor): The image after preprocessing.
+ orig_img (np.ndarray): The original image before preprocessing.
+ img_path (str): The path to the original image.
+
+ Returns:
+ (Results): The result object containing the original image, image path, class names, and oriented bounding boxes.
+ """
+ rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
+ rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
+ obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
+ return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
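A tiny sketch (single fake NMS row, with `regularize_rboxes` and `scale_boxes` omitted) of the column reshuffle done above: the NMS output row `(x, y, w, h, conf, cls, angle)` becomes the `(x, y, w, h, angle, conf, cls)` layout that `Results` expects for `obb`:

```python
import torch

pred = torch.tensor([[100.0, 80.0, 40.0, 20.0, 0.9, 2.0, 0.3]])  # x, y, w, h, conf, cls, angle

rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)  # (x, y, w, h, angle)
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)          # (x, y, w, h, angle, conf, cls)
print(obb.tolist())  # [[100.0, 80.0, 40.0, 20.0, 0.3, 0.9, 2.0]]
```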
diff --git a/ultralytics/models/yolo/obb/train.py b/ultralytics/models/yolo/obb/train.py
index 18def219748..c88272b1545 100644
--- a/ultralytics/models/yolo/obb/train.py
+++ b/ultralytics/models/yolo/obb/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from copy import copy
@@ -15,7 +15,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.obb import OBBTrainer
- args = dict(model="yolov8n-obb.pt", data="dota8.yaml", epochs=3)
+ args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3)
trainer = OBBTrainer(overrides=args)
trainer.train()
```
@@ -39,4 +39,6 @@ def get_model(self, cfg=None, weights=None, verbose=True):
def get_validator(self):
"""Return an instance of OBBValidator for validation of YOLO model."""
self.loss_names = "box_loss", "cls_loss", "dfl_loss"
- return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))
+ return yolo.obb.OBBValidator(
+ self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
+ )
diff --git a/ultralytics/models/yolo/obb/val.py b/ultralytics/models/yolo/obb/val.py
index 93bb2bfabb2..b5cb89f1452 100644
--- a/ultralytics/models/yolo/obb/val.py
+++ b/ultralytics/models/yolo/obb/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from pathlib import Path
@@ -18,7 +18,7 @@ class OBBValidator(DetectionValidator):
```python
from ultralytics.models.yolo.obb import OBBValidator
- args = dict(model="yolov8n-obb.pt", data="dota8.yaml")
+ args = dict(model="yolo11n-obb.pt", data="dota8.yaml")
validator = OBBValidator(args=args)
validator(model=args["model"])
```
@@ -36,20 +36,6 @@ def init_metrics(self, model):
val = self.data.get(self.args.split, "") # validation path
self.is_dota = isinstance(val, str) and "DOTA" in val # is DOTA
- def postprocess(self, preds):
- """Apply Non-maximum suppression to prediction outputs."""
- return ops.non_max_suppression(
- preds,
- self.args.conf,
- self.args.iou,
- labels=self.lb,
- nc=self.nc,
- multi_label=True,
- agnostic=self.args.single_cls or self.args.agnostic_nms,
- max_det=self.args.max_det,
- rotated=True,
- )
-
def _process_batch(self, detections, gt_bboxes, gt_cls):
"""
Perform computation of the correct prediction matrix for a batch of detections and ground truth bounding boxes.
@@ -160,10 +146,10 @@ def eval_json(self, stats):
for d in data:
image_id = d["image_id"]
score = d["score"]
- classname = self.names[d["category_id"]].replace(" ", "-")
+ classname = self.names[d["category_id"] - 1].replace(" ", "-")
p = d["poly"]
- with open(f'{pred_txt / f"Task1_{classname}"}.txt', "a") as f:
+ with open(f"{pred_txt / f'Task1_{classname}'}.txt", "a") as f:
f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n")
# Save merged results. This could produce a slightly lower mAP than the official merging script
# because of the probiou calculation.
@@ -175,7 +161,7 @@ def eval_json(self, stats):
image_id = d["image_id"].split("__")[0]
pattern = re.compile(r"\d+___\d+")
x, y = (int(c) for c in re.findall(pattern, d["image_id"])[0].split("___"))
- bbox, score, cls = d["rbox"], d["score"], d["category_id"]
+ bbox, score, cls = d["rbox"], d["score"], d["category_id"] - 1
bbox[0] += x
bbox[1] += y
bbox.extend([score, cls])
@@ -197,7 +183,7 @@ def eval_json(self, stats):
p = [round(i, 3) for i in x[:-2]] # poly
score = round(x[-2], 3)
- with open(f'{pred_merged_txt / f"Task1_{classname}"}.txt', "a") as f:
+ with open(f"{pred_merged_txt / f'Task1_{classname}'}.txt", "a") as f:
f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n")
return stats
diff --git a/ultralytics/models/yolo/pose/__init__.py b/ultralytics/models/yolo/pose/__init__.py
index d56694301f1..396167b08f8 100644
--- a/ultralytics/models/yolo/pose/__init__.py
+++ b/ultralytics/models/yolo/pose/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .predict import PosePredictor
from .train import PoseTrainer
diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py
index 6950b4aea37..b6d4440d700 100644
--- a/ultralytics/models/yolo/pose/predict.py
+++ b/ultralytics/models/yolo/pose/predict.py
@@ -1,5 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
from ultralytics.engine.results import Results
@@ -17,7 +16,7 @@ class PosePredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.pose import PosePredictor
- args = dict(model="yolov8n-pose.pt", source=ASSETS)
+ args = dict(model="yolo11n-pose.pt", source=ASSETS)
predictor = PosePredictor(overrides=args)
predictor.predict_cli()
```
@@ -82,3 +81,25 @@ def postprocess(self, preds, img, orig_imgs):
Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts)
)
return results
+
+ def construct_result(self, pred, img, orig_img, img_path):
+ """
+ Constructs the result object from the prediction.
+
+ Args:
+ pred (torch.Tensor): The predicted bounding boxes, scores, and keypoints.
+ img (torch.Tensor): The image after preprocessing.
+ orig_img (np.ndarray): The original image before preprocessing.
+ img_path (str): The path to the original image.
+
+ Returns:
+ (Results): The result object containing the original image, image path, class names, bounding boxes, and keypoints.
+ """
+
+ result = super().construct_result(pred, img, orig_img, img_path)
+ if self.separate_outputs:
+ pred_kpts = pred[:, 6:].view(len(pred), *kpt_shape) if len(pred) else pred[:, 6:]
+ else:
+ pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
+ pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape)
+ result.update(keypoints=pred_kpts)
+ return result
\ No newline at end of file
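A short sketch (random tensor, COCO-style `kpt_shape=(17, 3)` assumed) of the keypoint reshaping above: the flat columns after index 6 are viewed as `(N, *kpt_shape)` before `ops.scale_coords` maps them back to the original image:

```python
import torch

kpt_shape = (17, 3)                                    # 17 keypoints, each (x, y, visibility)
pred = torch.rand(2, 6 + kpt_shape[0] * kpt_shape[1])  # (x1, y1, x2, y2, conf, cls) + flat keypoints

pred_kpts = pred[:, 6:].view(len(pred), *kpt_shape) if len(pred) else pred[:, 6:]
print(pred_kpts.shape)  # torch.Size([2, 17, 3])
```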
diff --git a/ultralytics/models/yolo/pose/train.py b/ultralytics/models/yolo/pose/train.py
index e52f449cf82..4a4f0ced2fb 100644
--- a/ultralytics/models/yolo/pose/train.py
+++ b/ultralytics/models/yolo/pose/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from copy import copy
@@ -16,7 +16,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.pose import PoseTrainer
- args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
+ args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml", epochs=3)
trainer = PoseTrainer(overrides=args)
trainer.train()
```
diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py
index 6b431a21def..909c35b229d 100644
--- a/ultralytics/models/yolo/pose/val.py
+++ b/ultralytics/models/yolo/pose/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
from pathlib import Path
@@ -22,7 +22,7 @@ class PoseValidator(DetectionValidator):
```python
from ultralytics.models.yolo.pose import PoseValidator
- args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
+ args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
validator = PoseValidator(args=args)
validator()
```
@@ -151,8 +151,8 @@ def update_metrics(self, preds, batch):
if nl:
stat["tp"] = self._process_batch(predn, bbox, cls)
stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"])
- if self.args.plots:
- self.confusion_matrix.process_batch(predn, bbox, cls)
+ if self.args.plots:
+ self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])
@@ -166,7 +166,7 @@ def update_metrics(self, preds, batch):
pred_kpts,
self.args.save_conf,
pbatch["ori_shape"],
- self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt',
+ self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
)
def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=None):
@@ -184,7 +184,7 @@ def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=
gt_kpts (torch.Tensor | None): Optional tensor with shape (N, 51) representing ground truth keypoints.
Returns:
- torch.Tensor: A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels,
+ (torch.Tensor): A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels,
where N is the number of detections.
Example:
diff --git a/ultralytics/models/yolo/segment/__init__.py b/ultralytics/models/yolo/segment/__init__.py
index ec1ac7991a9..36a921a9a36 100644
--- a/ultralytics/models/yolo/segment/__init__.py
+++ b/ultralytics/models/yolo/segment/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .predict import SegmentationPredictor
from .train import SegmentationTrainer
diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py
index 3264d36aac5..444f2482ac3 100644
--- a/ultralytics/models/yolo/segment/predict.py
+++ b/ultralytics/models/yolo/segment/predict.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
@@ -17,7 +17,7 @@ class SegmentationPredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.segment import SegmentationPredictor
- args = dict(model="yolov8n-seg.pt", source=ASSETS)
+ args = dict(model="yolo11n-seg.pt", source=ASSETS)
predictor = SegmentationPredictor(overrides=args)
predictor.predict_cli()
```
@@ -70,4 +70,46 @@ def postprocess(self, preds, img, orig_imgs):
masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
- return results
\ No newline at end of file
+ return results
+
+ def construct_results(self, preds, img, orig_imgs, protos):
+ """
+ Constructs a list of result objects from the predictions.
+
+ Args:
+ preds (List[torch.Tensor]): List of predicted bounding boxes, scores, and masks.
+ img (torch.Tensor): The image after preprocessing.
+ orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+ protos (List[torch.Tensor]): List of prototype masks.
+
+ Returns:
+ (list): List of result objects containing the original images, image paths, class names, bounding boxes, and masks.
+ """
+ return [
+ self.construct_result(pred, img, orig_img, img_path, proto)
+ for pred, orig_img, img_path, proto in zip(preds, orig_imgs, self.batch[0], protos)
+ ]
+
+ def construct_result(self, pred, img, orig_img, img_path, proto):
+ """
+ Constructs the result object from the prediction.
+
+ Args:
+ pred (np.ndarray): The predicted bounding boxes, scores, and masks.
+ img (torch.Tensor): The image after preprocessing.
+ orig_img (np.ndarray): The original image before preprocessing.
+ img_path (str): The path to the original image.
+ proto (torch.Tensor): The prototype masks.
+
+ Returns:
+ (Results): The result object containing the original image, image path, class names, bounding boxes, and masks.
+ """
+ if not len(pred): # save empty boxes
+ masks = None
+ elif self.args.retina_masks:
+ pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+ masks = ops.process_mask_native(proto, pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
+ else:
+ masks = ops.process_mask(proto, pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
+ pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+ return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)
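Conceptually, `ops.process_mask`/`process_mask_native` combine each detection's mask coefficients (`pred[:, 6:]`) with the prototype maps in YOLACT style; a toy sketch of just that linear combination (cropping to boxes and resizing, which the real helpers also perform, are omitted):

```python
import torch

nm, mh, mw = 32, 160, 160
proto = torch.rand(nm, mh, mw)   # prototype masks for one image
coeffs = torch.rand(5, nm)       # mask coefficients for 5 detections, i.e. pred[:, 6:]

masks = (coeffs @ proto.view(nm, -1)).sigmoid().view(-1, mh, mw)
print(masks.shape)  # torch.Size([5, 160, 160])
```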
diff --git a/ultralytics/models/yolo/segment/train.py b/ultralytics/models/yolo/segment/train.py
index f5b25ed6115..0142ecccfa6 100644
--- a/ultralytics/models/yolo/segment/train.py
+++ b/ultralytics/models/yolo/segment/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from copy import copy
@@ -16,7 +16,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.segment import SegmentationTrainer
- args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3)
+ args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3)
trainer = SegmentationTrainer(overrides=args)
trainer.train()
```
diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py
index 98708c93eab..1fbc9cc7ded 100644
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
from multiprocessing.pool import ThreadPool
@@ -24,7 +24,7 @@ class SegmentationValidator(DetectionValidator):
```python
from ultralytics.models.yolo.segment import SegmentationValidator
- args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml")
+ args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml")
validator = SegmentationValidator(args=args)
validator()
```
@@ -153,8 +153,8 @@ def update_metrics(self, preds, batch):
stat["tp_m"] = self._process_batch(
predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True
)
- if self.args.plots:
- self.confusion_matrix.process_batch(predn, bbox, cls)
+ if self.args.plots:
+ self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])
@@ -180,7 +180,7 @@ def update_metrics(self, preds, batch):
pred_masks,
self.args.save_conf,
pbatch["ori_shape"],
- self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt',
+ self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
)
def finalize_metrics(self, *args, **kwargs):
diff --git a/ultralytics/models/yolo/world/__init__.py b/ultralytics/models/yolo/world/__init__.py
index 1d401999cdf..4380d244602 100644
--- a/ultralytics/models/yolo/world/__init__.py
+++ b/ultralytics/models/yolo/world/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .train import WorldTrainer
diff --git a/ultralytics/models/yolo/world/train.py b/ultralytics/models/yolo/world/train.py
index 5fffd0974c8..1a16a2d1d1e 100644
--- a/ultralytics/models/yolo/world/train.py
+++ b/ultralytics/models/yolo/world/train.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import itertools
diff --git a/ultralytics/models/yolo/world/train_world.py b/ultralytics/models/yolo/world/train_world.py
index df26986d9c5..3cbdb2a4e77 100644
--- a/ultralytics/models/yolo/world/train_world.py
+++ b/ultralytics/models/yolo/world/train_world.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
from ultralytics.data.utils import check_det_dataset
diff --git a/ultralytics/nn/__init__.py b/ultralytics/nn/__init__.py
index 242b690865f..e6142fe38fa 100644
--- a/ultralytics/nn/__init__.py
+++ b/ultralytics/nn/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .tasks import (
BaseModel,
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index dfda624d36d..07ea5bbd410 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -1,7 +1,6 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import ast
-import contextlib
import json
import platform
import zipfile
@@ -14,8 +13,8 @@
import torch.nn as nn
from PIL import Image
-from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, ROOT, yaml_load
-from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml
+from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
+from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
from ultralytics.utils.downloads import attempt_download_asset, is_url
@@ -45,8 +44,10 @@ def check_class_names(names):
def default_class_names(data=None):
"""Applies default class names to an input YAML file or returns numerical class names."""
if data:
- with contextlib.suppress(Exception):
+ try:
return yaml_load(check_yaml(data))["names"]
+ except Exception:
+ pass
return {i: f"class{i}" for i in range(999)} # return default if above errors
@@ -58,21 +59,24 @@ class AutoBackend(nn.Module):
range of formats, each with specific naming conventions as outlined below:
Supported Formats and Naming Conventions:
- | Format | File Suffix |
- |-----------------------|------------------|
- | PyTorch | *.pt |
- | TorchScript | *.torchscript |
- | ONNX Runtime | *.onnx |
- | ONNX OpenCV DNN | *.onnx (dnn=True)|
- | OpenVINO | *openvino_model/ |
- | CoreML | *.mlpackage |
- | TensorRT | *.engine |
- | TensorFlow SavedModel | *_saved_model |
- | TensorFlow GraphDef | *.pb |
- | TensorFlow Lite | *.tflite |
- | TensorFlow Edge TPU | *_edgetpu.tflite |
- | PaddlePaddle | *_paddle_model |
- | NCNN | *_ncnn_model |
+ | Format | File Suffix |
+ | --------------------- | ----------------- |
+ | PyTorch | *.pt |
+ | TorchScript | *.torchscript |
+ | ONNX Runtime | *.onnx |
+ | ONNX OpenCV DNN | *.onnx (dnn=True) |
+ | OpenVINO | *openvino_model/ |
+ | CoreML | *.mlpackage |
+ | TensorRT | *.engine |
+ | TensorFlow SavedModel | *_saved_model/ |
+ | TensorFlow GraphDef | *.pb |
+ | TensorFlow Lite | *.tflite |
+ | TensorFlow Edge TPU | *_edgetpu.tflite |
+ | PaddlePaddle | *_paddle_model/ |
+ | MNN | *.mnn |
+ | NCNN | *_ncnn_model/ |
+ | IMX | *_imx_model/ |
+ | RKNN | *_rknn_model/ |
This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
models across various platforms.
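The backend is picked from the weight path/suffix per the table above; a minimal usage sketch (the weight file is a placeholder and is downloaded if missing):

```python
import torch

from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend(weights="yolo11n.pt", device=torch.device("cpu"), fp16=False)
model.warmup(imgsz=(1, 3, 640, 640))
y = model(torch.zeros(1, 3, 640, 640))  # swapping in e.g. "yolo11n.onnx" selects the ONNX Runtime path
```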
@@ -81,7 +85,7 @@ class AutoBackend(nn.Module):
@torch.no_grad()
def __init__(
self,
- weights="yolov8n.pt",
+ weights="yolo11n.pt",
device=torch.device("cpu"),
dnn=False,
data=None,
@@ -94,7 +98,7 @@ def __init__(
Initialize the AutoBackend for inference.
Args:
- weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
+ weights (str | torch.nn.Module): Path to the model weights file or a module instance. Defaults to 'yolo11n.pt'.
device (torch.device): Device to run the model on. Defaults to CPU.
dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
@@ -119,17 +123,21 @@ def __init__(
edgetpu,
tfjs,
paddle,
+ mnn,
ncnn,
+ imx,
+ rknn,
triton,
) = self._model_type(w)
fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16
- nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH)
+ nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn # BHWC formats (vs torch BCWH)
stride = 32 # default stride
- model, metadata = None, None
+ end2end = False # default end2end
+ model, metadata, task = None, None, None
# Set device
cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA
- if cuda and not any([nn_module, pt, jit, engine, onnx]): # GPU dataloader formats
+ if cuda and not any([nn_module, pt, jit, engine, onnx, paddle]): # GPU dataloader formats
device = torch.device("cpu")
cuda = False
@@ -179,8 +187,8 @@ def __init__(
check_requirements("opencv-python>=4.5.4")
net = cv2.dnn.readNetFromONNX(w)
- # ONNX Runtime
- elif onnx:
+ # ONNX Runtime and IMX
+ elif onnx or imx:
LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
if IS_RASPBERRYPI or IS_JETSON:
@@ -188,10 +196,49 @@ def __init__(
check_requirements("numpy==1.23.5")
import onnxruntime
- providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
- session = onnxruntime.InferenceSession(w, providers=providers)
+ providers = ["CPUExecutionProvider"]
+ if cuda and "CUDAExecutionProvider" in onnxruntime.get_available_providers():
+ providers.insert(0, "CUDAExecutionProvider")
+ elif cuda: # Only log warning if CUDA was requested but unavailable
+ LOGGER.warning("WARNING โ ๏ธ Failed to start ONNX Runtime with CUDA. Using CPU...")
+ device = torch.device("cpu")
+ cuda = False
+ LOGGER.info(f"Using ONNX Runtime {providers[0]}")
+ if onnx:
+ session = onnxruntime.InferenceSession(w, providers=providers)
+ else:
+ check_requirements(
+ ["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"]
+ )
+ w = next(Path(w).glob("*.onnx"))
+ LOGGER.info(f"Loading {w} for ONNX IMX inference...")
+ import mct_quantizers as mctq
+ from sony_custom_layers.pytorch.object_detection import nms_ort # noqa
+
+ session = onnxruntime.InferenceSession(
+ w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"]
+ )
+ task = "detect"
+
output_names = [x.name for x in session.get_outputs()]
metadata = session.get_modelmeta().custom_metadata_map
+ dynamic = isinstance(session.get_outputs()[0].shape[0], str)
+ fp16 = True if "float16" in session.get_inputs()[0].type else False
+ if not dynamic:
+ io = session.io_binding()
+ bindings = []
+ for output in session.get_outputs():
+ out_fp16 = "float16" in output.type
+ y_tensor = torch.empty(output.shape, dtype=torch.float16 if out_fp16 else torch.float32).to(device)
+ io.bind_output(
+ name=output.name,
+ device_type=device.type,
+ device_id=device.index if cuda else 0,
+ element_type=np.float16 if out_fp16 else np.float32,
+ shape=tuple(y_tensor.shape),
+ buffer_ptr=y_tensor.data_ptr(),
+ )
+ bindings.append(y_tensor)
# OpenVINO
elif xml:
@@ -221,14 +268,19 @@ def __init__(
# TensorRT
elif engine:
LOGGER.info(f"Loading {w} for TensorRT inference...")
+
+ if IS_JETSON and PYTHON_VERSION <= "3.8.0":
+ # fix error: `np.bool` was a deprecated alias for the builtin `bool` for JetPack 4 with Python <= 3.8.0
+ check_requirements("numpy==1.23.5")
+
try:
import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download
except ImportError:
if LINUX:
- check_requirements("tensorrt>7.0.0,<=10.1.0")
+ check_requirements("tensorrt>7.0.0,!=10.1.0")
import tensorrt as trt # noqa
check_version(trt.__version__, ">=7.0.0", hard=True)
- check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
+ check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
if device.type == "cpu":
device = torch.device("cuda:0")
Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
@@ -241,6 +293,12 @@ def __init__(
except UnicodeDecodeError:
f.seek(0) # engine file may lack embedded Ultralytics metadata
model = runtime.deserialize_cuda_engine(f.read()) # read engine
+ if "dla" in str(device.type):
+ dla_core = int(device.type.split(":")[1])
+ assert dla_core in {0, 1}, (
+ "Expected device type for inference in DLA is 'dla:0' or 'dla:1', but received '{device.type}'"
+ )
+ runtime.DLA_core = dla_core
# Model context
try:
@@ -264,8 +322,8 @@ def __init__(
if -1 in tuple(model.get_tensor_shape(name)):
dynamic = True
context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1]))
- if dtype == np.float16:
- fp16 = True
+ if dtype == np.float16:
+ fp16 = True
else:
output_names.append(name)
shape = tuple(context.get_tensor_shape(name))
@@ -321,8 +379,10 @@ def wrap_frozen_graph(gd, inputs, outputs):
with open(w, "rb") as f:
gd.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
- with contextlib.suppress(StopIteration): # find metadata in SavedModel alongside GraphDef
+ try: # find metadata in SavedModel alongside GraphDef
metadata = next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml"))
+ except StopIteration:
+ pass
# TFLite or TFLite Edge TPU
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
@@ -333,11 +393,16 @@ def wrap_frozen_graph(gd, inputs, outputs):
Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
- LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
+ device = device[3:] if str(device).startswith("tpu") else ":0"
+ LOGGER.info(f"Loading {w} on device {device[1:]} for TensorFlow Lite Edge TPU inference...")
delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
platform.system()
]
- interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
+ interpreter = Interpreter(
+ model_path=w,
+ experimental_delegates=[load_delegate(delegate, options={"device": device})],
+ )
+ device = "cpu" # Required, otherwise PyTorch will try to use the wrong device
else: # TFLite
LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
interpreter = Interpreter(model_path=w) # load TFLite model
@@ -345,10 +410,12 @@ def wrap_frozen_graph(gd, inputs, outputs):
input_details = interpreter.get_input_details() # inputs
output_details = interpreter.get_output_details() # outputs
# Load metadata
- with contextlib.suppress(zipfile.BadZipFile):
+ try:
with zipfile.ZipFile(w, "r") as model:
meta_file = model.namelist()[0]
metadata = ast.literal_eval(model.read(meta_file).decode("utf-8"))
+ except zipfile.BadZipFile:
+ pass
# TF.js
elif tfjs:
@@ -371,6 +438,23 @@ def wrap_frozen_graph(gd, inputs, outputs):
output_names = predictor.get_output_names()
metadata = w.parents[1] / "metadata.yaml"
+ # MNN
+ elif mnn:
+ LOGGER.info(f"Loading {w} for MNN inference...")
+ check_requirements("MNN") # requires MNN
+ import os
+
+ import MNN
+
+ config = {"precision": "low", "backend": "CPU", "numThread": (os.cpu_count() + 1) // 2}
+ rt = MNN.nn.create_runtime_manager((config,))
+ net = MNN.nn.load_module_from_file(w, [], [], runtime_manager=rt, rearrange=True)
+
+ def torch_to_mnn(x):
+ return MNN.expr.const(x.data_ptr(), x.shape)
+
+ metadata = json.loads(net.get_info()["bizCode"])
+
# NCNN
elif ncnn:
LOGGER.info(f"Loading {w} for NCNN inference...")
@@ -392,6 +476,23 @@ def wrap_frozen_graph(gd, inputs, outputs):
from ultralytics.utils.triton import TritonRemoteModel
model = TritonRemoteModel(w)
+ metadata = model.metadata
+
+ # RKNN
+ elif rknn:
+ if not is_rockchip():
+ raise OSError("RKNN inference is only supported on Rockchip devices.")
+ LOGGER.info(f"Loading {w} for RKNN inference...")
+ check_requirements("rknn-toolkit-lite2")
+ from rknnlite.api import RKNNLite
+
+ w = Path(w)
+ if not w.is_file(): # if not *.rknn
+ w = next(w.rglob("*.rknn")) # get *.rknn file from *_rknn_model dir
+ rknn_model = RKNNLite()
+ rknn_model.load_rknn(w)
+ rknn_model.init_runtime()
+ metadata = Path(w).parent / "metadata.yaml"
# Any other format (unsupported)
else:
@@ -409,7 +510,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
for k, v in metadata.items():
if k in {"stride", "batch"}:
metadata[k] = int(v)
- elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str):
+ elif k in {"imgsz", "names", "kpt_shape", "args"} and isinstance(v, str):
metadata[k] = eval(v)
stride = metadata["stride"]
task = metadata["task"]
@@ -417,6 +518,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
imgsz = metadata["imgsz"]
names = metadata["names"]
kpt_shape = metadata.get("kpt_shape")
+ end2end = metadata.get("args", {}).get("nms", False)
elif not (pt or triton or nn_module):
LOGGER.warning(f"WARNING โ ๏ธ Metadata not found for 'model={weights}'")
@@ -466,9 +568,26 @@ def forward(self, im, augment=False, visualize=False, embed=None):
y = self.net.forward()
# ONNX Runtime
- elif self.onnx:
- im = im.cpu().numpy() # torch to numpy
- y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+ elif self.onnx or self.imx:
+ if self.dynamic:
+ im = im.cpu().numpy() # torch to numpy
+ y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+ else:
+ if not self.cuda:
+ im = im.cpu()
+ self.io.bind_input(
+ name="images",
+ device_type=im.device.type,
+ device_id=im.device.index if im.device.type == "cuda" else 0,
+ element_type=np.float16 if self.fp16 else np.float32,
+ shape=tuple(im.shape),
+ buffer_ptr=im.data_ptr(),
+ )
+ self.session.run_with_iobinding(self.io)
+ y = self.bindings
+ if self.imx:
+ # boxes, conf, cls
+ y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1)
# OpenVINO
elif self.xml:
@@ -496,7 +615,7 @@ def callback(request, userdata):
# TensorRT
elif self.engine:
- if self.dynamic or im.shape != self.bindings["images"].shape:
+ if self.dynamic and im.shape != self.bindings["images"].shape:
if self.is_trt10:
self.context.set_input_shape("images", im.shape)
self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
@@ -532,10 +651,9 @@ def callback(request, userdata):
# box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
# conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
# y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
- elif len(y) == 1: # classification model
- y = list(y.values())
- elif len(y) == 2: # segmentation model
- y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
+ y = list(y.values())
+ if len(y) == 2 and len(y[1].shape) != 4: # segmentation model
+ y = list(reversed(y)) # reversed for segmentation models (pred, proto)
# PaddlePaddle
elif self.paddle:
@@ -544,6 +662,12 @@ def callback(request, userdata):
self.predictor.run()
y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
+ # MNN
+ elif self.mnn:
+ input_var = self.torch_to_mnn(im)
+ output_var = self.net.onForward([input_var])
+ y = [x.read() for x in output_var]
+
# NCNN
elif self.ncnn:
mat_in = self.pyncnn.Mat(im[0].cpu().numpy())
@@ -557,6 +681,12 @@ def callback(request, userdata):
im = im.cpu().numpy() # torch to numpy
y = self.model(im)
+ # RKNN
+ elif self.rknn:
+ im = (im.cpu().numpy() * 255).astype("uint8")
+ im = im if isinstance(im, (list, tuple)) else [im]
+ y = self.rknn_model.inference(inputs=im)
+
# TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
else:
im = im.cpu().numpy()
@@ -583,17 +713,18 @@ def callback(request, userdata):
if x.ndim == 3: # if task is not classification, excluding masks (ndim=4) as well
# Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
# xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
- if x.shape[-1] == 6: # end-to-end model
+ if x.shape[-1] == 6 or self.end2end: # end-to-end model
x[:, :, [0, 2]] *= w
x[:, :, [1, 3]] *= h
- # TODO: pose end-to-end model
+ if self.task == "pose":
+ x[:, :, 6::3] *= w
+ x[:, :, 7::3] *= h
else:
x[:, [0, 2]] *= w
x[:, [1, 3]] *= h
- if self.task == "pose" or x.shape[1] > 5:
- kpt_offset = x.shape[1] - self.kpt_shape[0] * self.kpt_shape[1]
- x[:, kpt_offset::3] *= w
- x[:, kpt_offset+1::3] *= h
+ if self.task == "pose":
+ x[:, 5::3] *= w
+ x[:, 6::3] *= h
y.append(x)
# TF segment fixes: export is reversed vs ONNX export and protos are transposed
if len(y) == 2: # segment with (det, proto) output order reversed
@@ -609,8 +740,7 @@ def callback(request, userdata):
# print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes
if isinstance(y, (list, tuple)):
if len(self.names) == 999 and (self.task == "segment" or len(y) == 2): # segments and names not defined
- ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes
- nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400)
+ nc = y[0].shape[1] - y[1].shape[1] - 4 # y = (1, 32, 160, 160), (1, 116, 8400)
self.names = {i: f"class{i}" for i in range(nc)}
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
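A worked example of the class-count fallback above for a segmentation model with no embedded names: after the reorder a few lines earlier, `y[0]` is the detection tensor and `y[1]` the prototype tensor:

```python
det_shape = (1, 116, 8400)      # 4 box coords + nc class scores + 32 mask coefficients per anchor
proto_shape = (1, 32, 160, 160)

nc = det_shape[1] - proto_shape[1] - 4
print(nc)  # 80 -> names fall back to {0: "class0", ..., 79: "class79"}
```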
@@ -650,7 +780,7 @@ def _model_type(p="path/to/model.pt"):
saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
Args:
- p: path to the model file. Defaults to path/to/model.pt
+ p (str): path to the model file. Defaults to path/to/model.pt
Examples:
>>> model = AutoBackend(weights="path/to/model.onnx")
diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py
index edf7e6a0c85..ddc387223f2 100644
--- a/ultralytics/nn/modules/__init__.py
+++ b/ultralytics/nn/modules/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Ultralytics modules.
@@ -56,6 +56,7 @@
RepVGGDW,
ResNetLayer,
SCDown,
+ TorchVision,
)
from .conv import (
CBAM,
@@ -68,6 +69,7 @@
DWConvTranspose2d,
Focus,
GhostConv,
+ Index,
LightConv,
RepConv,
SpatialAttention,
@@ -160,4 +162,6 @@
"C2fCIB",
"Attention",
"PSA",
+ "TorchVision",
+ "Index",
)
diff --git a/ultralytics/nn/modules/activation.py b/ultralytics/nn/modules/activation.py
index aaf636e7625..cc6b44b47b6 100644
--- a/ultralytics/nn/modules/activation.py
+++ b/ultralytics/nn/modules/activation.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Activation modules."""
import torch
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index 91a94f6ad73..1f16310acc8 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Block modules."""
import torch
@@ -49,6 +49,7 @@
"Attention",
"PSA",
"SCDown",
+ "TorchVision",
)
@@ -242,7 +243,8 @@ def forward(self, x):
def forward_split(self, x):
"""Forward pass using split() instead of chunk()."""
- y = list(self.cv1(x).split((self.c, self.c), 1))
+ y = self.cv1(x).split((self.c, self.c), 1)
+ y = [y[0], y[1]]
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))
@@ -315,8 +317,8 @@ def __init__(self, c1, c2, n=3, e=1.0):
"""Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
super().__init__()
c_ = int(c2 * e) # hidden channels
- self.cv1 = Conv(c1, c2, 1, 1)
- self.cv2 = Conv(c1, c2, 1, 1)
+ self.cv1 = Conv(c1, c_, 1, 1)
+ self.cv2 = Conv(c1, c_, 1, 1)
self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()
@@ -1142,3 +1144,49 @@ def __init__(self, c1, c2, k, s):
def forward(self, x):
"""Applies convolution and downsampling to the input tensor in the SCDown module."""
return self.cv2(self.cv1(x))
+
+
+class TorchVision(nn.Module):
+ """
+ TorchVision module to allow loading any torchvision model.
+
+ This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers.
+
+ Attributes:
+ m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
+
+ Args:
+ model (str): Name of the torchvision model to load.
+ weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
+ unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
+ truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2.
+ split (bool, optional): Returns output from intermediate child modules as list. Default is False.
+ """
+
+ def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
+ """Load the model and weights from torchvision."""
+ import torchvision # scope for faster 'import ultralytics'
+
+ super().__init__()
+ if hasattr(torchvision.models, "get_model"):
+ self.m = torchvision.models.get_model(model, weights=weights)
+ else:
+ self.m = torchvision.models.__dict__[model](pretrained=bool(weights))
+ if unwrap:
+ layers = list(self.m.children())
+ if isinstance(layers[0], nn.Sequential): # Second-level for some models like EfficientNet, Swin
+ layers = [*list(layers[0].children()), *layers[1:]]
+ self.m = nn.Sequential(*(layers[:-truncate] if truncate else layers))
+ self.split = split
+ else:
+ self.split = False
+ self.m.head = self.m.heads = nn.Identity()
+
+ def forward(self, x):
+ """Forward pass through the model."""
+ if self.split:
+ y = [x]
+ y.extend(m(y[-1]) for m in self.m)
+ else:
+ y = self.m(x)
+ return y
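A hedged usage sketch of the new `TorchVision` block (requires torchvision; `resnet18` is just an example and `weights="DEFAULT"` downloads pretrained weights): with `truncate=2` the classifier's pooling and fully connected layers are dropped, leaving a feature extractor:

```python
import torch

from ultralytics.nn.modules import TorchVision

backbone = TorchVision("resnet18", weights="DEFAULT", unwrap=True, truncate=2)
feats = backbone(torch.zeros(1, 3, 224, 224))
print(feats.shape)  # torch.Size([1, 512, 7, 7]) for resnet18
```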
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index aaa70f5745a..6c15e1d66cc 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Convolution modules."""
import math
@@ -21,6 +21,7 @@
"CBAM",
"Concat",
"RepConv",
+ "Index",
)
@@ -50,7 +51,7 @@ def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
- """Perform transposed convolution of 2D data."""
+ """Apply convolution and activation without batch normalization."""
return self.act(self.conv(x))
@@ -330,3 +331,20 @@ def __init__(self, dimension=1):
def forward(self, x):
"""Forward pass for the YOLOv8 mask Proto module."""
return torch.cat(x, self.d)
+
+
+class Index(nn.Module):
+ """Returns a particular index of the input."""
+
+ def __init__(self, index=0):
+ """Returns a particular index of the input."""
+ super().__init__()
+ self.index = index
+
+ def forward(self, x):
+ """
+ Forward pass.
+
+ Expects a list of tensors as input.
+ """
+ return x[self.index]
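`Index` simply selects one tensor from a multi-output layer (for example a `TorchVision` block with `split=True`); a one-liner sketch:

```python
import torch

from ultralytics.nn.modules import Index

x = [torch.zeros(1, 64, 80, 80), torch.zeros(1, 128, 40, 40), torch.zeros(1, 256, 20, 20)]
print(Index(1)(x).shape)  # torch.Size([1, 128, 40, 40])
```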
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index ece9a2b81a1..91889f5c14f 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Model head modules."""
import copy
@@ -19,10 +19,11 @@
__all__ += "Regress", "Regress6",
class Detect(nn.Module):
- """YOLOv8 Detect head for detection models."""
+ """YOLO Detect head for detection models."""
dynamic = False # force grid reconstruction
export = False # export mode
+ format = None # export format
end2end = False # end2end
max_det = 300 # max_det
shape = None
@@ -30,8 +31,10 @@ class Detect(nn.Module):
strides = torch.empty(0) # init
separate_outputs = False
+ legacy = False # backward compatibility for v3/v5/v8/v9 models
+
def __init__(self, nc=80, ch=()):
- """Initializes the YOLOv8 detection layer with specified number of classes and channels."""
+ """Initializes the YOLO detection layer with specified number of classes and channels."""
super().__init__()
self.nc = nc # number of classes
self.nl = len(ch) # number of detection layers
@@ -42,13 +45,17 @@ def __init__(self, nc=80, ch=()):
self.cv2 = nn.ModuleList(
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
)
- self.cv3 = nn.ModuleList(
- nn.Sequential(
- nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
- nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
- nn.Conv2d(c3, self.nc, 1),
+ self.cv3 = (
+ nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+ if self.legacy
+ else nn.ModuleList(
+ nn.Sequential(
+ nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
+ nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
+ nn.Conv2d(c3, self.nc, 1),
+ )
+ for x in ch
)
- for x in ch
)
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
@@ -111,7 +118,7 @@ def _inference(self, x):
# Inference path
shape = x[0].shape # BCHW
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
- if self.dynamic or self.shape != shape:
+ if self.format != "imx" and (self.dynamic or self.shape != shape):
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
@@ -129,6 +136,11 @@ def _inference(self, x):
grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
norm = self.strides / (self.stride[0] * grid_size)
dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
+ elif self.export and self.format == "imx":
+ dbox = self.decode_bboxes(
+ self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
+ )
+ return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
else:
dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
@@ -147,9 +159,9 @@ def bias_init(self):
a[-1].bias.data[:] = 1.0 # box
b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
- def decode_bboxes(self, bboxes, anchors):
+ def decode_bboxes(self, bboxes, anchors, xywh=True):
"""Decode bounding boxes."""
- return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1)
+ return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)
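
For clarity, a minimal sketch of the distance-to-box decoding that `decode_bboxes` delegates to via `dist2bbox`; this illustrates the math only, not the library implementation. The new `xywh` flag controls whether center/size or corner coordinates are produced (corner format is what the IMX export branch above requests):

```python
import torch


def dist2bbox_sketch(distance, anchor_points, xywh=True, dim=-1):
    """Decode (left, top, right, bottom) distances from anchor points into boxes."""
    lt, rb = distance.chunk(2, dim)  # distances to the left-top and right-bottom edges
    x1y1 = anchor_points - lt
    x2y2 = anchor_points + rb
    if xywh:  # center x, center y, width, height (default head behavior)
        return torch.cat(((x1y1 + x2y2) / 2, x2y2 - x1y1), dim)
    return torch.cat((x1y1, x2y2), dim)  # x1, y1, x2, y2 (end2end / IMX export)
```
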
@staticmethod
def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
@@ -177,7 +189,7 @@ def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
class Segment(Detect):
- """YOLOv8 Segment head for segmentation models."""
+ """YOLO Segment head for segmentation models."""
def __init__(self, nc=80, nm=32, npr=256, ch=()):
"""Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
@@ -209,7 +221,7 @@ def forward(self, x):
class OBB(Detect):
- """YOLOv8 OBB detection head for detection with rotation models."""
+ """YOLO OBB detection head for detection with rotation models."""
def __init__(self, nc=80, ne=1, ch=()):
"""Initialize OBB with number of classes `nc` and layer channels `ch`."""
@@ -239,7 +251,7 @@ def decode_bboxes(self, bboxes, anchors):
class Pose(Detect):
- """YOLOv8 Pose head for keypoints models."""
+ """YOLO Pose head for keypoints models."""
separate_pose = False
@@ -273,16 +285,20 @@ def forward(self, x):
def kpts_decode(self, bs, kpts, shape):
"""Decodes keypoints."""
ndim = self.kpt_shape[1]
- if self.export: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
- y = kpts.view(bs, *self.kpt_shape, -1)
- if self.format in {"tflite", "edgetpu"}:
+ if self.export:
+ if self.format in {
+ "tflite",
+ "edgetpu",
+ }: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
# Precompute normalization factor to increase numerical stability
- grid_w = shape[2]
- grid_h = shape[3]
- grid_size = torch.tensor([grid_w, grid_h], device=kpts.device).reshape(2, 1)
+ y = kpts.view(bs, *self.kpt_shape, -1)
+ grid_h, grid_w = self.shape[2], self.shape[3]
+ grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
norm = self.strides / (self.stride[0] * grid_size)
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
else:
+ # NCNN fix
+ y = kpts.view(bs, *self.kpt_shape, -1)
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
@@ -297,10 +313,12 @@ def kpts_decode(self, bs, kpts, shape):
class Classify(nn.Module):
- """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+ """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+
+ export = False # export mode
def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
- """Initializes YOLOv8 classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
+ """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
super().__init__()
c_ = 1280 # efficientnet_b0 size
self.conv = Conv(c1, c_, k, s, p, g)
@@ -313,7 +331,10 @@ def forward(self, x):
if isinstance(x, list):
x = torch.cat(x, 1)
x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
- return x if self.training else x.softmax(1)
+ if self.training:
+ return x
+ y = x.softmax(1) # get final output
+ return y if self.export else (y, x)
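
A small sketch of the new Classify return behavior introduced above (the head construction, shapes and import path are illustrative assumptions):

```python
import torch

from ultralytics.nn.modules import Classify

head = Classify(c1=16, c2=10)  # toy head: 16 input channels -> 10 classes
x = torch.zeros(2, 16, 20, 20)

head.train()
logits = head(x)  # training: raw logits, shape (2, 10)

head.eval()
probs, logits = head(x)  # inference: (softmax probabilities, raw logits)

head.export = True
probs_only = head(x)  # export mode: probabilities only, for a clean exported graph
```
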
class Regress(nn.Module):
@@ -377,10 +398,10 @@ def forward(self, x):
class WorldDetect(Detect):
- """Head for integrating YOLOv8 detection models with semantic understanding from text embeddings."""
+ """Head for integrating YOLO detection models with semantic understanding from text embeddings."""
def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
- """Initialize YOLOv8 detection layer with nc classes and layer channels ch."""
+ """Initialize YOLO detection layer with nc classes and layer channels ch."""
super().__init__(nc, ch)
c3 = max(ch[0], min(self.nc, 100))
self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py
index bae008ac6c2..c198736908e 100644
--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Transformer modules."""
import math
diff --git a/ultralytics/nn/modules/utils.py b/ultralytics/nn/modules/utils.py
index a7c86391c42..c7837ebe6c6 100644
--- a/ultralytics/nn/modules/utils.py
+++ b/ultralytics/nn/modules/utils.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Module utils."""
import copy
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index a1930c238b2..5b303969abe 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -1,13 +1,14 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import pickle
+import re
import types
from copy import deepcopy
from pathlib import Path
+import thop
import torch
-import torch.nn as nn
from ultralytics.nn.modules import (
AIFI,
@@ -49,6 +50,7 @@
HGBlock,
HGStem,
ImagePoolingAttn,
+ Index,
Pose,
Regress,
Regress6,
@@ -60,6 +62,7 @@
RTDETRDecoder,
SCDown,
Segment,
+ TorchVision,
WorldDetect,
v10Detect,
)
@@ -86,13 +89,8 @@
time_sync,
)
-try:
- import thop
-except ImportError:
- thop = None
-
-class BaseModel(nn.Module):
+class BaseModel(torch.nn.Module):
"""The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""
def forward(self, x, *args, **kwargs):
@@ -155,7 +153,7 @@ def _predict_once(self, x, profile=False, visualize=False, embed=None):
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
if embed and m.i in embed:
- embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
+ embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
if m.i == max(embed):
return torch.unbind(torch.cat(embeddings, 1), dim=0)
return x
@@ -174,12 +172,9 @@ def _profile_one_layer(self, m, x, dt):
the provided list.
Args:
- m (nn.Module): The layer to be profiled.
+ m (torch.nn.Module): The layer to be profiled.
x (torch.Tensor): The input data to the layer.
dt (list): A list to store the computation time of the layer.
-
- Returns:
- None
"""
c = m == self.model[-1] and isinstance(x, list) # is final layer list, copy input as inplace fix
flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs
@@ -199,7 +194,7 @@ def fuse(self, verbose=True):
computation efficiency.
Returns:
- (nn.Module): The fused model is returned.
+ (torch.nn.Module): The fused model is returned.
"""
if not self.is_fused():
for m in self.model.modules():
@@ -233,7 +228,7 @@ def is_fused(self, thresh=10):
Returns:
(bool): True if the number of BatchNorm layers in the model is less than the threshold, False otherwise.
"""
- bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
+ bn = tuple(v for k, v in torch.nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
return sum(isinstance(v, bn) for v in self.modules()) < thresh # True if < 'thresh' BatchNorm layers in model
def info(self, detailed=False, verbose=True, imgsz=640):
@@ -300,12 +295,18 @@ def init_criterion(self):
class DetectionModel(BaseModel):
- """YOLOv8 detection model."""
+ """YOLO detection model."""
- def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True): # model, input channels, number of classes
- """Initialize the YOLOv8 detection model with the given config and parameters."""
+ def __init__(self, cfg="yolo11n.yaml", ch=3, nc=None, verbose=True): # model, input channels, number of classes
+ """Initialize the YOLO detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
+ if self.yaml["backbone"][0][2] == "Silence":
+ LOGGER.warning(
+                "WARNING ⚠️ YOLOv9 `Silence` module is deprecated in favor of torch.nn.Identity. "
+ "Please delete local *.pt file and re-download the latest model checkpoint."
+ )
+ self.yaml["backbone"][0][2] = "nn.Identity"
# Define model
ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
@@ -386,10 +387,10 @@ def init_criterion(self):
class OBBModel(DetectionModel):
- """YOLOv8 Oriented Bounding Box (OBB) model."""
+ """YOLO Oriented Bounding Box (OBB) model."""
- def __init__(self, cfg="yolov8n-obb.yaml", ch=3, nc=None, verbose=True):
- """Initialize YOLOv8 OBB model with given config and parameters."""
+ def __init__(self, cfg="yolo11n-obb.yaml", ch=3, nc=None, verbose=True):
+ """Initialize YOLO OBB model with given config and parameters."""
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
@@ -398,9 +399,9 @@ def init_criterion(self):
class SegmentationModel(DetectionModel):
- """YOLOv8 segmentation model."""
+ """YOLO segmentation model."""
- def __init__(self, cfg="yolov8n-seg.yaml", ch=3, nc=None, verbose=True):
+ def __init__(self, cfg="yolo11n-seg.yaml", ch=3, nc=None, verbose=True):
"""Initialize YOLOv8 segmentation model with given config and parameters."""
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
@@ -410,9 +411,9 @@ def init_criterion(self):
class PoseModel(DetectionModel):
- """YOLOv8 pose model."""
+ """YOLO pose model."""
- def __init__(self, cfg="yolov8n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
+ def __init__(self, cfg="yolo11n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
"""Initialize YOLOv8 Pose model."""
if not isinstance(cfg, dict):
cfg = yaml_model_load(cfg) # load model YAML
@@ -427,9 +428,9 @@ def init_criterion(self):
class ClassificationModel(BaseModel):
- """YOLOv8 classification model."""
+ """YOLO classification model."""
- def __init__(self, cfg="yolov8n-cls.yaml", ch=3, nc=None, verbose=True):
+ def __init__(self, cfg="yolo11n-cls.yaml", ch=3, nc=None, verbose=True):
"""Init ClassificationModel with YAML, channels, number of classes, verbose flag."""
super().__init__()
self._from_yaml(cfg, ch, nc, verbose)
@@ -456,20 +457,22 @@ def reshape_outputs(model, nc):
name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1] # last module
if isinstance(m, Classify): # YOLO Classify() head
if m.linear.out_features != nc:
- m.linear = nn.Linear(m.linear.in_features, nc)
- elif isinstance(m, nn.Linear): # ResNet, EfficientNet
+ m.linear = torch.nn.Linear(m.linear.in_features, nc)
+ elif isinstance(m, torch.nn.Linear): # ResNet, EfficientNet
if m.out_features != nc:
- setattr(model, name, nn.Linear(m.in_features, nc))
- elif isinstance(m, nn.Sequential):
+ setattr(model, name, torch.nn.Linear(m.in_features, nc))
+ elif isinstance(m, torch.nn.Sequential):
types = [type(x) for x in m]
- if nn.Linear in types:
- i = len(types) - 1 - types[::-1].index(nn.Linear) # last nn.Linear index
+ if torch.nn.Linear in types:
+ i = len(types) - 1 - types[::-1].index(torch.nn.Linear) # last torch.nn.Linear index
if m[i].out_features != nc:
- m[i] = nn.Linear(m[i].in_features, nc)
- elif nn.Conv2d in types:
- i = len(types) - 1 - types[::-1].index(nn.Conv2d) # last nn.Conv2d index
+ m[i] = torch.nn.Linear(m[i].in_features, nc)
+ elif torch.nn.Conv2d in types:
+ i = len(types) - 1 - types[::-1].index(torch.nn.Conv2d) # last torch.nn.Conv2d index
if m[i].out_channels != nc:
- m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
+ m[i] = torch.nn.Conv2d(
+ m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None
+ )
def init_criterion(self):
"""Initialize the loss criterion for the ClassificationModel."""
@@ -506,12 +509,6 @@ class RTDETRDetectionModel(DetectionModel):
the training and inference processes. RTDETR is an object detection and tracking model that extends from the
DetectionModel base class.
- Attributes:
- cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
- ch (int): Number of input channels. Default is 3 (RGB).
- nc (int, optional): Number of classes for object detection. Default is None.
- verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
-
Methods:
init_criterion: Initializes the criterion used for loss calculation.
loss: Computes and returns the loss during training.
@@ -607,7 +604,7 @@ def predict(self, x, profile=False, visualize=False, batch=None, augment=False,
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
if embed and m.i in embed:
- embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
+ embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
if m.i == max(embed):
return torch.unbind(torch.cat(embeddings, 1), dim=0)
head = self.model[-1]
@@ -683,7 +680,7 @@ def predict(self, x, profile=False, visualize=False, txt_feats=None, augment=Fal
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
if embed and m.i in embed:
- embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
+ embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten
if m.i == max(embed):
return torch.unbind(torch.cat(embeddings, 1), dim=0)
return x
@@ -704,15 +701,7 @@ def loss(self, batch, preds=None):
return self.criterion(preds, batch)
-# NOTE: keep YOLOv10DetectionModel for compatibility with yolov10 pretrained weights.
-class YOLOv10DetectionModel(DetectionModel):
- """YOLOv10 Detection model."""
-
- def __init__(self, cfg="yolov10n.yaml", ch=3, nc=None, verbose=True, end2end=True):
- super().__init__(cfg, ch, nc, verbose, end2end)
-
-
-class Ensemble(nn.ModuleList):
+class Ensemble(torch.nn.ModuleList):
"""Ensemble of models."""
def __init__(self):
@@ -870,14 +859,14 @@ def torch_safe_load(weight, safe_only=False):
f"with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with "
f"YOLOv8 at https://github.com/ultralytics/ultralytics."
f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to "
- f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolov8n.pt'"
+ f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo11n.pt'"
)
) from e
LOGGER.warning(
             f"WARNING ⚠️ {weight} appears to require '{e.name}', which is not in Ultralytics requirements."
f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future."
f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to "
- f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolov8n.pt'"
+ f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo11n.pt'"
)
check_requirements(e.name) # install missing module
ckpt = torch.load(file, map_location="cpu")
@@ -915,7 +904,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
for m in ensemble.modules():
if hasattr(m, "inplace"):
m.inplace = inplace
- elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"):
+ elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model
@@ -950,7 +939,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
for m in model.modules():
if hasattr(m, "inplace"):
m.inplace = inplace
- elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"):
+ elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model and ckpt
@@ -962,6 +951,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
import ast
# Args
+ legacy = True # backward compatibility for v3/v5/v8/v9 models
max_channels = float("inf")
nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales"))
depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape"))
@@ -973,7 +963,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
depth, width, max_channels = scales[scale]
if act:
- Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
+ Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = torch.nn.SiLU()
if verbose:
LOGGER.info(f"{colorstr('activation:')} {act}") # print
@@ -981,15 +971,8 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}")
ch = [ch]
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
- for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
- m = getattr(torch.nn, m[3:]) if "nn." in m else globals()[m] # get module
- for j, a in enumerate(args):
- if isinstance(a, str):
- with contextlib.suppress(ValueError):
- args[j] = locals()[a] if a in locals() else ast.literal_eval(a)
-
- n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain
- if m in {
+ base_modules = frozenset(
+ {
Classify,
Conv,
ConvTranspose,
@@ -1016,47 +999,65 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
C3,
C3TR,
C3Ghost,
- nn.ConvTranspose2d,
+ torch.nn.ConvTranspose2d,
DWConvTranspose2d,
C3x,
RepC3,
PSA,
SCDown,
C2fCIB,
- }:
+ }
+ )
+ repeat_modules = frozenset( # modules with 'repeat' arguments
+ {
+ BottleneckCSP,
+ C1,
+ C2,
+ C2f,
+ C3k2,
+ C2fAttn,
+ C3,
+ C3TR,
+ C3Ghost,
+ C3x,
+ RepC3,
+ C2fPSA,
+ C2fCIB,
+ C2PSA,
+ }
+ )
+ for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
+ m = (
+ getattr(torch.nn, m[3:])
+ if "nn." in m
+ else getattr(__import__("torchvision").ops, m[16:])
+ if "torchvision.ops." in m
+ else globals()[m]
+ ) # get module
+ for j, a in enumerate(args):
+ if isinstance(a, str):
+ with contextlib.suppress(ValueError):
+ args[j] = locals()[a] if a in locals() else ast.literal_eval(a)
+ n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain
+ if m in base_modules:
c1, c2 = ch[f], args[0]
if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output)
c2 = make_divisible(min(c2, max_channels) * width, 8)
- if m is C2fAttn:
- args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8) # embed channels
- args[2] = int(
- max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2]
- ) # num heads
+ if m is C2fAttn: # set 1) embed channels and 2) num heads
+ args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8)
+ args[2] = int(max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2])
args = [c1, c2, *args[1:]]
- if m in {
- BottleneckCSP,
- C1,
- C2,
- C2f,
- C3k2,
- C2fAttn,
- C3,
- C3TR,
- C3Ghost,
- C3x,
- RepC3,
- C2fPSA,
- C2fCIB,
- C2PSA,
- }:
+ if m in repeat_modules:
args.insert(2, n) # number of repeats
n = 1
- if m is C3k2 and scale in "mlx": # for M/L/X sizes
- args[3] = True
+ if m is C3k2: # for M/L/X sizes
+ legacy = False
+ if scale in "mlx":
+ args[3] = True
elif m is AIFI:
args = [ch[f], *args]
- elif m in {HGStem, HGBlock}:
+ elif m in frozenset({HGStem, HGBlock}):
c1, cm, c2 = ch[f], args[0], args[1]
args = [c1, cm, c2, *args[2:]]
if m is HGBlock:
@@ -1064,14 +1065,16 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
n = 1
elif m is ResNetLayer:
c2 = args[1] if args[3] else args[1] * 4
- elif m is nn.BatchNorm2d:
+ elif m is torch.nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
- elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
+ elif m in frozenset({Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}):
args.append([ch[x] for x in f])
if m is Segment:
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
+ if m in {Detect, Segment, Pose, OBB}:
+ m.legacy = legacy
elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1
args.insert(1, [ch[x] for x in f])
elif m in (Regress, Regress6):
@@ -1085,27 +1088,29 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
args = [c1, c2, *args[1:]]
elif m is CBFuse:
c2 = ch[f[-1]]
+ elif m in frozenset({TorchVision, Index}):
+ c2 = args[0]
+ c1 = ch[f]
+ args = [*args[1:]]
else:
c2 = ch[f]
- m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
+ m_ = torch.nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace("__main__.", "") # module type
- m.np = sum(x.numel() for x in m_.parameters()) # number params
+ m_.np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type
if verbose:
- LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}") # print
+ LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m_.np:10.0f} {t:<45}{str(args):<30}") # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
- return nn.Sequential(*layers), sorted(save)
+ return torch.nn.Sequential(*layers), sorted(save)
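
To make the new module-resolution branch easier to follow, here is a small standalone sketch mirroring the lookup logic above; the YAML line in the trailing comment is hypothetical:

```python
import torch

ULTRALYTICS_MODULES = {}  # stand-in for globals() inside parse_model


def resolve_module(name):
    """Mirror parse_model's string-to-class lookup for a module name from a model YAML."""
    if "nn." in name:  # e.g. "nn.Upsample", "nn.Identity"
        return getattr(torch.nn, name[3:])
    if "torchvision.ops." in name:  # e.g. "torchvision.ops.DeformConv2d"
        return getattr(__import__("torchvision").ops, name[16:])
    return ULTRALYTICS_MODULES[name]  # Ultralytics classes such as TorchVision or Index


print(resolve_module("nn.Upsample"))  # <class 'torch.nn.modules.upsampling.Upsample'>

# Hypothetical backbone entry using the new TorchVision wrapper:
#   - [-1, 1, TorchVision, [768, "convnext_tiny", "DEFAULT", True, 2, True]]
# parse_model takes c2 = args[0] = 768, drops it, and constructs
# TorchVision("convnext_tiny", "DEFAULT", True, 2, True) for that layer.
```
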
def yaml_model_load(path):
"""Load a YOLOv8 model from a YAML file."""
- import re
-
path = Path(path)
if path.stem in (f"yolov{d}{x}6" for x in "nsmlx" for d in (5, 8)):
new_stem = re.sub(r"(\d+)([nslmx])6(.+)?$", r"\1\2-p6\3", path.stem)
@@ -1132,11 +1137,10 @@ def guess_model_scale(model_path):
Returns:
(str): The size character of the model's scale, which can be n, s, m, l, or x.
"""
- with contextlib.suppress(AttributeError):
- import re
-
- return re.search(r"yolo[v]?\d+([nslmx])", Path(model_path).stem).group(1) # n, s, m, l, or x
- return ""
+ try:
+ return re.search(r"yolo[v]?\d+([nslmx])", Path(model_path).stem).group(1) # noqa, returns n, s, m, l, or x
+ except AttributeError:
+ return ""
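
A quick check of the scale-guessing regex above on a few example stems (illustrative values only):

```python
import re

for stem in ("yolo11n", "yolov8s", "yolov5mu", "rtdetr-l"):
    match = re.search(r"yolo[v]?\d+([nslmx])", stem)
    print(stem, "->", match.group(1) if match else "")  # -> n, s, m, "" respectively
```
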
def guess_model_task(model):
@@ -1144,7 +1148,7 @@ def guess_model_task(model):
Guess the task of a PyTorch model from its architecture or configuration.
Args:
- model (nn.Module | dict): PyTorch model or model configuration in YAML format.
+ model (torch.nn.Module | dict): PyTorch model or model configuration in YAML format.
Returns:
(str): Task of the model ('detect', 'segment', 'classify', 'pose', 'regress').
@@ -1173,16 +1177,14 @@ def cfg2task(cfg):
if isinstance(model, dict):
with contextlib.suppress(Exception):
return cfg2task(model)
-
# Guess from PyTorch model
- if isinstance(model, nn.Module): # PyTorch model
+ if isinstance(model, torch.nn.Module): # PyTorch model
for x in "model.args", "model.model.args", "model.model.model.args":
with contextlib.suppress(Exception):
return eval(x)["task"]
for x in "model.yaml", "model.model.yaml", "model.model.model.yaml":
with contextlib.suppress(Exception):
return cfg2task(eval(x))
-
for m in model.modules():
if isinstance(m, Segment):
return "segment"
diff --git a/ultralytics/solutions/__init__.py b/ultralytics/solutions/__init__.py
index 4446c1826ed..635cb3ad7e2 100644
--- a/ultralytics/solutions/__init__.py
+++ b/ultralytics/solutions/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .ai_gym import AIGym
from .analytics import Analytics
@@ -7,8 +7,11 @@
from .object_counter import ObjectCounter
from .parking_management import ParkingManagement, ParkingPtsSelection
from .queue_management import QueueManager
+from .region_counter import RegionCounter
+from .security_alarm import SecurityAlarm
from .speed_estimation import SpeedEstimator
-from .streamlit_inference import inference
+from .streamlit_inference import Inference
+from .trackzone import TrackZone
__all__ = (
"AIGym",
@@ -20,5 +23,8 @@
"QueueManager",
"SpeedEstimator",
"Analytics",
- "inference",
+ "Inference",
+ "RegionCounter",
+ "TrackZone",
+ "SecurityAlarm",
)
diff --git a/ultralytics/solutions/ai_gym.py b/ultralytics/solutions/ai_gym.py
index 349e46e8f08..fab84f16891 100644
--- a/ultralytics/solutions/ai_gym.py
+++ b/ultralytics/solutions/ai_gym.py
@@ -1,127 +1,111 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import cv2
-
-from ultralytics.utils.checks import check_imshow
+from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator
-class AIGym:
- """A class to manage the gym steps of people in a real-time video stream based on their poses."""
-
- def __init__(
- self,
- kpts_to_check,
- line_thickness=2,
- view_img=False,
- pose_up_angle=145.0,
- pose_down_angle=90.0,
- pose_type="pullup",
- ):
+class AIGym(BaseSolution):
+ """
+    A class to monitor gym workouts of people in a real-time video stream based on their poses.
+
+ This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
+ repetitions of exercises based on predefined angle thresholds for up and down positions.
+
+ Attributes:
+ count (List[int]): Repetition counts for each detected person.
+ angle (List[float]): Current angle of the tracked body part for each person.
+ stage (List[str]): Current exercise stage ('up', 'down', or '-') for each person.
+ initial_stage (str | None): Initial stage of the exercise.
+ up_angle (float): Angle threshold for considering the 'up' position of an exercise.
+ down_angle (float): Angle threshold for considering the 'down' position of an exercise.
+ kpts (List[int]): Indices of keypoints used for angle calculation.
+ annotator (Annotator): Object for drawing annotations on the image.
+
+ Methods:
+ monitor: Processes a frame to detect poses, calculate angles, and count repetitions.
+
+ Examples:
+ >>> gym = AIGym(model="yolo11n-pose.pt")
+ >>> image = cv2.imread("gym_scene.jpg")
+ >>> processed_image = gym.monitor(image)
+ >>> cv2.imshow("Processed Image", processed_image)
+ >>> cv2.waitKey(0)
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes AIGym for workout monitoring using pose estimation and predefined angles."""
+        # Ensure a pose model is used; fall back to the default pose checkpoint otherwise
+ if "model" in kwargs and "-pose" not in kwargs["model"]:
+ kwargs["model"] = "yolo11n-pose.pt"
+ elif "model" not in kwargs:
+ kwargs["model"] = "yolo11n-pose.pt"
+
+ super().__init__(**kwargs)
+ self.count = [] # List for counts, necessary where there are multiple objects in frame
+ self.angle = [] # List for angle, necessary where there are multiple objects in frame
+ self.stage = [] # List for stage, necessary where there are multiple objects in frame
+
+        # Extract details from CFG once for later use
+        self.initial_stage = None
+        self.up_angle = float(self.CFG["up_angle"])  # Angle threshold above which a pose is considered 'up'
+        self.down_angle = float(self.CFG["down_angle"])  # Angle threshold below which a pose is considered 'down'
+        self.kpts = self.CFG["kpts"]  # User-selected keypoint indices used for angle estimation
+
+ def monitor(self, im0):
"""
- Initializes the AIGym class with the specified parameters.
+ Monitors workouts using Ultralytics YOLO Pose Model.
- Args:
- kpts_to_check (list): Indices of keypoints to check.
- line_thickness (int, optional): Thickness of the lines drawn. Defaults to 2.
- view_img (bool, optional): Flag to display the image. Defaults to False.
- pose_up_angle (float, optional): Angle threshold for the 'up' pose. Defaults to 145.0.
- pose_down_angle (float, optional): Angle threshold for the 'down' pose. Defaults to 90.0.
- pose_type (str, optional): Type of pose to detect ('pullup', 'pushup', 'abworkout'). Defaults to "pullup".
- """
- # Image and line thickness
- self.im0 = None
- self.tf = line_thickness
-
- # Keypoints and count information
- self.keypoints = None
- self.poseup_angle = pose_up_angle
- self.posedown_angle = pose_down_angle
- self.threshold = 0.001
-
- # Store stage, count and angle information
- self.angle = None
- self.count = None
- self.stage = None
- self.pose_type = pose_type
- self.kpts_to_check = kpts_to_check
-
- # Visual Information
- self.view_img = view_img
- self.annotator = None
-
- # Check if environment supports imshow
- self.env_check = check_imshow(warn=True)
- self.count = []
- self.angle = []
- self.stage = []
-
- def start_counting(self, im0, results):
- """
- Function used to count the gym steps.
+ This function processes an input image to track and analyze human poses for workout monitoring. It uses
+ the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
+ angle thresholds.
Args:
- im0 (ndarray): Current frame from the video stream.
- results (list): Pose estimation data.
- """
- self.im0 = im0
-
- if not len(results[0]):
- return self.im0
-
- if len(results[0]) > len(self.count):
- new_human = len(results[0]) - len(self.count)
- self.count += [0] * new_human
- self.angle += [0] * new_human
- self.stage += ["-"] * new_human
-
- self.keypoints = results[0].keypoints.data
- self.annotator = Annotator(im0, line_width=self.tf)
-
- for ind, k in enumerate(reversed(self.keypoints)):
- # Estimate angle and draw specific points based on pose type
- if self.pose_type in {"pushup", "pullup", "abworkout", "squat"}:
- self.angle[ind] = self.annotator.estimate_pose_angle(
- k[int(self.kpts_to_check[0])].cpu(),
- k[int(self.kpts_to_check[1])].cpu(),
- k[int(self.kpts_to_check[2])].cpu(),
- )
- self.im0 = self.annotator.draw_specific_points(k, self.kpts_to_check, shape=(640, 640), radius=10)
-
- # Check and update pose stages and counts based on angle
- if self.pose_type in {"abworkout", "pullup"}:
- if self.angle[ind] > self.poseup_angle:
- self.stage[ind] = "down"
- if self.angle[ind] < self.posedown_angle and self.stage[ind] == "down":
- self.stage[ind] = "up"
- self.count[ind] += 1
+ im0 (ndarray): Input image for processing.
+
+ Returns:
+ (ndarray): Processed image with annotations for workout monitoring.
- elif self.pose_type in {"pushup", "squat"}:
- if self.angle[ind] > self.poseup_angle:
- self.stage[ind] = "up"
- if self.angle[ind] < self.posedown_angle and self.stage[ind] == "up":
- self.stage[ind] = "down"
+ Examples:
+ >>> gym = AIGym()
+ >>> image = cv2.imread("workout.jpg")
+ >>> processed_image = gym.monitor(image)
+ """
+ # Extract tracks
+ tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)[0]
+
+ if tracks.boxes.id is not None:
+ # Extract and check keypoints
+ if len(tracks) > len(self.count):
+ new_human = len(tracks) - len(self.count)
+ self.angle += [0] * new_human
+ self.count += [0] * new_human
+ self.stage += ["-"] * new_human
+
+ # Initialize annotator
+ self.annotator = Annotator(im0, line_width=self.line_width)
+
+ # Enumerate over keypoints
+ for ind, k in enumerate(reversed(tracks.keypoints.data)):
+ # Get keypoints and estimate the angle
+ kpts = [k[int(self.kpts[i])].cpu() for i in range(3)]
+ self.angle[ind] = self.annotator.estimate_pose_angle(*kpts)
+ im0 = self.annotator.draw_specific_points(k, self.kpts, radius=self.line_width * 3)
+
+ # Determine stage and count logic based on angle thresholds
+ if self.angle[ind] < self.down_angle:
+ if self.stage[ind] == "up":
self.count[ind] += 1
+ self.stage[ind] = "down"
+ elif self.angle[ind] > self.up_angle:
+ self.stage[ind] = "up"
+ # Display angle, count, and stage text
self.annotator.plot_angle_and_count_and_stage(
- angle_text=self.angle[ind],
- count_text=self.count[ind],
- stage_text=self.stage[ind],
- center_kpt=k[int(self.kpts_to_check[1])],
+ angle_text=self.angle[ind], # angle text for display
+ count_text=self.count[ind], # count text for workouts
+ stage_text=self.stage[ind], # stage position text
+ center_kpt=k[int(self.kpts[1])], # center keypoint for display
)
- # Draw keypoints
- self.annotator.kpts(k, shape=(640, 640), radius=1, kpt_line=True)
-
- # Display the image if environment supports it and view_img is True
- if self.env_check and self.view_img:
- cv2.imshow("Ultralytics YOLOv8 AI GYM", self.im0)
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
-
- return self.im0
-
-
-if __name__ == "__main__":
- kpts_to_check = [0, 1, 2] # example keypoints
- aigym = AIGym(kpts_to_check)
+        self.display_output(im0)  # Display the output image if the environment supports it
+        return im0  # Return the annotated image for writing or further processing
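
A hedged end-to-end usage sketch for the refactored AIGym solution; the video path and keypoint indices are example values, and `kpts`/`show` are assumed to be valid solution arguments per the CFG keys used above:

```python
import cv2

from ultralytics.solutions import AIGym

gym = AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10], show=False)  # right-arm keypoints, e.g. push-ups
cap = cv2.VideoCapture("workouts.mp4")
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    annotated = gym.monitor(frame)  # updates per-person counts and returns the annotated frame
cap.release()
```
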
diff --git a/ultralytics/solutions/analytics.py b/ultralytics/solutions/analytics.py
index c2990097786..3a62e8c2e68 100644
--- a/ultralytics/solutions/analytics.py
+++ b/ultralytics/solutions/analytics.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import warnings
from itertools import cycle
import cv2
@@ -9,299 +8,240 @@
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
-
-class Analytics:
- """A class to create and update various types of charts (line, bar, pie, area) for visual analytics."""
-
- def __init__(
- self,
- type,
- writer,
- im0_shape,
- title="ultralytics",
- x_label="x",
- y_label="y",
- bg_color="white",
- fg_color="black",
- line_color="yellow",
- line_width=2,
- points_width=10,
- fontsize=13,
- view_img=False,
- save_img=True,
- max_points=50,
- ):
- """
- Initialize the Analytics class with various chart types.
-
- Args:
- type (str): Type of chart to initialize ('line', 'bar', 'pie', or 'area').
- writer (object): Video writer object to save the frames.
- im0_shape (tuple): Shape of the input image (width, height).
- title (str): Title of the chart.
- x_label (str): Label for the x-axis.
- y_label (str): Label for the y-axis.
- bg_color (str): Background color of the chart.
- fg_color (str): Foreground (text) color of the chart.
- line_color (str): Line color for line charts.
- line_width (int): Width of the lines in line charts.
- points_width (int): Width of line points highlighter
- fontsize (int): Font size for chart text.
- view_img (bool): Whether to display the image.
- save_img (bool): Whether to save the image.
- max_points (int): Specifies when to remove the oldest points in a graph for multiple lines.
- """
- self.bg_color = bg_color
- self.fg_color = fg_color
- self.view_img = view_img
- self.save_img = save_img
- self.title = title
- self.writer = writer
- self.max_points = max_points
- self.line_color = line_color
- self.x_label = x_label
- self.y_label = y_label
- self.points_width = points_width
- self.line_width = line_width
- self.fontsize = fontsize
-
- # Set figure size based on image shape
- figsize = (im0_shape[0] / 100, im0_shape[1] / 100)
-
- if type in {"line", "area"}:
- # Initialize line or area plot
+from ultralytics.solutions.solutions import BaseSolution # Import a parent class
+
+
+class Analytics(BaseSolution):
+ """
+ A class for creating and updating various types of charts for visual analytics.
+
+ This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
+ based on object detection and tracking data.
+
+ Attributes:
+ type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
+ x_label (str): Label for the x-axis.
+ y_label (str): Label for the y-axis.
+ bg_color (str): Background color of the chart frame.
+ fg_color (str): Foreground color of the chart frame.
+ title (str): Title of the chart window.
+ max_points (int): Maximum number of data points to display on the chart.
+ fontsize (int): Font size for text display.
+ color_cycle (cycle): Cyclic iterator for chart colors.
+ total_counts (int): Total count of detected objects (used for line charts).
+ clswise_count (Dict[str, int]): Dictionary for class-wise object counts.
+ fig (Figure): Matplotlib figure object for the chart.
+ ax (Axes): Matplotlib axes object for the chart.
+ canvas (FigureCanvas): Canvas for rendering the chart.
+
+ Methods:
+ process_data: Processes image data and updates the chart.
+ update_graph: Updates the chart with new data points.
+
+ Examples:
+ >>> analytics = Analytics(analytics_type="line")
+ >>> frame = cv2.imread("image.jpg")
+ >>> processed_frame = analytics.process_data(frame, frame_number=1)
+ >>> cv2.imshow("Analytics", processed_frame)
+ """
+
+ def __init__(self, **kwargs):
+ """Initialize Analytics class with various chart types for visual data representation."""
+ super().__init__(**kwargs)
+
+ self.type = self.CFG["analytics_type"] # extract type of analytics
+ self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
+ self.y_label = "Total Counts"
+
+ # Predefined data
+ self.bg_color = "#F3F3F3" # background color of frame
+ self.fg_color = "#111E68" # foreground color of frame
+ self.title = "Ultralytics Solutions" # window name
+ self.max_points = 45 # maximum points to be drawn on window
+ self.fontsize = 25 # text font size for display
+ figsize = (19.2, 10.8) # Set output image size 1920 * 1080
+ self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
+
+        self.total_counts = 0  # Running total of detected objects, used for the line chart
+ self.clswise_count = {} # dictionary for class-wise counts
+
+        # Initialize figure and axes for line or area charts
+ if self.type in {"line", "area"}:
self.lines = {}
self.fig = Figure(facecolor=self.bg_color, figsize=figsize)
- self.canvas = FigureCanvas(self.fig)
+            self.canvas = FigureCanvas(self.fig)  # Canvas for rendering the figure
self.ax = self.fig.add_subplot(111, facecolor=self.bg_color)
- if type == "line":
- (self.line,) = self.ax.plot([], [], color=self.line_color, linewidth=self.line_width)
-
- elif type in {"bar", "pie"}:
+ if self.type == "line":
+ (self.line,) = self.ax.plot([], [], color="cyan", linewidth=self.line_width)
+ elif self.type in {"bar", "pie"}:
# Initialize bar or pie plot
self.fig, self.ax = plt.subplots(figsize=figsize, facecolor=self.bg_color)
+            self.canvas = FigureCanvas(self.fig)  # Canvas for rendering the figure
self.ax.set_facecolor(self.bg_color)
- color_palette = [
- (31, 119, 180),
- (255, 127, 14),
- (44, 160, 44),
- (214, 39, 40),
- (148, 103, 189),
- (140, 86, 75),
- (227, 119, 194),
- (127, 127, 127),
- (188, 189, 34),
- (23, 190, 207),
- ]
- self.color_palette = [(r / 255, g / 255, b / 255, 1) for r, g, b in color_palette]
- self.color_cycle = cycle(self.color_palette)
self.color_mapping = {}
- # Ensure pie chart is circular
- self.ax.axis("equal") if type == "pie" else None
-
- # Set common axis properties
- self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
- self.ax.set_xlabel(x_label, color=self.fg_color, fontsize=self.fontsize - 3)
- self.ax.set_ylabel(y_label, color=self.fg_color, fontsize=self.fontsize - 3)
- self.ax.tick_params(axis="both", colors=self.fg_color)
+ if self.type == "pie": # Ensure pie chart is circular
+ self.ax.axis("equal")
- def update_area(self, frame_number, counts_dict):
+ def process_data(self, im0, frame_number):
"""
- Update the area graph with new data for multiple classes.
+ Processes image data and runs object tracking to update analytics charts.
Args:
- frame_number (int): The current frame number.
- counts_dict (dict): Dictionary with class names as keys and counts as values.
- """
- x_data = np.array([])
- y_data_dict = {key: np.array([]) for key in counts_dict.keys()}
-
- if self.ax.lines:
- x_data = self.ax.lines[0].get_xdata()
- for line, key in zip(self.ax.lines, counts_dict.keys()):
- y_data_dict[key] = line.get_ydata()
-
- x_data = np.append(x_data, float(frame_number))
- max_length = len(x_data)
-
- for key in counts_dict.keys():
- y_data_dict[key] = np.append(y_data_dict[key], float(counts_dict[key]))
- if len(y_data_dict[key]) < max_length:
- y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])), "constant")
-
- # Remove the oldest points if the number of points exceeds max_points
- if len(x_data) > self.max_points:
- x_data = x_data[1:]
- for key in counts_dict.keys():
- y_data_dict[key] = y_data_dict[key][1:]
-
- self.ax.clear()
-
- colors = ["#E1FF25", "#0BDBEB", "#FF64DA", "#111F68", "#042AFF"]
- color_cycle = cycle(colors)
-
- for key, y_data in y_data_dict.items():
- color = next(color_cycle)
- self.ax.fill_between(x_data, y_data, color=color, alpha=0.6)
- self.ax.plot(
- x_data,
- y_data,
- color=color,
- linewidth=self.line_width,
- marker="o",
- markersize=self.points_width,
- label=f"{key} Data Points",
- )
+ im0 (np.ndarray): Input image for processing.
+ frame_number (int): Video frame number for plotting the data.
- self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
- self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3)
- self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3)
- legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.fg_color)
+ Returns:
+ (np.ndarray): Processed image with updated analytics chart.
- # Set legend text color
- for text in legend.get_texts():
- text.set_color(self.fg_color)
-
- self.canvas.draw()
- im0 = np.array(self.canvas.renderer.buffer_rgba())
- self.write_and_display(im0)
+ Raises:
+ ModuleNotFoundError: If an unsupported chart type is specified.
- def update_line(self, frame_number, total_counts):
+ Examples:
+ >>> analytics = Analytics(analytics_type="line")
+ >>> frame = np.zeros((480, 640, 3), dtype=np.uint8)
+ >>> processed_frame = analytics.process_data(frame, frame_number=1)
"""
- Update the line graph with new data.
-
- Args:
- frame_number (int): The current frame number.
- total_counts (int): The total counts to plot.
- """
- # Update line graph data
- x_data = self.line.get_xdata()
- y_data = self.line.get_ydata()
- x_data = np.append(x_data, float(frame_number))
- y_data = np.append(y_data, float(total_counts))
- self.line.set_data(x_data, y_data)
- self.ax.relim()
- self.ax.autoscale_view()
- self.canvas.draw()
- im0 = np.array(self.canvas.renderer.buffer_rgba())
- self.write_and_display(im0)
-
- def update_multiple_lines(self, counts_dict, labels_list, frame_number):
+ self.extract_tracks(im0) # Extract tracks
+
+ if self.type == "line":
+ for _ in self.boxes:
+ self.total_counts += 1
+ im0 = self.update_graph(frame_number=frame_number)
+ self.total_counts = 0
+ elif self.type in {"pie", "bar", "area"}:
+ self.clswise_count = {}
+ for box, cls in zip(self.boxes, self.clss):
+ if self.names[int(cls)] in self.clswise_count:
+ self.clswise_count[self.names[int(cls)]] += 1
+ else:
+ self.clswise_count[self.names[int(cls)]] = 1
+ im0 = self.update_graph(frame_number=frame_number, count_dict=self.clswise_count, plot=self.type)
+ else:
+            raise ModuleNotFoundError(f"{self.type} chart is not supported ❌")
+ return im0
+
+ def update_graph(self, frame_number, count_dict=None, plot="line"):
"""
- Update the line graph with multiple classes.
+ Updates the graph with new data for single or multiple classes.
Args:
- counts_dict (int): Dictionary include each class counts.
- labels_list (int): list include each classes names.
frame_number (int): The current frame number.
+ count_dict (Dict[str, int] | None): Dictionary with class names as keys and counts as values for multiple
+ classes. If None, updates a single line graph.
+ plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
+
+ Returns:
+ (np.ndarray): Updated image containing the graph.
+
+ Examples:
+ >>> analytics = Analytics()
+ >>> frame_number = 10
+ >>> count_dict = {"person": 5, "car": 3}
+ >>> updated_image = analytics.update_graph(frame_number, count_dict, plot="bar")
"""
- warnings.warn("Display is not supported for multiple lines, output will be stored normally!")
- for obj in labels_list:
- if obj not in self.lines:
- (line,) = self.ax.plot([], [], label=obj, marker="o", markersize=self.points_width)
- self.lines[obj] = line
-
- x_data = self.lines[obj].get_xdata()
- y_data = self.lines[obj].get_ydata()
-
- # Remove the initial point if the number of points exceeds max_points
- if len(x_data) >= self.max_points:
- x_data = np.delete(x_data, 0)
- y_data = np.delete(y_data, 0)
+ if count_dict is None:
+ # Single line update
+ x_data = np.append(self.line.get_xdata(), float(frame_number))
+ y_data = np.append(self.line.get_ydata(), float(self.total_counts))
+
+ if len(x_data) > self.max_points:
+ x_data, y_data = x_data[-self.max_points :], y_data[-self.max_points :]
+
+ self.line.set_data(x_data, y_data)
+ self.line.set_label("Counts")
+ self.line.set_color("#7b0068") # Pink color
+ self.line.set_marker("*")
+ self.line.set_markersize(self.line_width * 5)
+ else:
+ labels = list(count_dict.keys())
+ counts = list(count_dict.values())
+ if plot == "area":
+ color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
+ # Multiple lines or area update
+ x_data = self.ax.lines[0].get_xdata() if self.ax.lines else np.array([])
+ y_data_dict = {key: np.array([]) for key in count_dict.keys()}
+ if self.ax.lines:
+ for line, key in zip(self.ax.lines, count_dict.keys()):
+ y_data_dict[key] = line.get_ydata()
+
+ x_data = np.append(x_data, float(frame_number))
+ max_length = len(x_data)
+ for key in count_dict.keys():
+ y_data_dict[key] = np.append(y_data_dict[key], float(count_dict[key]))
+ if len(y_data_dict[key]) < max_length:
+ y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])))
+ if len(x_data) > self.max_points:
+ x_data = x_data[1:]
+ for key in count_dict.keys():
+ y_data_dict[key] = y_data_dict[key][1:]
+
+ self.ax.clear()
+ for key, y_data in y_data_dict.items():
+ color = next(color_cycle)
+ self.ax.fill_between(x_data, y_data, color=color, alpha=0.7)
+ self.ax.plot(
+ x_data,
+ y_data,
+ color=color,
+ linewidth=self.line_width,
+ marker="o",
+ markersize=self.line_width * 5,
+ label=f"{key} Data Points",
+ )
+ if plot == "bar":
+ self.ax.clear() # clear bar data
+ for label in labels: # Map labels to colors
+ if label not in self.color_mapping:
+ self.color_mapping[label] = next(self.color_cycle)
+ colors = [self.color_mapping[label] for label in labels]
+ bars = self.ax.bar(labels, counts, color=colors)
+ for bar, count in zip(bars, counts):
+ self.ax.text(
+ bar.get_x() + bar.get_width() / 2,
+ bar.get_height(),
+ str(count),
+ ha="center",
+ va="bottom",
+ color=self.fg_color,
+ )
+ # Create the legend using labels from the bars
+ for bar, label in zip(bars, labels):
+ bar.set_label(label) # Assign label to each bar
+ self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
+ if plot == "pie":
+ total = sum(counts)
+ percentages = [size / total * 100 for size in counts]
+ start_angle = 90
+ self.ax.clear()
+
+ # Create pie chart and create legend labels with percentages
+ wedges, autotexts = self.ax.pie(
+ counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
+ )
+ legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)]
+
+ # Assign the legend using the wedges and manually created labels
+ self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
+ self.fig.subplots_adjust(left=0.1, right=0.75) # Adjust layout to fit the legend
+
+ # Common plot settings
+        self.ax.set_facecolor("#f0f0f0")  # Light gray chart background
+ self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
+ self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3)
+ self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3)
- x_data = np.append(x_data, float(frame_number)) # Ensure frame_number is converted to float
- y_data = np.append(y_data, float(counts_dict.get(obj, 0))) # Ensure total_count is converted to float
- self.lines[obj].set_data(x_data, y_data)
+ # Add and format legend
+ legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.bg_color)
+ for text in legend.get_texts():
+ text.set_color(self.fg_color)
+ # Redraw graph, update view, capture, and display the updated plot
self.ax.relim()
self.ax.autoscale_view()
- self.ax.legend()
self.canvas.draw()
-
im0 = np.array(self.canvas.renderer.buffer_rgba())
- self.view_img = False # for multiple line view_img not supported yet, coming soon!
- self.write_and_display(im0)
-
- def write_and_display(self, im0):
- """
- Write and display the line graph
- Args:
- im0 (ndarray): Image for processing.
- """
im0 = cv2.cvtColor(im0[:, :, :3], cv2.COLOR_RGBA2BGR)
- cv2.imshow(self.title, im0) if self.view_img else None
- self.writer.write(im0) if self.save_img else None
-
- def update_bar(self, count_dict):
- """
- Update the bar graph with new data.
-
- Args:
- count_dict (dict): Dictionary containing the count data to plot.
- """
- # Update bar graph data
- self.ax.clear()
- self.ax.set_facecolor(self.bg_color)
- labels = list(count_dict.keys())
- counts = list(count_dict.values())
-
- # Map labels to colors
- for label in labels:
- if label not in self.color_mapping:
- self.color_mapping[label] = next(self.color_cycle)
-
- colors = [self.color_mapping[label] for label in labels]
-
- bars = self.ax.bar(labels, counts, color=colors)
- for bar, count in zip(bars, counts):
- self.ax.text(
- bar.get_x() + bar.get_width() / 2,
- bar.get_height(),
- str(count),
- ha="center",
- va="bottom",
- color=self.fg_color,
- )
-
- # Display and save the updated graph
- canvas = FigureCanvas(self.fig)
- canvas.draw()
- buf = canvas.buffer_rgba()
- im0 = np.asarray(buf)
- self.write_and_display(im0)
-
- def update_pie(self, classes_dict):
- """
- Update the pie chart with new data.
-
- Args:
- classes_dict (dict): Dictionary containing the class data to plot.
- """
- # Update pie chart data
- labels = list(classes_dict.keys())
- sizes = list(classes_dict.values())
- total = sum(sizes)
- percentages = [size / total * 100 for size in sizes]
- start_angle = 90
- self.ax.clear()
-
- # Create pie chart without labels inside the slices
- wedges, autotexts = self.ax.pie(sizes, autopct=None, startangle=start_angle, textprops={"color": self.fg_color})
-
- # Construct legend labels with percentages
- legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)]
- self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
-
- # Adjust layout to fit the legend
- self.fig.tight_layout()
- self.fig.subplots_adjust(left=0.1, right=0.75)
-
- # Display and save the updated chart
- im0 = self.fig.canvas.draw()
- im0 = np.array(self.fig.canvas.renderer.buffer_rgba())
- self.write_and_display(im0)
-
+ self.display_output(im0)
-if __name__ == "__main__":
- Analytics("line", writer=None, im0_shape=None)
+ return im0 # Return the image
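
A hedged usage sketch for the refactored Analytics solution; chart frames come out at 1920x1080 per the figsize above, and the video path, codec and `show` argument are example assumptions:

```python
import cv2

from ultralytics.solutions import Analytics

analytics = Analytics(analytics_type="line", show=False)
cap = cv2.VideoCapture("traffic.mp4")
fps = cap.get(cv2.CAP_PROP_FPS) or 30
out = cv2.VideoWriter("analytics.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (1920, 1080))

frame_number = 0
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame_number += 1
    chart = analytics.process_data(frame, frame_number)  # returns the rendered chart frame
    out.write(chart)
cap.release()
out.release()
```
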
diff --git a/ultralytics/solutions/distance_calculation.py b/ultralytics/solutions/distance_calculation.py
index dccd1687c60..c0d8e77b371 100644
--- a/ultralytics/solutions/distance_calculation.py
+++ b/ultralytics/solutions/distance_calculation.py
@@ -1,73 +1,71 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
import cv2
-from ultralytics.utils.checks import check_imshow
+from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors
-class DistanceCalculation:
- """A class to calculate distance between two objects in a real-time video stream based on their tracks."""
+class DistanceCalculation(BaseSolution):
+ """
+ A class to calculate distance between two objects in a real-time video stream based on their tracks.
- def __init__(
- self,
- names,
- view_img=False,
- line_thickness=2,
- line_color=(255, 0, 255),
- centroid_color=(104, 31, 17),
- ):
- """
- Initializes the DistanceCalculation class with the given parameters.
+ This class extends BaseSolution to provide functionality for selecting objects and calculating the distance
+ between them in a video stream using YOLO object detection and tracking.
- Args:
- names (dict): Dictionary of classes names.
- view_img (bool, optional): Flag to indicate if the video stream should be displayed. Defaults to False.
- line_thickness (int, optional): Thickness of the lines drawn on the image. Defaults to 2.
- line_color (tuple, optional): Color of the lines drawn on the image (BGR format). Defaults to (255, 255, 0).
- centroid_color (tuple, optional): Color of the centroids drawn (BGR format). Defaults to (255, 0, 255).
- """
- # Visual & image information
- self.im0 = None
- self.annotator = None
- self.view_img = view_img
- self.line_color = line_color
- self.centroid_color = centroid_color
-
- # Prediction & tracking information
- self.names = names
- self.boxes = None
- self.line_thickness = line_thickness
- self.trk_ids = None
-
- # Distance calculation information
- self.centroids = []
+ Attributes:
+ left_mouse_count (int): Counter for left mouse button clicks.
+ selected_boxes (Dict[int, List[float]]): Dictionary to store selected bounding boxes and their track IDs.
+ annotator (Annotator): An instance of the Annotator class for drawing on the image.
+ boxes (List[List[float]]): List of bounding boxes for detected objects.
+ track_ids (List[int]): List of track IDs for detected objects.
+ clss (List[int]): List of class indices for detected objects.
+ names (List[str]): List of class names that the model can detect.
+ centroids (List[List[int]]): List to store centroids of selected bounding boxes.
+
+ Methods:
+ mouse_event_for_distance: Handles mouse events for selecting objects in the video stream.
+ calculate: Processes video frames and calculates the distance between selected objects.
+
+ Examples:
+ >>> distance_calc = DistanceCalculation()
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = distance_calc.calculate(frame)
+ >>> cv2.imshow("Distance Calculation", processed_frame)
+ >>> cv2.waitKey(0)
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the DistanceCalculation class for measuring object distances in video streams."""
+ super().__init__(**kwargs)
# Mouse event information
self.left_mouse_count = 0
self.selected_boxes = {}
- # Check if environment supports imshow
- self.env_check = check_imshow(warn=True)
- self.window_name = "Ultralytics Solutions"
+ self.centroids = [] # Initialize empty list to store centroids
def mouse_event_for_distance(self, event, x, y, flags, param):
"""
- Handles mouse events to select regions in a real-time video stream.
+ Handles mouse events to select regions in a real-time video stream for distance calculation.
Args:
- event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.).
+ event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
x (int): X-coordinate of the mouse pointer.
y (int): Y-coordinate of the mouse pointer.
- flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.).
- param (dict): Additional parameters passed to the function.
+ flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY).
+ param (Dict): Additional parameters passed to the function.
+
+ Examples:
+ >>> # Assuming 'dc' is an instance of DistanceCalculation
+ >>> cv2.setMouseCallback("window_name", dc.mouse_event_for_distance)
"""
if event == cv2.EVENT_LBUTTONDOWN:
self.left_mouse_count += 1
if self.left_mouse_count <= 2:
- for box, track_id in zip(self.boxes, self.trk_ids):
+ for box, track_id in zip(self.boxes, self.track_ids):
if box[0] < x < box[2] and box[1] < y < box[3] and track_id not in self.selected_boxes:
self.selected_boxes[track_id] = box
@@ -75,30 +73,31 @@ def mouse_event_for_distance(self, event, x, y, flags, param):
self.selected_boxes = {}
self.left_mouse_count = 0
- def start_process(self, im0, tracks):
+ def calculate(self, im0):
"""
- Processes the video frame and calculates the distance between two bounding boxes.
+ Processes a video frame and calculates the distance between two selected bounding boxes.
+
+ This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
+ between two user-selected objects if they have been chosen.
Args:
- im0 (ndarray): The image frame.
- tracks (list): List of tracks obtained from the object tracking process.
+ im0 (numpy.ndarray): The input image frame to process.
Returns:
- (ndarray): The processed image frame.
+ (numpy.ndarray): The processed image frame with annotations and distance calculations.
+
+ Examples:
+ >>> import numpy as np
+ >>> from ultralytics.solutions import DistanceCalculation
+ >>> dc = DistanceCalculation()
+ >>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
+ >>> processed_frame = dc.calculate(frame)
"""
- self.im0 = im0
- if tracks[0].boxes.id is None:
- if self.view_img:
- self.display_frames()
- return im0
-
- self.boxes = tracks[0].boxes.xyxy.cpu()
- clss = tracks[0].boxes.cls.cpu().tolist()
- self.trk_ids = tracks[0].boxes.id.int().cpu().tolist()
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
- self.annotator = Annotator(self.im0, line_width=self.line_thickness)
-
- for box, cls, track_id in zip(self.boxes, clss, self.trk_ids):
+ # Iterate over bounding boxes, track ids and classes index
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
self.annotator.box_label(box, color=colors(int(cls), True), label=self.names[int(cls)])
if len(self.selected_boxes) == 2:
@@ -115,25 +114,11 @@ def start_process(self, im0, tracks):
pixels_distance = math.sqrt(
(self.centroids[0][0] - self.centroids[1][0]) ** 2 + (self.centroids[0][1] - self.centroids[1][1]) ** 2
)
- self.annotator.plot_distance_and_line(pixels_distance, self.centroids, self.line_color, self.centroid_color)
+ self.annotator.plot_distance_and_line(pixels_distance, self.centroids)
self.centroids = []
- if self.view_img and self.env_check:
- self.display_frames()
-
- return im0
-
- def display_frames(self):
- """Displays the current frame with annotations."""
- cv2.namedWindow(self.window_name)
- cv2.setMouseCallback(self.window_name, self.mouse_event_for_distance)
- cv2.imshow(self.window_name, self.im0)
-
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
-
+ self.display_output(im0) # display output with base class function
+ cv2.setMouseCallback("Ultralytics Solutions", self.mouse_event_for_distance)
-if __name__ == "__main__":
- names = {0: "person", 1: "car"} # example class names
- distance_calculation = DistanceCalculation(names)
+ return im0 # return output image for more usage
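# A minimal usage sketch for the refactored DistanceCalculation API above. Object selection
# happens via mouse clicks on the display window, so an interactive display is required;
# the `model` and `show` keywords are assumed to be handled by the BaseSolution
# configuration (they are not defined in this file), and the video path is a placeholder.
import cv2

from ultralytics.solutions import DistanceCalculation

cap = cv2.VideoCapture("path/to/video.mp4")  # placeholder video path
distance_calculator = DistanceCalculation(model="yolo11n.pt", show=True)  # assumed kwargs

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    im0 = distance_calculator.calculate(im0)  # annotated frame with distance overlay

cap.release()
cv2.destroyAllWindows()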
diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py
index 728b167bc86..c0ee1494959 100644
--- a/ultralytics/solutions/heatmap.py
+++ b/ultralytics/solutions/heatmap.py
@@ -1,259 +1,127 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-from collections import defaultdict
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import cv2
import numpy as np
-from ultralytics.utils.checks import check_imshow, check_requirements
+from ultralytics.solutions.object_counter import ObjectCounter
from ultralytics.utils.plotting import Annotator
-check_requirements("shapely>=2.0.0")
-from shapely.geometry import LineString, Point, Polygon
+class Heatmap(ObjectCounter):
+ """
+ A class to draw heatmaps in real-time video streams based on object tracks.
+
+ This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video
+ streams. It uses tracked object positions to create a cumulative heatmap effect over time.
+ Attributes:
+ initialized (bool): Flag indicating whether the heatmap has been initialized.
+ colormap (int): OpenCV colormap used for heatmap visualization.
+ heatmap (np.ndarray): Array storing the cumulative heatmap data.
+ annotator (Annotator): Object for drawing annotations on the image.
-class Heatmap:
- """A class to draw heatmaps in real-time video stream based on their tracks."""
+ Methods:
+ heatmap_effect: Calculates and updates the heatmap effect for a given bounding box.
+ generate_heatmap: Generates and applies the heatmap effect to each frame.
- def __init__(
- self,
- names,
- imw=0,
- imh=0,
- colormap=cv2.COLORMAP_JET,
- heatmap_alpha=0.5,
- view_img=False,
- view_in_counts=True,
- view_out_counts=True,
- count_reg_pts=None,
- count_txt_color=(0, 0, 0),
- count_bg_color=(255, 255, 255),
- count_reg_color=(255, 0, 255),
- region_thickness=5,
- line_dist_thresh=15,
- line_thickness=2,
- decay_factor=0.99,
- shape="circle",
- ):
- """Initializes the heatmap class with default values for Visual, Image, track, count and heatmap parameters."""
- # Visual information
- self.annotator = None
- self.view_img = view_img
- self.shape = shape
+ Examples:
+ >>> from ultralytics.solutions import Heatmap
+ >>> heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_JET)
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = heatmap.generate_heatmap(frame)
+ """
- self.initialized = False
- self.names = names # Classes names
+ def __init__(self, **kwargs):
+ """Initializes the Heatmap class for real-time video stream heatmap generation based on object tracks."""
+ super().__init__(**kwargs)
- # Image information
- self.imw = imw
- self.imh = imh
- self.im0 = None
- self.tf = line_thickness
- self.view_in_counts = view_in_counts
- self.view_out_counts = view_out_counts
+ self.initialized = False # bool variable for heatmap initialization
+ if self.region is not None: # check if user provided the region coordinates
+ self.initialize_region()
- # Heatmap colormap and heatmap np array
- self.colormap = colormap
+ # store colormap
+ self.colormap = cv2.COLORMAP_PARULA if self.CFG["colormap"] is None else self.CFG["colormap"]
self.heatmap = None
- self.heatmap_alpha = heatmap_alpha
- # Predict/track information
- self.boxes = []
- self.track_ids = []
- self.clss = []
- self.track_history = defaultdict(list)
+ def heatmap_effect(self, box):
+ """
+ Efficiently calculates heatmap area and effect location for applying colormap.
- # Region & Line Information
- self.counting_region = None
- self.line_dist_thresh = line_dist_thresh
- self.region_thickness = region_thickness
- self.region_color = count_reg_color
+ Args:
+ box (List[float]): Bounding box coordinates [x0, y0, x1, y1].
- # Object Counting Information
- self.in_counts = 0
- self.out_counts = 0
- self.count_ids = []
- self.class_wise_count = {}
- self.count_txt_color = count_txt_color
- self.count_bg_color = count_bg_color
- self.cls_txtdisplay_gap = 50
+ Examples:
+ >>> heatmap = Heatmap()
+ >>> box = [100, 100, 200, 200]
+ >>> heatmap.heatmap_effect(box)
+ """
+ x0, y0, x1, y1 = map(int, box)
+ radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2
- # Decay factor
- self.decay_factor = decay_factor
+ # Create a meshgrid with region of interest (ROI) for vectorized distance calculations
+ xv, yv = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1))
- # Check if environment supports imshow
- self.env_check = check_imshow(warn=True)
+ # Calculate squared distances from the center
+ dist_squared = (xv - ((x0 + x1) // 2)) ** 2 + (yv - ((y0 + y1) // 2)) ** 2
- # Region and line selection
- self.count_reg_pts = count_reg_pts
- print(self.count_reg_pts)
- if self.count_reg_pts is not None:
- if len(self.count_reg_pts) == 2:
- print("Line Counter Initiated.")
- self.counting_region = LineString(self.count_reg_pts)
- elif len(self.count_reg_pts) >= 3:
- print("Polygon Counter Initiated.")
- self.counting_region = Polygon(self.count_reg_pts)
- else:
- print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.")
- print("Using Line Counter Now")
- self.counting_region = LineString(self.count_reg_pts)
+ # Create a mask of points within the radius
+ within_radius = dist_squared <= radius_squared
- # Shape of heatmap, if not selected
- if self.shape not in {"circle", "rect"}:
- print("Unknown shape value provided, 'circle' & 'rect' supported")
- print("Using Circular shape now")
- self.shape = "circle"
+ # Update only the values within the bounding box in a single vectorized operation
+ self.heatmap[y0:y1, x0:x1][within_radius] += 2
- def extract_results(self, tracks):
+ def generate_heatmap(self, im0):
"""
- Extracts results from the provided data.
+ Generate heatmap for each frame using Ultralytics.
Args:
- tracks (list): List of tracks obtained from the object tracking process.
- """
- if tracks[0].boxes.id is not None:
- self.boxes = tracks[0].boxes.xyxy.cpu()
- self.clss = tracks[0].boxes.cls.tolist()
- self.track_ids = tracks[0].boxes.id.int().tolist()
+ im0 (np.ndarray): Input image array for processing.
- def generate_heatmap(self, im0, tracks):
- """
- Generate heatmap based on tracking data.
+ Returns:
+ (np.ndarray): Processed image with heatmap overlay and object counts (if region is specified).
- Args:
- im0 (nd array): Image
- tracks (list): List of tracks obtained from the object tracking process.
+ Examples:
+ >>> heatmap = Heatmap()
+ >>> im0 = cv2.imread("image.jpg")
+ >>> result = heatmap.generate_heatmap(im0)
"""
- self.im0 = im0
-
- # Initialize heatmap only once
if not self.initialized:
- self.heatmap = np.zeros((int(self.im0.shape[0]), int(self.im0.shape[1])), dtype=np.float32)
- self.initialized = True
-
- self.heatmap *= self.decay_factor # decay factor
-
- self.extract_results(tracks)
- self.annotator = Annotator(self.im0, self.tf, None)
-
- if self.track_ids:
- # Draw counting region
- if self.count_reg_pts is not None:
- self.annotator.draw_region(
- reg_pts=self.count_reg_pts, color=self.region_color, thickness=self.region_thickness
- )
-
- for box, cls, track_id in zip(self.boxes, self.clss, self.track_ids):
- # Store class info
- if self.names[cls] not in self.class_wise_count:
- self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0}
-
- if self.shape == "circle":
- center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2))
- radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2
-
- y, x = np.ogrid[0 : self.heatmap.shape[0], 0 : self.heatmap.shape[1]]
- mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2
-
- self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += (
- 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])]
- )
-
- else:
- self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2
-
- # Store tracking hist
- track_line = self.track_history[track_id]
- track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2)))
- if len(track_line) > 30:
- track_line.pop(0)
-
- prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
-
- if self.count_reg_pts is not None:
- # Count objects in any polygon
- if len(self.count_reg_pts) >= 3:
- is_inside = self.counting_region.contains(Point(track_line[-1]))
-
- if prev_position is not None and is_inside and track_id not in self.count_ids:
- self.count_ids.append(track_id)
-
- if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0:
- self.in_counts += 1
- self.class_wise_count[self.names[cls]]["IN"] += 1
- else:
- self.out_counts += 1
- self.class_wise_count[self.names[cls]]["OUT"] += 1
-
- # Count objects using line
- elif len(self.count_reg_pts) == 2:
- if prev_position is not None and track_id not in self.count_ids:
- distance = Point(track_line[-1]).distance(self.counting_region)
- if distance < self.line_dist_thresh and track_id not in self.count_ids:
- self.count_ids.append(track_id)
-
- if (box[0] - prev_position[0]) * (
- self.counting_region.centroid.x - prev_position[0]
- ) > 0:
- self.in_counts += 1
- self.class_wise_count[self.names[cls]]["IN"] += 1
- else:
- self.out_counts += 1
- self.class_wise_count[self.names[cls]]["OUT"] += 1
-
- else:
- for box, cls in zip(self.boxes, self.clss):
- if self.shape == "circle":
- center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2))
- radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2
-
- y, x = np.ogrid[0 : self.heatmap.shape[0], 0 : self.heatmap.shape[1]]
- mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2
-
- self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += (
- 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])]
- )
-
- else:
- self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2
-
- if self.count_reg_pts is not None:
- labels_dict = {}
-
- for key, value in self.class_wise_count.items():
- if value["IN"] != 0 or value["OUT"] != 0:
- if not self.view_in_counts and not self.view_out_counts:
- continue
- elif not self.view_in_counts:
- labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}"
- elif not self.view_out_counts:
- labels_dict[str.capitalize(key)] = f"IN {value['IN']}"
- else:
- labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}"
-
- if labels_dict is not None:
- self.annotator.display_analytics(self.im0, labels_dict, self.count_txt_color, self.count_bg_color, 10)
+ self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99
+ self.initialized = True # Initialize heatmap only once
+
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
+
+ # Iterate over bounding boxes, track ids and classes index
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+ # Draw bounding box and counting region
+ self.heatmap_effect(box)
+
+ if self.region is not None:
+ self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2)
+ self.store_tracking_history(track_id, box) # Store track history
+ self.store_classwise_counts(cls) # store classwise counts in dict
+ current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+ # Store tracking previous position and perform object counting
+ prev_position = None
+ if len(self.track_history[track_id]) > 1:
+ prev_position = self.track_history[track_id][-2]
+ self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting
+
+ if self.region is not None:
+ self.display_counts(im0) # Display the counts on the frame
# Normalize, apply colormap to heatmap and combine with original image
- heatmap_normalized = cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX)
- heatmap_colored = cv2.applyColorMap(heatmap_normalized.astype(np.uint8), self.colormap)
- self.im0 = cv2.addWeighted(self.im0, 1 - self.heatmap_alpha, heatmap_colored, self.heatmap_alpha, 0)
-
- if self.env_check and self.view_img:
- self.display_frames()
-
- return self.im0
-
- def display_frames(self):
- """Display frame."""
- cv2.imshow("Ultralytics Heatmap", self.im0)
-
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
-
-
-if __name__ == "__main__":
- classes_names = {0: "person", 1: "car"} # example class names
- heatmap = Heatmap(classes_names)
+ if self.track_data.id is not None:
+ im0 = cv2.addWeighted(
+ im0,
+ 0.5,
+ cv2.applyColorMap(
+ cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8), self.colormap
+ ),
+ 0.5,
+ 0,
+ )
+
+ self.display_output(im0) # display output with base class function
+ return im0 # return output image for more usage
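# A minimal usage sketch for the Heatmap class above, following its docstring example.
# The `model` and `colormap` keywords appear in the docstring; passing a `region` keyword
# is assumed to enable the inherited ObjectCounter counting, and the video path is a
# placeholder.
import cv2

from ultralytics.solutions import Heatmap

heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_JET)

cap = cv2.VideoCapture("path/to/video.mp4")  # placeholder video path
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    im0 = heatmap.generate_heatmap(im0)  # cumulative heatmap blended into the frame

cap.release()
cv2.destroyAllWindows()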
diff --git a/ultralytics/solutions/object_counter.py b/ultralytics/solutions/object_counter.py
index cc7fe459462..d202ca51f51 100644
--- a/ultralytics/solutions/object_counter.py
+++ b/ultralytics/solutions/object_counter.py
@@ -1,243 +1,203 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-from collections import defaultdict
-
-import cv2
-
-from ultralytics.utils.checks import check_imshow, check_requirements
+from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors
-check_requirements("shapely>=2.0.0")
-
-from shapely.geometry import LineString, Point, Polygon
+class ObjectCounter(BaseSolution):
+ """
+ A class to manage the counting of objects in a real-time video stream based on their tracks.
+
+ This class extends the BaseSolution class and provides functionality for counting objects moving in and out of a
+ specified region in a video stream. It supports both polygonal and linear regions for counting.
+
+ Attributes:
+ in_count (int): Counter for objects moving inward.
+ out_count (int): Counter for objects moving outward.
+ counted_ids (List[int]): List of IDs of objects that have been counted.
+ classwise_counts (Dict[str, Dict[str, int]]): Dictionary for counts, categorized by object class.
+ region_initialized (bool): Flag indicating whether the counting region has been initialized.
+ show_in (bool): Flag to control display of inward count.
+ show_out (bool): Flag to control display of outward count.
+
+ Methods:
+ count_objects: Counts objects within a polygonal or linear region.
+ store_classwise_counts: Initializes class-wise counts if not already present.
+ display_counts: Displays object counts on the frame.
+ count: Processes input data (frames or object tracks) and updates counts.
+
+ Examples:
+ >>> counter = ObjectCounter()
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = counter.count(frame)
+ >>> print(f"Inward count: {counter.in_count}, Outward count: {counter.out_count}")
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the ObjectCounter class for real-time object counting in video streams."""
+ super().__init__(**kwargs)
+
+ self.in_count = 0 # Counter for objects moving inward
+ self.out_count = 0 # Counter for objects moving outward
+ self.counted_ids = [] # List of IDs of objects that have been counted
+ self.classwise_counts = {} # Dictionary for counts, categorized by object class
+ self.region_initialized = False # Bool variable for region initialization
+
+ self.show_in = self.CFG["show_in"]
+ self.show_out = self.CFG["show_out"]
+
+ def count_objects(self, current_centroid, track_id, prev_position, cls):
+ """
+ Counts objects within a polygonal or linear region based on their tracks.
-class ObjectCounter:
- """A class to manage the counting of objects in a real-time video stream based on their tracks."""
+ Args:
+ current_centroid (Tuple[float, float]): Current centroid values in the current frame.
+ track_id (int): Unique identifier for the tracked object.
+ prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track.
+ cls (int): Class index for classwise count updates.
+
+ Examples:
+ >>> counter = ObjectCounter()
+            >>> current_centroid = (130, 230)
+ >>> track_id = 1
+ >>> prev_position = (120, 220)
+ >>> cls = 0
+ >>> counter.count_objects(current_centroid, track_id, prev_position, cls)
+ """
+ if prev_position is None or track_id in self.counted_ids:
+ return
+
+ if len(self.region) == 2: # Linear region (defined as a line segment)
+ line = self.LineString(self.region) # Check if the line intersects the trajectory of the object
+ if line.intersects(self.LineString([prev_position, current_centroid])):
+ # Determine orientation of the region (vertical or horizontal)
+ if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]):
+ # Vertical region: Compare x-coordinates to determine direction
+ if current_centroid[0] > prev_position[0]: # Moving right
+ self.in_count += 1
+ self.classwise_counts[self.names[cls]]["IN"] += 1
+ else: # Moving left
+ self.out_count += 1
+ self.classwise_counts[self.names[cls]]["OUT"] += 1
+ # Horizontal region: Compare y-coordinates to determine direction
+ elif current_centroid[1] > prev_position[1]: # Moving downward
+ self.in_count += 1
+ self.classwise_counts[self.names[cls]]["IN"] += 1
+ else: # Moving upward
+ self.out_count += 1
+ self.classwise_counts[self.names[cls]]["OUT"] += 1
+ self.counted_ids.append(track_id)
+
+ elif len(self.region) > 2: # Polygonal region
+ polygon = self.Polygon(self.region)
+ if polygon.contains(self.Point(current_centroid)):
+ # Determine motion direction for vertical or horizontal polygons
+ region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region)
+ region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region)
- def __init__(
- self,
- names,
- reg_pts=None,
- line_thickness=2,
- view_img=False,
- view_in_counts=True,
- view_out_counts=True,
- draw_tracks=False,
- ):
+ if (
+ region_width < region_height
+ and current_centroid[0] > prev_position[0]
+ or region_width >= region_height
+ and current_centroid[1] > prev_position[1]
+            ):  # Moving right (tall region) or downward (wide region)
+ self.in_count += 1
+ self.classwise_counts[self.names[cls]]["IN"] += 1
+            else:  # Moving left (tall region) or upward (wide region)
+ self.out_count += 1
+ self.classwise_counts[self.names[cls]]["OUT"] += 1
+ self.counted_ids.append(track_id)
+
+ def store_classwise_counts(self, cls):
"""
- Initializes the ObjectCounter with various tracking and counting parameters.
+ Initialize class-wise counts for a specific object class if not already present.
Args:
- names (dict): Dictionary of class names.
- reg_pts (list): List of points defining the counting region.
- line_thickness (int): Line thickness for bounding boxes.
- view_img (bool): Flag to control whether to display the video stream.
- view_in_counts (bool): Flag to control whether to display the in counts on the video stream.
- view_out_counts (bool): Flag to control whether to display the out counts on the video stream.
- draw_tracks (bool): Flag to control whether to draw the object tracks.
+ cls (int): Class index for classwise count updates.
+
+ This method ensures that the 'classwise_counts' dictionary contains an entry for the specified class,
+ initializing 'IN' and 'OUT' counts to zero if the class is not already present.
+
+ Examples:
+ >>> counter = ObjectCounter()
+ >>> counter.store_classwise_counts(0) # Initialize counts for class index 0
+ >>> print(counter.classwise_counts)
+ {'person': {'IN': 0, 'OUT': 0}}
"""
- # Mouse events
- self.is_drawing = False
- self.selected_point = None
-
- # Region & Line Information
- self.reg_pts = [(20, 400), (1260, 400)] if reg_pts is None else reg_pts
- self.counting_region = None
-
- # Image and annotation Information
- self.im0 = None
- self.tf = line_thickness
- self.view_img = view_img
- self.view_in_counts = view_in_counts
- self.view_out_counts = view_out_counts
-
- self.names = names # Classes names
- self.window_name = "Ultralytics YOLOv8 Object Counter"
-
- # Object counting Information
- self.in_counts = 0
- self.out_counts = 0
- self.count_ids = []
- self.class_wise_count = {}
-
- # Tracks info
- self.track_history = defaultdict(list)
- self.draw_tracks = draw_tracks
-
- # Check if environment supports imshow
- self.env_check = check_imshow(warn=True)
-
- # Initialize counting region
- if len(self.reg_pts) == 2:
- print("Line Counter Initiated.")
- self.counting_region = LineString(self.reg_pts)
- elif len(self.reg_pts) >= 3:
- print("Polygon Counter Initiated.")
- self.counting_region = Polygon(self.reg_pts)
- else:
- print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.")
- print("Using Line Counter Now")
- self.counting_region = LineString(self.reg_pts)
-
- # Define the counting line segment
- self.counting_line_segment = LineString(
- [
- (self.reg_pts[0][0], self.reg_pts[0][1]),
- (self.reg_pts[1][0], self.reg_pts[1][1]),
- ]
- )
-
- def mouse_event_for_region(self, event, x, y, flags, params):
+ if self.names[cls] not in self.classwise_counts:
+ self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0}
+
+ def display_counts(self, im0):
"""
- Handles mouse events for defining and moving the counting region in a real-time video stream.
+ Displays object counts on the input image or frame.
Args:
- event (int): The type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.).
- x (int): The x-coordinate of the mouse pointer.
- y (int): The y-coordinate of the mouse pointer.
- flags (int): Any associated event flags (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.).
- params (dict): Additional parameters for the function.
+ im0 (numpy.ndarray): The input image or frame to display counts on.
+
+ Examples:
+ >>> counter = ObjectCounter()
+ >>> frame = cv2.imread("image.jpg")
+ >>> counter.display_counts(frame)
"""
- if event == cv2.EVENT_LBUTTONDOWN:
- for i, point in enumerate(self.reg_pts):
- if (
- isinstance(point, (tuple, list))
- and len(point) >= 2
- and (abs(x - point[0]) < 10 and abs(y - point[1]) < 10)
- ):
- self.selected_point = i
- self.is_drawing = True
- break
-
- elif event == cv2.EVENT_MOUSEMOVE:
- if self.is_drawing and self.selected_point is not None:
- self.reg_pts[self.selected_point] = (x, y)
- self.counting_region = Polygon(self.reg_pts)
-
- elif event == cv2.EVENT_LBUTTONUP:
- self.is_drawing = False
- self.selected_point = None
-
- def extract_and_process_tracks(self, tracks):
- """Extracts and processes tracks for object counting in a video stream."""
- # Annotator Init and region drawing
- annotator = Annotator(self.im0, self.tf, self.names)
-
- # Draw region or line
- annotator.draw_region(reg_pts=self.reg_pts, color=(104, 0, 123), thickness=self.tf * 2)
-
- # Extract tracks for OBB or object detection
- track_data = tracks[0].obb or tracks[0].boxes
-
- if track_data and track_data.id is not None:
- boxes = track_data.xyxy.cpu()
- clss = track_data.cls.cpu().tolist()
- track_ids = track_data.id.int().cpu().tolist()
-
- # Extract tracks
- for box, track_id, cls in zip(boxes, track_ids, clss):
- # Draw bounding box
- annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True))
-
- # Store class info
- if self.names[cls] not in self.class_wise_count:
- self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0}
-
- # Draw Tracks
- track_line = self.track_history[track_id]
- track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2)))
- if len(track_line) > 30:
- track_line.pop(0)
-
- # Draw track trails
- if self.draw_tracks:
- annotator.draw_centroid_and_tracks(
- track_line,
- color=colors(int(track_id), True),
- track_thickness=self.tf,
- )
-
- prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
-
- # Count objects in any polygon
- if len(self.reg_pts) >= 3:
- is_inside = self.counting_region.contains(Point(track_line[-1]))
-
- if prev_position is not None and is_inside and track_id not in self.count_ids:
- self.count_ids.append(track_id)
-
- if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0:
- self.in_counts += 1
- self.class_wise_count[self.names[cls]]["IN"] += 1
- else:
- self.out_counts += 1
- self.class_wise_count[self.names[cls]]["OUT"] += 1
-
- # Count objects using line
- elif len(self.reg_pts) == 2:
- if (
- prev_position is not None
- and track_id not in self.count_ids
- and LineString([(prev_position[0], prev_position[1]), (box[0], box[1])]).intersects(
- self.counting_line_segment
- )
- ):
- self.count_ids.append(track_id)
-
- # Determine the direction of movement (IN or OUT)
- dx = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0])
- dy = (box[1] - prev_position[1]) * (self.counting_region.centroid.y - prev_position[1])
- if dx > 0 and dy > 0:
- self.in_counts += 1
- self.class_wise_count[self.names[cls]]["IN"] += 1
- else:
- self.out_counts += 1
- self.class_wise_count[self.names[cls]]["OUT"] += 1
-
- labels_dict = {}
-
- for key, value in self.class_wise_count.items():
- if value["IN"] != 0 or value["OUT"] != 0:
- if not self.view_in_counts and not self.view_out_counts:
- continue
- elif not self.view_in_counts:
- labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}"
- elif not self.view_out_counts:
- labels_dict[str.capitalize(key)] = f"IN {value['IN']}"
- else:
- labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}"
+ labels_dict = {
+ str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
+ f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
+ for key, value in self.classwise_counts.items()
+ if value["IN"] != 0 or value["OUT"] != 0
+ }
if labels_dict:
- annotator.display_analytics(self.im0, labels_dict, (104, 31, 17), (255, 255, 255), 10)
-
- def display_frames(self):
- """Displays the current frame with annotations and regions in a window."""
- if self.env_check:
- cv2.namedWindow(self.window_name)
- if len(self.reg_pts) == 4: # only add mouse event If user drawn region
- cv2.setMouseCallback(self.window_name, self.mouse_event_for_region, {"region_points": self.reg_pts})
- cv2.imshow(self.window_name, self.im0)
- # Break Window
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
-
- def start_counting(self, im0, tracks):
- """
- Main function to start the object counting process.
+ self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10)
- Args:
- im0 (ndarray): Current frame from the video stream.
- tracks (list): List of tracks obtained from the object tracking process.
+ def count(self, im0):
"""
- self.im0 = im0 # store image
- self.extract_and_process_tracks(tracks) # draw region even if no objects
+ Processes input data (frames or object tracks) and updates object counts.
+
+ This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates
+ object counts, and displays the results on the input image.
- if self.view_img:
- self.display_frames()
- return self.im0
+ Args:
+ im0 (numpy.ndarray): The input image or frame to be processed.
+ Returns:
+ (numpy.ndarray): The processed image with annotations and count information.
-if __name__ == "__main__":
- classes_names = {0: "person", 1: "car"} # example class names
- ObjectCounter(classes_names)
+ Examples:
+ >>> counter = ObjectCounter()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> processed_frame = counter.count(frame)
+ """
+ if not self.region_initialized:
+ self.initialize_region()
+ self.region_initialized = True
+
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
+
+ self.annotator.draw_region(
+ reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
+ ) # Draw region
+
+ # Iterate over bounding boxes, track ids and classes index
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+ # Draw bounding box and counting region
+ self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+ self.store_tracking_history(track_id, box) # Store track history
+ self.store_classwise_counts(cls) # store classwise counts in dict
+
+ # Draw tracks of objects
+ self.annotator.draw_centroid_and_tracks(
+ self.track_line, color=colors(int(cls), True), track_thickness=self.line_width
+ )
+ current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+ # store previous position of track for object counting
+ prev_position = None
+ if len(self.track_history[track_id]) > 1:
+ prev_position = self.track_history[track_id][-2]
+ self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting
+
+ self.display_counts(im0) # Display the counts on the frame
+ self.display_output(im0) # display output with base class function
+
+ return im0 # return output image for more usage
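# A minimal usage sketch for the ObjectCounter class above. A two-point region defines a
# counting line and three or more points a polygon, matching the count_objects() branches;
# the import path and the `model`/`region` keywords are assumed to follow the other
# solutions in this patch, and the video path is a placeholder.
import cv2

from ultralytics.solutions import ObjectCounter

counter = ObjectCounter(model="yolo11n.pt", region=[(20, 400), (1260, 400)])  # line region

cap = cv2.VideoCapture("path/to/video.mp4")  # placeholder video path
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    im0 = counter.count(im0)

cap.release()
print(counter.in_count, counter.out_count, counter.classwise_counts)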
diff --git a/ultralytics/solutions/parking_management.py b/ultralytics/solutions/parking_management.py
index ef58ad62744..be1c8c9b3f4 100644
--- a/ultralytics/solutions/parking_management.py
+++ b/ultralytics/solutions/parking_management.py
@@ -1,241 +1,246 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import json
import cv2
import numpy as np
-from ultralytics.utils.checks import check_imshow, check_requirements
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.checks import check_requirements
from ultralytics.utils.plotting import Annotator
class ParkingPtsSelection:
- """Class for selecting and managing parking zone points on images using a Tkinter-based UI."""
+ """
+ A class for selecting and managing parking zone points on images using a Tkinter-based UI.
+
+ This class provides functionality to upload an image, select points to define parking zones, and save the
+ selected points to a JSON file. It uses Tkinter for the graphical user interface.
+
+ Attributes:
+ tk (module): The Tkinter module for GUI operations.
+ filedialog (module): Tkinter's filedialog module for file selection operations.
+ messagebox (module): Tkinter's messagebox module for displaying message boxes.
+ master (tk.Tk): The main Tkinter window.
+ canvas (tk.Canvas): The canvas widget for displaying the image and drawing bounding boxes.
+ image (PIL.Image.Image): The uploaded image.
+ canvas_image (ImageTk.PhotoImage): The image displayed on the canvas.
+ rg_data (List[List[Tuple[int, int]]]): List of bounding boxes, each defined by 4 points.
+ current_box (List[Tuple[int, int]]): Temporary storage for the points of the current bounding box.
+ imgw (int): Original width of the uploaded image.
+ imgh (int): Original height of the uploaded image.
+ canvas_max_width (int): Maximum width of the canvas.
+ canvas_max_height (int): Maximum height of the canvas.
+
+ Methods:
+ initialize_properties: Initializes the necessary properties.
+ upload_image: Uploads an image, resizes it to fit the canvas, and displays it.
+ on_canvas_click: Handles mouse clicks to add points for bounding boxes.
+ draw_box: Draws a bounding box on the canvas.
+ remove_last_bounding_box: Removes the last bounding box and redraws the canvas.
+ redraw_canvas: Redraws the canvas with the image and all bounding boxes.
+ save_to_json: Saves the bounding boxes to a JSON file.
+
+ Examples:
+ >>> parking_selector = ParkingPtsSelection()
+ >>> # Use the GUI to upload an image, select parking zones, and save the data
+ """
def __init__(self):
- """Initializes the UI for selecting parking zone points in a tkinter window."""
+ """Initializes the ParkingPtsSelection class, setting up UI and properties for parking zone point selection."""
check_requirements("tkinter")
+ import tkinter as tk
+ from tkinter import filedialog, messagebox
- import tkinter as tk # scope for multi-environment compatibility
-
- self.tk = tk
- self.master = tk.Tk()
+ self.tk, self.filedialog, self.messagebox = tk, filedialog, messagebox
+ self.master = self.tk.Tk() # Reference to the main application window or parent widget
self.master.title("Ultralytics Parking Zones Points Selector")
-
- # Disable window resizing
self.master.resizable(False, False)
- # Setup canvas for image display
- self.canvas = self.tk.Canvas(self.master, bg="white")
+ self.canvas = self.tk.Canvas(self.master, bg="white") # Canvas widget for displaying images or graphics
+ self.canvas.pack(side=self.tk.BOTTOM)
+
+ self.image = None # Variable to store the loaded image
+ self.canvas_image = None # Reference to the image displayed on the canvas
+ self.canvas_max_width = None # Maximum allowed width for the canvas
+ self.canvas_max_height = None # Maximum allowed height for the canvas
+ self.rg_data = None # Data related to region or annotation management
+ self.current_box = None # Stores the currently selected or active bounding box
+ self.imgh = None # Height of the current image
+ self.imgw = None # Width of the current image
- # Setup buttons
+ # Button frame with buttons
button_frame = self.tk.Frame(self.master)
button_frame.pack(side=self.tk.TOP)
- self.tk.Button(button_frame, text="Upload Image", command=self.upload_image).grid(row=0, column=0)
- self.tk.Button(button_frame, text="Remove Last BBox", command=self.remove_last_bounding_box).grid(
- row=0, column=1
- )
- self.tk.Button(button_frame, text="Save", command=self.save_to_json).grid(row=0, column=2)
-
- # Initialize properties
- self.image_path = None
- self.image = None
- self.canvas_image = None
- self.rg_data = [] # region coordinates
- self.current_box = []
- self.imgw = 0 # image width
- self.imgh = 0 # image height
-
- # Constants
- self.canvas_max_width = 1280
- self.canvas_max_height = 720
+ for text, cmd in [
+ ("Upload Image", self.upload_image),
+ ("Remove Last BBox", self.remove_last_bounding_box),
+ ("Save", self.save_to_json),
+ ]:
+ self.tk.Button(button_frame, text=text, command=cmd).pack(side=self.tk.LEFT)
+ self.initialize_properties()
self.master.mainloop()
- def upload_image(self):
- """Upload an image and resize it to fit canvas."""
- from tkinter import filedialog
+ def initialize_properties(self):
+ """Initialize properties for image, canvas, bounding boxes, and dimensions."""
+ self.image = self.canvas_image = None
+ self.rg_data, self.current_box = [], []
+ self.imgw = self.imgh = 0
+ self.canvas_max_width, self.canvas_max_height = 1280, 720
+ def upload_image(self):
+ """Uploads and displays an image on the canvas, resizing it to fit within specified dimensions."""
from PIL import Image, ImageTk # scope because ImageTk requires tkinter package
- self.image_path = filedialog.askopenfilename(filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")])
- if not self.image_path:
+ self.image = Image.open(self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")]))
+ if not self.image:
return
- self.image = Image.open(self.image_path)
self.imgw, self.imgh = self.image.size
-
- # Calculate the aspect ratio and resize image
aspect_ratio = self.imgw / self.imgh
- if aspect_ratio > 1:
- # Landscape orientation
- canvas_width = min(self.canvas_max_width, self.imgw)
- canvas_height = int(canvas_width / aspect_ratio)
- else:
- # Portrait orientation
- canvas_height = min(self.canvas_max_height, self.imgh)
- canvas_width = int(canvas_height * aspect_ratio)
-
- # Check if canvas is already initialized
- if self.canvas:
- self.canvas.destroy() # Destroy previous canvas
-
- self.canvas = self.tk.Canvas(self.master, bg="white", width=canvas_width, height=canvas_height)
- resized_image = self.image.resize((canvas_width, canvas_height), Image.LANCZOS)
- self.canvas_image = ImageTk.PhotoImage(resized_image)
- self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
+ canvas_width = (
+ min(self.canvas_max_width, self.imgw) if aspect_ratio > 1 else int(self.canvas_max_height * aspect_ratio)
+ )
+ canvas_height = (
+ min(self.canvas_max_height, self.imgh) if aspect_ratio <= 1 else int(canvas_width / aspect_ratio)
+ )
- self.canvas.pack(side=self.tk.BOTTOM)
+ self.canvas.config(width=canvas_width, height=canvas_height)
+ self.canvas_image = ImageTk.PhotoImage(self.image.resize((canvas_width, canvas_height)))
+ self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
+        self.canvas.bind("<Button-1>", self.on_canvas_click)
- # Reset bounding boxes and current box
- self.rg_data = []
- self.current_box = []
+ self.rg_data.clear(), self.current_box.clear()
def on_canvas_click(self, event):
- """Handle mouse clicks on canvas to create points for bounding boxes."""
+ """Handles mouse clicks to add points for bounding boxes on the canvas."""
self.current_box.append((event.x, event.y))
self.canvas.create_oval(event.x - 3, event.y - 3, event.x + 3, event.y + 3, fill="red")
-
if len(self.current_box) == 4:
- self.rg_data.append(self.current_box)
- [
- self.canvas.create_line(self.current_box[i], self.current_box[(i + 1) % 4], fill="blue", width=2)
- for i in range(4)
- ]
- self.current_box = []
+ self.rg_data.append(self.current_box.copy())
+ self.draw_box(self.current_box)
+ self.current_box.clear()
- def remove_last_bounding_box(self):
- """Remove the last drawn bounding box from canvas."""
- from tkinter import messagebox # scope for multi-environment compatibility
+ def draw_box(self, box):
+ """Draws a bounding box on the canvas using the provided coordinates."""
+ for i in range(4):
+ self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
- if self.rg_data:
- self.rg_data.pop() # Remove the last bounding box
- self.canvas.delete("all") # Clear the canvas
- self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image) # Redraw the image
+ def remove_last_bounding_box(self):
+ """Removes the last bounding box from the list and redraws the canvas."""
+ if not self.rg_data:
+ self.messagebox.showwarning("Warning", "No bounding boxes to remove.")
+ return
+ self.rg_data.pop()
+ self.redraw_canvas()
- # Redraw all bounding boxes
- for box in self.rg_data:
- [self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2) for i in range(4)]
- messagebox.showinfo("Success", "Last bounding box removed.")
- else:
- messagebox.showwarning("Warning", "No bounding boxes to remove.")
+ def redraw_canvas(self):
+ """Redraws the canvas with the image and all bounding boxes."""
+ self.canvas.delete("all")
+ self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
+ for box in self.rg_data:
+ self.draw_box(box)
def save_to_json(self):
- """Saves rescaled bounding boxes to 'bounding_boxes.json' based on image-to-canvas size ratio."""
- from tkinter import messagebox # scope for multi-environment compatibility
+ """Saves the selected parking zone points to a JSON file with scaled coordinates."""
+ scale_w, scale_h = self.imgw / self.canvas.winfo_width(), self.imgh / self.canvas.winfo_height()
+ data = [{"points": [(int(x * scale_w), int(y * scale_h)) for x, y in box]} for box in self.rg_data]
- rg_data = [] # regions data
- for box in self.rg_data:
- rs_box = [
- (
- int(x * self.imgw / self.canvas.winfo_width()), # width scaling
- int(y * self.imgh / self.canvas.winfo_height()), # height scaling
- )
- for x, y in box
- ]
- rg_data.append({"points": rs_box})
- with open("bounding_boxes.json", "w") as f:
- json.dump(rg_data, f, indent=4)
-
- messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json")
-
-
-class ParkingManagement:
- """Manages parking occupancy and availability using YOLOv8 for real-time monitoring and visualization."""
-
- def __init__(
- self,
- model, # Ultralytics YOLO model file path
- json_file, # Parking management annotation file created from Parking Annotator
- occupied_region_color=(0, 0, 255), # occupied region color
- available_region_color=(0, 255, 0), # available region color
- ):
- """
- Initializes the parking management system with a YOLOv8 model and visualization settings.
+ from io import StringIO # Function level import, as it's only required to store coordinates, not every frame
- Args:
- model (str): Path to the YOLOv8 model.
- json_file (str): file that have all parking slot points data
- occupied_region_color (tuple): RGB color tuple for occupied regions.
- available_region_color (tuple): RGB color tuple for available regions.
- """
- # Model initialization
- from ultralytics import YOLO
+ write_buffer = StringIO()
+ json.dump(data, write_buffer, indent=4)
+ with open("bounding_boxes.json", "w", encoding="utf-8") as f:
+ f.write(write_buffer.getvalue())
+ self.messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json")
- self.model = YOLO(model)
- # Load JSON data
- with open(json_file) as f:
- self.json_data = json.load(f)
+class ParkingManagement(BaseSolution):
+ """
+    Manages parking occupancy and availability using a YOLO model for real-time monitoring and visualization.
- self.pr_info = {"Occupancy": 0, "Available": 0} # dictionary for parking information
+ This class extends BaseSolution to provide functionality for parking lot management, including detection of
+ occupied spaces, visualization of parking regions, and display of occupancy statistics.
- self.occ = occupied_region_color
- self.arc = available_region_color
+ Attributes:
+ json_file (str): Path to the JSON file containing parking region details.
+ json (List[Dict]): Loaded JSON data containing parking region information.
+ pr_info (Dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces).
+ arc (Tuple[int, int, int]): RGB color tuple for available region visualization.
+ occ (Tuple[int, int, int]): RGB color tuple for occupied region visualization.
+ dc (Tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects.
- self.env_check = check_imshow(warn=True) # check if environment supports imshow
+ Methods:
+ process_data: Processes model data for parking lot management and visualization.
- def process_data(self, im0):
- """
- Process the model data for parking lot management.
+ Examples:
+ >>> from ultralytics.solutions import ParkingManagement
+ >>> parking_manager = ParkingManagement(model="yolo11n.pt", json_file="parking_regions.json")
+ >>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
+ >>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
+ """
- Args:
- im0 (ndarray): inference image
+ def __init__(self, **kwargs):
+ """Initializes the parking management system with a YOLO model and visualization settings."""
+ super().__init__(**kwargs)
+
+ self.json_file = self.CFG["json_file"] # Load JSON data
+ if self.json_file is None:
+            LOGGER.warning("❗ json_file argument missing. Parking region details required.")
+            raise ValueError("❗ Json file path can not be empty")
+
+ with open(self.json_file) as f:
+ self.json = json.load(f)
+
+ self.pr_info = {"Occupancy": 0, "Available": 0} # dictionary for parking information
+
+ self.arc = (0, 0, 255) # available region color
+ self.occ = (0, 255, 0) # occupied region color
+ self.dc = (255, 0, 189) # centroid color for each box
+
+ def process_data(self, im0):
"""
- results = self.model.track(im0, persist=True, show=False) # object tracking
+ Processes the model data for parking lot management.
- es, fs = len(self.json_data), 0 # empty slots, filled slots
- annotator = Annotator(im0) # init annotator
+ This function analyzes the input image, extracts tracks, and determines the occupancy status of parking
+ regions defined in the JSON file. It annotates the image with occupied and available parking spots,
+ and updates the parking information.
- # extract tracks data
- if results[0].boxes.id is None:
- self.display_frames(im0)
- return im0
+ Args:
+ im0 (np.ndarray): The input inference image.
- boxes = results[0].boxes.xyxy.cpu().tolist()
- clss = results[0].boxes.cls.cpu().tolist()
+ Examples:
+ >>> parking_manager = ParkingManagement(json_file="parking_regions.json")
+ >>> image = cv2.imread("parking_lot.jpg")
+ >>> parking_manager.process_data(image)
+ """
+ self.extract_tracks(im0) # extract tracks from im0
+ es, fs = len(self.json), 0 # empty slots, filled slots
+ annotator = Annotator(im0, self.line_width) # init annotator
- for region in self.json_data:
+ for region in self.json:
# Convert points to a NumPy array with the correct dtype and reshape properly
pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
rg_occupied = False # occupied region initialization
- for box, cls in zip(boxes, clss):
- xc = int((box[0] + box[2]) / 2)
- yc = int((box[1] + box[3]) / 2)
- annotator.display_objects_labels(
- im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10
- )
+ for box, cls in zip(self.boxes, self.clss):
+ xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
dist = cv2.pointPolygonTest(pts_array, (xc, yc), False)
if dist >= 0:
+ # cv2.circle(im0, (xc, yc), radius=self.line_width * 4, color=self.dc, thickness=-1)
+ annotator.display_objects_labels(
+ im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10
+ )
rg_occupied = True
break
- if rg_occupied:
- fs += 1
- es -= 1
-
+ fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es)
# Plotting regions
- color = self.occ if rg_occupied else self.arc
- cv2.polylines(im0, [pts_array], isClosed=True, color=color, thickness=2)
+ cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2)
- self.pr_info["Occupancy"] = fs
- self.pr_info["Available"] = es
+ self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es
annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10)
-
- self.display_frames(im0)
- return im0
-
- def display_frames(self, im0):
- """
- Display frame.
-
- Args:
- im0 (ndarray): inference image
- """
- if self.env_check:
- cv2.imshow("Ultralytics Parking Manager", im0)
- # Break Window
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
+ self.display_output(im0) # display output with base class function
+ return im0 # return output image for more usage
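# A minimal usage sketch for the parking classes above. ParkingPtsSelection (same module)
# can be run once to draw regions and save bounding_boxes.json via its Tkinter UI;
# ParkingManagement then consumes such a file. The `model` keyword follows the class
# docstring example, and the file paths are placeholders.
import cv2

from ultralytics.solutions import ParkingManagement

parking_manager = ParkingManagement(model="yolo11n.pt", json_file="bounding_boxes.json")

im0 = cv2.imread("parking_lot.jpg")  # placeholder image path
im0 = parking_manager.process_data(im0)
print(parking_manager.pr_info)  # e.g. {'Occupancy': 5, 'Available': 7}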
diff --git a/ultralytics/solutions/queue_management.py b/ultralytics/solutions/queue_management.py
index ef601503950..4fcf8fa7103 100644
--- a/ultralytics/solutions/queue_management.py
+++ b/ultralytics/solutions/queue_management.py
@@ -1,127 +1,112 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-from collections import defaultdict
-
-import cv2
-
-from ultralytics.utils.checks import check_imshow, check_requirements
+from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors
-check_requirements("shapely>=2.0.0")
-
-from shapely.geometry import Point, Polygon
-
-
-class QueueManager:
- """A class to manage the queue in a real-time video stream based on object tracks."""
- def __init__(
- self,
- names,
- reg_pts=None,
- line_thickness=2,
- view_img=False,
- draw_tracks=False,
- ):
+class QueueManager(BaseSolution):
+ """
+ Manages queue counting in real-time video streams based on object tracks.
+
+ This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
+ region in video frames.
+
+ Attributes:
+ counts (int): The current count of objects in the queue.
+ rect_color (Tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
+ region_length (int): The number of points defining the queue region.
+ annotator (Annotator): An instance of the Annotator class for drawing on frames.
+ track_line (List[Tuple[int, int]]): List of track line coordinates.
+ track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object.
+
+ Methods:
+ initialize_region: Initializes the queue region.
+ process_queue: Processes a single frame for queue management.
+ extract_tracks: Extracts object tracks from the current frame.
+ store_tracking_history: Stores the tracking history for an object.
+ display_output: Displays the processed output.
+
+ Examples:
+ >>> cap = cv2.VideoCapture("Path/to/video/file.mp4")
+        >>> queue_manager = QueueManager(region=[(100, 100), (200, 200), (300, 300)])
+ >>> while cap.isOpened():
+ >>> success, im0 = cap.read()
+ >>> if not success:
+ >>> break
+        >>> out = queue_manager.process_queue(im0)
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the QueueManager with parameters for tracking and counting objects in a video stream."""
+ super().__init__(**kwargs)
+ self.initialize_region()
+ self.counts = 0 # Queue counts Information
+ self.rect_color = (255, 255, 255) # Rectangle color
+ self.region_length = len(self.region) # Store region length for further usage
+
+ def process_queue(self, im0):
"""
- Initializes the QueueManager with specified parameters for tracking and counting objects.
+ Processes the queue management for a single frame of video.
Args:
- names (dict): A dictionary mapping class IDs to class names.
- reg_pts (list of tuples, optional): Points defining the counting region polygon. Defaults to a predefined
- rectangle.
- line_thickness (int, optional): Thickness of the annotation lines. Defaults to 2.
- view_img (bool, optional): Whether to display the image frames. Defaults to False.
- draw_tracks (bool, optional): Whether to draw tracks of the objects. Defaults to False.
+ im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream.
+
+ Returns:
+ (numpy.ndarray): Processed image with annotations, bounding boxes, and queue counts.
+
+ This method performs the following steps:
+ 1. Resets the queue count for the current frame.
+ 2. Initializes an Annotator object for drawing on the image.
+ 3. Extracts tracks from the image.
+ 4. Draws the counting region on the image.
+ 5. For each detected object:
+ - Draws bounding boxes and labels.
+ - Stores tracking history.
+ - Draws centroids and tracks.
+ - Checks if the object is inside the counting region and updates the count.
+ 6. Displays the queue count on the image.
+ 7. Displays the processed output.
+
+ Examples:
+ >>> queue_manager = QueueManager()
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = queue_manager.process_queue(frame)
"""
- # Region & Line Information
- self.reg_pts = reg_pts if reg_pts is not None else [(20, 60), (20, 680), (1120, 680), (1120, 60)]
- self.counting_region = (
- Polygon(self.reg_pts) if len(self.reg_pts) >= 3 else Polygon([(20, 60), (20, 680), (1120, 680), (1120, 60)])
- )
-
- # annotation Information
- self.tf = line_thickness
- self.view_img = view_img
-
- self.names = names # Class names
-
- # Object counting Information
- self.counts = 0
-
- # Tracks info
- self.track_history = defaultdict(list)
- self.draw_tracks = draw_tracks
+ self.counts = 0 # Reset counts every frame
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
- # Check if environment supports imshow
- self.env_check = check_imshow(warn=True)
+ self.annotator.draw_region(
+ reg_pts=self.region, color=self.rect_color, thickness=self.line_width * 2
+ ) # Draw region
- def extract_and_process_tracks(self, tracks, im0):
- """Extracts and processes tracks for queue management in a video stream."""
- # Initialize annotator and draw the queue region
- annotator = Annotator(im0, self.tf, self.names)
- self.counts = 0 # Reset counts every frame
- if tracks[0].boxes.id is not None:
- boxes = tracks[0].boxes.xyxy.cpu()
- clss = tracks[0].boxes.cls.cpu().tolist()
- track_ids = tracks[0].boxes.id.int().cpu().tolist()
-
- # Extract tracks
- for box, track_id, cls in zip(boxes, track_ids, clss):
- # Draw bounding box
- annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True))
-
- # Update track history
- track_line = self.track_history[track_id]
- track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2)))
- if len(track_line) > 30:
- track_line.pop(0)
-
- # Draw track trails if enabled
- if self.draw_tracks:
- annotator.draw_centroid_and_tracks(
- track_line,
- color=colors(int(track_id), True),
- track_thickness=self.line_thickness,
- )
-
- prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
-
- # Check if the object is inside the counting region
- if len(self.reg_pts) >= 3:
- is_inside = self.counting_region.contains(Point(track_line[-1]))
- if prev_position is not None and is_inside:
- self.counts += 1
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+ # Draw bounding box and counting region
+ self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True))
+ self.store_tracking_history(track_id, box) # Store track history
- # Display queue counts
- label = f"Queue Counts : {str(self.counts)}"
- if label is not None:
- annotator.queue_counts_display(
- label,
- points=self.reg_pts,
- region_color=(255, 0, 255),
- txt_color=(104, 31, 17),
+ # Draw tracks of objects
+ self.annotator.draw_centroid_and_tracks(
+ self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width
)
- if self.env_check and self.view_img:
- annotator.draw_region(reg_pts=self.reg_pts, thickness=self.tf * 2, color=(255, 0, 255))
- cv2.imshow("Ultralytics YOLOv8 Queue Manager", im0)
- # Close window on 'q' key press
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
+ # Cache frequently accessed attributes
+ track_history = self.track_history.get(track_id, [])
- def process_queue(self, im0, tracks):
- """
- Main function to start the queue management process.
-
- Args:
- im0 (ndarray): Current frame from the video stream.
- tracks (list): List of tracks obtained from the object tracking process.
- """
- self.extract_and_process_tracks(tracks, im0) # Extract and process tracks
- return im0
+ # store previous position of track and check if the object is inside the counting region
+ prev_position = None
+ if len(track_history) > 1:
+ prev_position = track_history[-2]
+ if self.region_length >= 3 and prev_position and self.r_s.contains(self.Point(self.track_line[-1])):
+ self.counts += 1
+ # Display queue counts
+ self.annotator.queue_counts_display(
+ f"Queue Counts : {str(self.counts)}",
+ points=self.region,
+ region_color=self.rect_color,
+ txt_color=(104, 31, 17),
+ )
+ self.display_output(im0) # display output with base class function
-if __name__ == "__main__":
- classes_names = {0: "person", 1: "car"} # example class names
- queue_manager = QueueManager(classes_names)
+        return im0  # Return output image for further use
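A minimal usage sketch of the refactored queue manager follows. It assumes the class above is the `QueueManager` in `ultralytics.solutions.queue_management`, that a readable video exists at the illustrative path `queue_video.mp4`, and that `model` and `region` are accepted as keyword overrides of the solution config (as in `BaseSolution` later in this patch); none of these names are defined by this hunk itself.

```python
import cv2

from ultralytics.solutions.queue_management import QueueManager  # assumed module path

cap = cv2.VideoCapture("queue_video.mp4")  # illustrative input video
queue_manager = QueueManager(model="yolo11n.pt", region=[(20, 400), (1080, 400), (1080, 360), (20, 360)])

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = queue_manager.process_queue(frame)  # annotates the frame and refreshes the per-frame queue count

cap.release()
cv2.destroyAllWindows()
```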
diff --git a/ultralytics/solutions/region_counter.py b/ultralytics/solutions/region_counter.py
new file mode 100644
index 00000000000..5a2953f3c61
--- /dev/null
+++ b/ultralytics/solutions/region_counter.py
@@ -0,0 +1,116 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class RegionCounter(BaseSolution):
+ """
+ A class designed for real-time counting of objects within user-defined regions in a video stream.
+
+ This class inherits from `BaseSolution` and offers functionalities to define polygonal regions in a video
+ frame, track objects, and count those objects that pass through each defined region. This makes it useful
+ for applications that require counting in specified areas, such as monitoring zones or segmented sections.
+
+ Attributes:
+ region_template (dict): A template for creating new counting regions with default attributes including
+ the name, polygon coordinates, and display colors.
+ counting_regions (list): A list storing all defined regions, where each entry is based on `region_template`
+ and includes specific region settings like name, coordinates, and color.
+
+ Methods:
+ add_region: Adds a new counting region with specified attributes, such as the region's name, polygon points,
+ region color, and text color.
+ count: Processes video frames to count objects in each region, drawing regions and displaying counts
+ on the frame. Handles object detection, region definition, and containment checks.
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the RegionCounter class for real-time counting in different regions of the video streams."""
+ super().__init__(**kwargs)
+ self.region_template = {
+ "name": "Default Region",
+ "polygon": None,
+ "counts": 0,
+ "dragging": False,
+ "region_color": (255, 255, 255),
+ "text_color": (0, 0, 0),
+ }
+ self.counting_regions = []
+
+ def add_region(self, name, polygon_points, region_color, text_color):
+ """
+ Adds a new region to the counting list based on the provided template with specific attributes.
+
+ Args:
+ name (str): Name assigned to the new region.
+ polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
+ region_color (tuple): BGR color for region visualization.
+ text_color (tuple): BGR color for the text within the region.
+ """
+ region = self.region_template.copy()
+ region.update(
+ {
+ "name": name,
+ "polygon": self.Polygon(polygon_points),
+ "region_color": region_color,
+ "text_color": text_color,
+ }
+ )
+ self.counting_regions.append(region)
+
+ def count(self, im0):
+ """
+ Processes the input frame to detect and count objects within each defined region.
+
+ Args:
+ im0 (numpy.ndarray): Input image frame where objects and regions are annotated.
+
+ Returns:
+ im0 (numpy.ndarray): Processed image frame with annotated counting information.
+ """
+ self.annotator = Annotator(im0, line_width=self.line_width)
+ self.extract_tracks(im0)
+
+ # Region initialization and conversion
+ if self.region is None:
+ self.initialize_region()
+ regions = {"Region#01": self.region}
+ else:
+ regions = self.region if isinstance(self.region, dict) else {"Region#01": self.region}
+
+ # Draw regions and process counts for each defined area
+ for idx, (region_name, reg_pts) in enumerate(regions.items(), start=1):
+ if not isinstance(reg_pts, list) or not all(isinstance(pt, tuple) for pt in reg_pts):
+ LOGGER.warning(f"Invalid region points for {region_name}: {reg_pts}")
+ continue # Skip invalid entries
+ color = colors(idx, True)
+ self.annotator.draw_region(reg_pts=reg_pts, color=color, thickness=self.line_width * 2)
+ self.add_region(region_name, reg_pts, color, self.annotator.get_txt_color())
+
+ # Prepare regions for containment check
+ for region in self.counting_regions:
+ region["prepared_polygon"] = self.prep(region["polygon"])
+
+ # Process bounding boxes and count objects within each region
+ for box, cls in zip(self.boxes, self.clss):
+ self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+ bbox_center = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
+
+ for region in self.counting_regions:
+ if region["prepared_polygon"].contains(self.Point(bbox_center)):
+ region["counts"] += 1
+
+ # Display counts in each region
+ for region in self.counting_regions:
+ self.annotator.text_label(
+ region["polygon"].bounds,
+ label=str(region["counts"]),
+ color=region["region_color"],
+ txt_color=region["text_color"],
+ )
+ region["counts"] = 0 # Reset count for next frame
+
+ self.display_output(im0)
+ return im0
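A hedged usage sketch for `RegionCounter`, importing from the module path in the diff header above. The video path and region coordinates are illustrative; passing a dict of named polygons relies on the handling shown in `count()`.

```python
import cv2

from ultralytics.solutions.region_counter import RegionCounter

cap = cv2.VideoCapture("traffic.mp4")  # illustrative input video
regions = {  # name -> polygon points, as accepted by count() above
    "Region#01": [(50, 60), (250, 60), (250, 260), (50, 260)],
    "Region#02": [(300, 60), (500, 60), (500, 260), (300, 260)],
}
counter = RegionCounter(model="yolo11n.pt", region=regions)

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = counter.count(frame)  # draws each region and its per-frame object count

cap.release()
```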
diff --git a/ultralytics/solutions/security_alarm.py b/ultralytics/solutions/security_alarm.py
new file mode 100644
index 00000000000..e07119bc5bd
--- /dev/null
+++ b/ultralytics/solutions/security_alarm.py
@@ -0,0 +1,144 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils import LOGGER
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class SecurityAlarm(BaseSolution):
+ """
+ A class to manage security alarm functionalities for real-time monitoring.
+
+ This class extends the BaseSolution class and provides features to monitor
+ objects in a frame, send email notifications when specific thresholds are
+ exceeded for total detections, and annotate the output frame for visualization.
+
+ Attributes:
+ email_sent (bool): Flag to track if an email has already been sent for the current event.
+ records (int): Threshold for the number of detected objects to trigger an alert.
+
+ Methods:
+ authenticate: Sets up email server authentication for sending alerts.
+ send_email: Sends an email notification with details and an image attachment.
+ monitor: Monitors the frame, processes detections, and triggers alerts if thresholds are crossed.
+
+ Examples:
+ >>> security = SecurityAlarm()
+ >>> security.authenticate("abc@gmail.com", "1111222233334444", "xyz@gmail.com")
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = security.monitor(frame)
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the SecurityAlarm class with parameters for real-time object monitoring."""
+ super().__init__(**kwargs)
+ self.email_sent = False
+ self.records = self.CFG["records"]
+ self.server = None
+ self.to_email = ""
+ self.from_email = ""
+
+ def authenticate(self, from_email, password, to_email):
+ """
+ Authenticates the email server for sending alert notifications.
+
+ Args:
+ from_email (str): Sender's email address.
+ password (str): Password for the sender's email account.
+ to_email (str): Recipient's email address.
+
+ This method initializes a secure connection with the SMTP server
+ and logs in using the provided credentials.
+
+ Examples:
+ >>> alarm = SecurityAlarm()
+ >>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com")
+ """
+ import smtplib
+
+ self.server = smtplib.SMTP("smtp.gmail.com: 587")
+ self.server.starttls()
+ self.server.login(from_email, password)
+ self.to_email = to_email
+ self.from_email = from_email
+
+ def send_email(self, im0, records=5):
+ """
+ Sends an email notification with an image attachment indicating the number of objects detected.
+
+ Args:
+ im0 (numpy.ndarray): The input image or frame to be attached to the email.
+ records (int): The number of detected objects to be included in the email message.
+
+ This method encodes the input image, composes the email message with
+ details about the detection, and sends it to the specified recipient.
+
+ Examples:
+ >>> alarm = SecurityAlarm()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> alarm.send_email(frame, records=10)
+ """
+ from email.mime.image import MIMEImage
+ from email.mime.multipart import MIMEMultipart
+ from email.mime.text import MIMEText
+
+ import cv2
+
+ img_bytes = cv2.imencode(".jpg", im0)[1].tobytes() # Encode the image as JPEG
+
+ # Create the email
+ message = MIMEMultipart()
+ message["From"] = self.from_email
+ message["To"] = self.to_email
+ message["Subject"] = "Security Alert"
+
+ # Add the text message body
+ message_body = f"Ultralytics ALERT!!! {records} objects have been detected!!"
+ message.attach(MIMEText(message_body))
+
+ # Attach the image
+ image_attachment = MIMEImage(img_bytes, name="ultralytics.jpg")
+ message.attach(image_attachment)
+
+ # Send the email
+ try:
+ self.server.send_message(message)
+            LOGGER.info("✅ Email sent successfully!")
+ except Exception as e:
+            print(f"❌ Failed to send email: {e}")
+
+ def monitor(self, im0):
+ """
+ Monitors the frame, processes object detections, and triggers alerts if thresholds are exceeded.
+
+ Args:
+ im0 (numpy.ndarray): The input image or frame to be processed and annotated.
+
+ This method processes the input frame, extracts detections, annotates the frame
+ with bounding boxes, and sends an email notification if the number of detected objects
+ surpasses the specified threshold and an alert has not already been sent.
+
+ Returns:
+ (numpy.ndarray): The processed frame with annotations.
+
+ Examples:
+ >>> alarm = SecurityAlarm()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> processed_frame = alarm.monitor(frame)
+ """
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
+
+ # Iterate over bounding boxes, track ids and classes index
+ for box, cls in zip(self.boxes, self.clss):
+ # Draw bounding box
+ self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
+
+ total_det = len(self.clss)
+        if total_det > self.records and not self.email_sent:  # Only send email if not sent before
+ self.send_email(im0, total_det)
+ self.email_sent = True
+
+ self.display_output(im0) # display output with base class function
+
+        return im0  # Return output image for further use
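A sketch that mirrors the class docstring above; the email addresses, app password, webcam index, and `records` threshold are placeholders. Gmail typically requires an app-specific password for SMTP logins.

```python
import cv2

from ultralytics.solutions.security_alarm import SecurityAlarm

alarm = SecurityAlarm(model="yolo11n.pt", records=5)  # alert once more than 5 objects are detected
alarm.authenticate("sender@example.com", "app-password", "recipient@example.com")  # placeholder credentials

cap = cv2.VideoCapture(0)  # illustrative webcam source
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = alarm.monitor(frame)  # annotates detections and emails once when the threshold is crossed

cap.release()
```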
diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py
new file mode 100644
index 00000000000..3bd59dc9e3c
--- /dev/null
+++ b/ultralytics/solutions/solutions.py
@@ -0,0 +1,178 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+from collections import defaultdict
+
+import cv2
+
+from ultralytics import YOLO
+from ultralytics.utils import ASSETS_URL, DEFAULT_CFG_DICT, DEFAULT_SOL_DICT, LOGGER
+from ultralytics.utils.checks import check_imshow, check_requirements
+
+
+class BaseSolution:
+ """
+ A base class for managing Ultralytics Solutions.
+
+ This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
+ and region initialization.
+
+ Attributes:
+ LineString (shapely.geometry.LineString): Class for creating line string geometries.
+ Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
+ Point (shapely.geometry.Point): Class for creating point geometries.
+ CFG (Dict): Configuration dictionary loaded from a YAML file and updated with kwargs.
+ region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
+ line_width (int): Width of lines used in visualizations.
+ model (ultralytics.YOLO): Loaded YOLO model instance.
+ names (Dict[int, str]): Dictionary mapping class indices to class names.
+ env_check (bool): Flag indicating whether the environment supports image display.
+ track_history (collections.defaultdict): Dictionary to store tracking history for each object.
+
+ Methods:
+ extract_tracks: Apply object tracking and extract tracks from an input image.
+ store_tracking_history: Store object tracking history for a given track ID and bounding box.
+ initialize_region: Initialize the counting region and line segment based on configuration.
+ display_output: Display the results of processing, including showing frames or saving results.
+
+ Examples:
+ >>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
+ >>> solution.initialize_region()
+ >>> image = cv2.imread("image.jpg")
+ >>> solution.extract_tracks(image)
+ >>> solution.display_output(image)
+ """
+
+ def __init__(self, IS_CLI=False, **kwargs):
+ """
+ Initializes the `BaseSolution` class with configuration settings and the YOLO model for Ultralytics solutions.
+
+ IS_CLI (optional): Enables CLI mode if set.
+ """
+ check_requirements("shapely>=2.0.0")
+ from shapely.geometry import LineString, Point, Polygon
+ from shapely.prepared import prep
+
+ self.LineString = LineString
+ self.Polygon = Polygon
+ self.Point = Point
+ self.prep = prep
+ self.annotator = None # Initialize annotator
+ self.tracks = None
+ self.track_data = None
+ self.boxes = []
+ self.clss = []
+ self.track_ids = []
+ self.track_line = None
+ self.r_s = None
+
+ # Load config and update with args
+ DEFAULT_SOL_DICT.update(kwargs)
+ DEFAULT_CFG_DICT.update(kwargs)
+ self.CFG = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT}
+        LOGGER.info(f"Ultralytics Solutions: ✅ {DEFAULT_SOL_DICT}")
+
+ self.region = self.CFG["region"] # Store region data for other classes usage
+ self.line_width = (
+ self.CFG["line_width"] if self.CFG["line_width"] is not None else 2
+ ) # Store line_width for usage
+
+ # Load Model and store classes names
+ if self.CFG["model"] is None:
+ self.CFG["model"] = "yolo11n.pt"
+ self.model = YOLO(self.CFG["model"])
+ self.names = self.model.names
+
+        self.track_add_args = {  # Tracker additional arguments for advanced configuration
+ k: self.CFG[k] for k in ["verbose", "iou", "conf", "device", "max_det", "half", "tracker"]
+ }
+
+ if IS_CLI and self.CFG["source"] is None:
+ d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
+            LOGGER.warning(f"⚠️ WARNING: source not provided. using default source {ASSETS_URL}/{d_s}")
+ from ultralytics.utils.downloads import safe_download
+
+ safe_download(f"{ASSETS_URL}/{d_s}") # download source from ultralytics assets
+ self.CFG["source"] = d_s # set default source
+
+ # Initialize environment and region setup
+ self.env_check = check_imshow(warn=True)
+ self.track_history = defaultdict(list)
+
+ def extract_tracks(self, im0):
+ """
+ Applies object tracking and extracts tracks from an input image or frame.
+
+ Args:
+ im0 (ndarray): The input image or frame.
+
+ Examples:
+ >>> solution = BaseSolution()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> solution.extract_tracks(frame)
+ """
+ self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)
+
+ # Extract tracks for OBB or object detection
+ self.track_data = self.tracks[0].obb or self.tracks[0].boxes
+
+ if self.track_data and self.track_data.id is not None:
+ self.boxes = self.track_data.xyxy.cpu()
+ self.clss = self.track_data.cls.cpu().tolist()
+ self.track_ids = self.track_data.id.int().cpu().tolist()
+ else:
+            LOGGER.warning("WARNING ⚠️ no tracks found!")
+ self.boxes, self.clss, self.track_ids = [], [], []
+
+ def store_tracking_history(self, track_id, box):
+ """
+ Stores the tracking history of an object.
+
+ This method updates the tracking history for a given object by appending the center point of its
+ bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
+
+ Args:
+ track_id (int): The unique identifier for the tracked object.
+ box (List[float]): The bounding box coordinates of the object in the format [x1, y1, x2, y2].
+
+ Examples:
+ >>> solution = BaseSolution()
+ >>> solution.store_tracking_history(1, [100, 200, 300, 400])
+ """
+ # Store tracking history
+ self.track_line = self.track_history[track_id]
+ self.track_line.append(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
+ if len(self.track_line) > 30:
+ self.track_line.pop(0)
+
+ def initialize_region(self):
+ """Initialize the counting region and line segment based on configuration settings."""
+ if self.region is None:
+ self.region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
+ self.r_s = (
+ self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
+ ) # region or line
+
+ def display_output(self, im0):
+ """
+ Display the results of the processing, which could involve showing frames, printing counts, or saving results.
+
+ This method is responsible for visualizing the output of the object detection and tracking process. It displays
+ the processed frame with annotations, and allows for user interaction to close the display.
+
+ Args:
+ im0 (numpy.ndarray): The input image or frame that has been processed and annotated.
+
+ Examples:
+ >>> solution = BaseSolution()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> solution.display_output(frame)
+
+ Notes:
+ - This method will only display output if the 'show' configuration is set to True and the environment
+ supports image display.
+ - The display can be closed by pressing the 'q' key.
+ """
+ if self.CFG.get("show") and self.env_check:
+ cv2.imshow("Ultralytics Solutions", im0)
+ if cv2.waitKey(1) & 0xFF == ord("q"):
+ return
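To illustrate how the helpers above compose, here is a sketch of a custom solution built on `BaseSolution`. The subclass name, its `process` method, and the constructor arguments are invented for illustration; only the inherited attributes and methods come from the class above.

```python
from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors


class CentroidLogger(BaseSolution):  # hypothetical subclass, for illustration only
    """Counts how many tracked objects currently sit inside the configured region."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)  # loads config, model, class names, env check, track history
        self.initialize_region()  # builds self.r_s from self.region (or the default rectangle)

    def process(self, im0):
        self.annotator = Annotator(im0, line_width=self.line_width)
        self.extract_tracks(im0)  # fills self.boxes, self.clss, self.track_ids

        inside = 0
        for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
            self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True))
            self.store_tracking_history(track_id, box)  # keeps the last 30 centroids per track
            if self.r_s.contains(self.Point(self.track_line[-1])):
                inside += 1

        self.display_output(im0)  # honors the 'show' config and the imshow environment check
        return im0, inside


# Example construction (illustrative arguments):
# logger = CentroidLogger(model="yolo11n.pt", region=[(100, 100), (500, 100), (500, 400), (100, 400)])
```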
diff --git a/ultralytics/solutions/speed_estimation.py b/ultralytics/solutions/speed_estimation.py
index 70964241fd4..43eaceceb27 100644
--- a/ultralytics/solutions/speed_estimation.py
+++ b/ultralytics/solutions/speed_estimation.py
@@ -1,116 +1,110 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-from collections import defaultdict
from time import time
-import cv2
import numpy as np
-from ultralytics.utils.checks import check_imshow
+from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors
-class SpeedEstimator:
- """A class to estimate the speed of objects in a real-time video stream based on their tracks."""
+class SpeedEstimator(BaseSolution):
+ """
+ A class to estimate the speed of objects in a real-time video stream based on their tracks.
- def __init__(self, names, reg_pts=None, view_img=False, line_thickness=2, spdl_dist_thresh=10):
- """
- Initializes the SpeedEstimator with the given parameters.
+ This class extends the BaseSolution class and provides functionality for estimating object speeds using
+ tracking data in video streams.
- Args:
- names (dict): Dictionary of class names.
- reg_pts (list, optional): List of region points for speed estimation. Defaults to [(20, 400), (1260, 400)].
- view_img (bool, optional): Whether to display the image with annotations. Defaults to False.
- line_thickness (int, optional): Thickness of the lines for drawing boxes and tracks. Defaults to 2.
- spdl_dist_thresh (int, optional): Distance threshold for speed calculation. Defaults to 10.
- """
- # Region information
- self.reg_pts = reg_pts if reg_pts is not None else [(20, 400), (1260, 400)]
+ Attributes:
+ spd (Dict[int, float]): Dictionary storing speed data for tracked objects.
+ trkd_ids (List[int]): List of tracked object IDs that have already been speed-estimated.
+ trk_pt (Dict[int, float]): Dictionary storing previous timestamps for tracked objects.
+ trk_pp (Dict[int, Tuple[float, float]]): Dictionary storing previous positions for tracked objects.
+ annotator (Annotator): Annotator object for drawing on images.
+ region (List[Tuple[int, int]]): List of points defining the speed estimation region.
+ track_line (List[Tuple[float, float]]): List of points representing the object's track.
+ r_s (LineString): LineString object representing the speed estimation region.
+
+ Methods:
+ initialize_region: Initializes the speed estimation region.
+ estimate_speed: Estimates the speed of objects based on tracking data.
+ store_tracking_history: Stores the tracking history for an object.
+ extract_tracks: Extracts tracks from the current frame.
+ display_output: Displays the output with annotations.
- self.names = names # Classes names
+ Examples:
+ >>> estimator = SpeedEstimator()
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = estimator.estimate_speed(frame)
+ >>> cv2.imshow("Speed Estimation", processed_frame)
+ """
- # Tracking information
- self.trk_history = defaultdict(list)
+ def __init__(self, **kwargs):
+ """Initializes the SpeedEstimator object with speed estimation parameters and data structures."""
+ super().__init__(**kwargs)
+
+ self.initialize_region() # Initialize speed region
- self.view_img = view_img # bool for displaying inference
- self.tf = line_thickness # line thickness for annotator
self.spd = {} # set for speed data
self.trkd_ids = [] # list for already speed_estimated and tracked ID's
- self.spdl = spdl_dist_thresh # Speed line distance threshold
self.trk_pt = {} # set for tracks previous time
self.trk_pp = {} # set for tracks previous point
- # Check if the environment supports imshow
- self.env_check = check_imshow(warn=True)
-
- def estimate_speed(self, im0, tracks):
+ def estimate_speed(self, im0):
"""
Estimates the speed of objects based on tracking data.
Args:
- im0 (ndarray): Image.
- tracks (list): List of tracks obtained from the object tracking process.
+ im0 (np.ndarray): Input image for processing. Shape is typically (H, W, C) for RGB images.
Returns:
- (ndarray): The image with annotated boxes and tracks.
- """
- if tracks[0].boxes.id is None:
- return im0
+ (np.ndarray): Processed image with speed estimations and annotations.
- boxes = tracks[0].boxes.xyxy.cpu()
- clss = tracks[0].boxes.cls.cpu().tolist()
- t_ids = tracks[0].boxes.id.int().cpu().tolist()
- annotator = Annotator(im0, line_width=self.tf)
- annotator.draw_region(reg_pts=self.reg_pts, color=(255, 0, 255), thickness=self.tf * 2)
-
- for box, t_id, cls in zip(boxes, t_ids, clss):
- track = self.trk_history[t_id]
- bbox_center = (float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))
- track.append(bbox_center)
+ Examples:
+ >>> estimator = SpeedEstimator()
+ >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
+ >>> processed_image = estimator.estimate_speed(image)
+ """
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ self.extract_tracks(im0) # Extract tracks
- if len(track) > 30:
- track.pop(0)
+ self.annotator.draw_region(
+ reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
+ ) # Draw region
- trk_pts = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+ self.store_tracking_history(track_id, box) # Store track history
- if t_id not in self.trk_pt:
- self.trk_pt[t_id] = 0
+            # Initialize previous time and position for this track_id if not already stored
+ if track_id not in self.trk_pt:
+ self.trk_pt[track_id] = 0
+ if track_id not in self.trk_pp:
+ self.trk_pp[track_id] = self.track_line[-1]
- speed_label = f"{int(self.spd[t_id])} km/h" if t_id in self.spd else self.names[int(cls)]
- bbox_color = colors(int(t_id), True)
+ speed_label = f"{int(self.spd[track_id])} km/h" if track_id in self.spd else self.names[int(cls)]
+ self.annotator.box_label(box, label=speed_label, color=colors(track_id, True)) # Draw bounding box
- annotator.box_label(box, speed_label, bbox_color)
- cv2.polylines(im0, [trk_pts], isClosed=False, color=bbox_color, thickness=self.tf)
- cv2.circle(im0, (int(track[-1][0]), int(track[-1][1])), self.tf * 2, bbox_color, -1)
+ # Draw tracks of objects
+ self.annotator.draw_centroid_and_tracks(
+ self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width
+ )
- # Calculation of object speed
- if not self.reg_pts[0][0] < track[-1][0] < self.reg_pts[1][0]:
- return
- if self.reg_pts[1][1] - self.spdl < track[-1][1] < self.reg_pts[1][1] + self.spdl:
- direction = "known"
- elif self.reg_pts[0][1] - self.spdl < track[-1][1] < self.reg_pts[0][1] + self.spdl:
+ # Calculate object speed and direction based on region intersection
+ if self.LineString([self.trk_pp[track_id], self.track_line[-1]]).intersects(self.r_s):
direction = "known"
else:
direction = "unknown"
- if self.trk_pt.get(t_id) != 0 and direction != "unknown" and t_id not in self.trkd_ids:
- self.trkd_ids.append(t_id)
-
- time_difference = time() - self.trk_pt[t_id]
+ # Perform speed calculation and tracking updates if direction is valid
+ if direction == "known" and track_id not in self.trkd_ids:
+ self.trkd_ids.append(track_id)
+ time_difference = time() - self.trk_pt[track_id]
if time_difference > 0:
- self.spd[t_id] = np.abs(track[-1][1] - self.trk_pp[t_id][1]) / time_difference
-
- self.trk_pt[t_id] = time()
- self.trk_pp[t_id] = track[-1]
-
- if self.view_img and self.env_check:
- cv2.imshow("Ultralytics Speed Estimation", im0)
- if cv2.waitKey(1) & 0xFF == ord("q"):
- return
+ self.spd[track_id] = np.abs(self.track_line[-1][1] - self.trk_pp[track_id][1]) / time_difference
- return im0
+ self.trk_pt[track_id] = time()
+ self.trk_pp[track_id] = self.track_line[-1]
+ self.display_output(im0) # display output with base class function
-if __name__ == "__main__":
- names = {0: "person", 1: "car"} # example class names
- speed_estimator = SpeedEstimator(names)
+        return im0  # Return output image for further use
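A usage sketch matching the docstring above; the video path is illustrative and the two-point `region` keyword mirrors the line segment handled by `initialize_region`.

```python
import cv2

from ultralytics.solutions.speed_estimation import SpeedEstimator

cap = cv2.VideoCapture("highway.mp4")  # illustrative input video
estimator = SpeedEstimator(model="yolo11n.pt", region=[(20, 400), (1260, 400)])  # two-point speed line

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = estimator.estimate_speed(frame)  # draws boxes, tracks, and km/h labels after the line is crossed

cap.release()
```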
diff --git a/ultralytics/solutions/streamlit_inference.py b/ultralytics/solutions/streamlit_inference.py
index 85394350dae..50cc2584095 100644
--- a/ultralytics/solutions/streamlit_inference.py
+++ b/ultralytics/solutions/streamlit_inference.py
@@ -1,149 +1,190 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import io
-import time
+from typing import Any
import cv2
-import torch
+from ultralytics import YOLO
+from ultralytics.utils import LOGGER
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.downloads import GITHUB_ASSETS_STEMS
-def inference(model=None):
- """Runs real-time object detection on video input using Ultralytics YOLOv8 in a Streamlit application."""
- check_requirements("streamlit>=1.29.0") # scope imports for faster ultralytics package load speeds
- import streamlit as st
-
- from ultralytics import YOLO
-
- # Hide main menu style
- menu_style_cfg = """"""
-
- # Main title of streamlit application
- main_title_cfg = """
-    Experience real-time object detection on your webcam with the power of Ultralytics YOLOv8! 🚀
-
"""
-
- # Set html page configuration
- st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide", initial_sidebar_state="auto")
-
- # Append the custom HTML
- st.markdown(menu_style_cfg, unsafe_allow_html=True)
- st.markdown(main_title_cfg, unsafe_allow_html=True)
- st.markdown(sub_title_cfg, unsafe_allow_html=True)
-
- # Add ultralytics logo in sidebar
- with st.sidebar:
- logo = "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg"
- st.image(logo, width=250)
+class Inference:
+ """
+ A class to perform object detection, image classification, image segmentation and pose estimation inference using
+    Streamlit and Ultralytics YOLO models. It provides functionality such as loading models, configuring settings,
+ uploading video files, and performing real-time inference.
+
+ Attributes:
+ st (module): Streamlit module for UI creation.
+ temp_dict (dict): Temporary dictionary to store the model path.
+ model_path (str): Path to the loaded model.
+ model (YOLO): The YOLO model instance.
+ source (str): Selected video source.
+ enable_trk (str): Enable tracking option.
+ conf (float): Confidence threshold.
+ iou (float): IoU threshold for non-max suppression.
+ vid_file_name (str): Name of the uploaded video file.
+ selected_ind (list): List of selected class indices.
+
+ Methods:
+ web_ui: Sets up the Streamlit web interface with custom HTML elements.
+ sidebar: Configures the Streamlit sidebar for model and inference settings.
+ source_upload: Handles video file uploads through the Streamlit interface.
+ configure: Configures the model and loads selected classes for inference.
+ inference: Performs real-time object detection inference.
+
+ Examples:
+        >>> inf = solutions.Inference(model="path/to/model.pt")  # Model is not a required argument.
+ >>> inf.inference()
+ """
+
+ def __init__(self, **kwargs: Any):
+ """
+ Initializes the Inference class, checking Streamlit requirements and setting up the model path.
+
+ Args:
+ **kwargs (Any): Additional keyword arguments for model configuration.
+ """
+ check_requirements("streamlit>=1.29.0") # scope imports for faster ultralytics package load speeds
+ import streamlit as st
+
+ self.st = st # Reference to the Streamlit class instance
+ self.source = None # Placeholder for video or webcam source details
+ self.enable_trk = False # Flag to toggle object tracking
+ self.conf = 0.25 # Confidence threshold for detection
+ self.iou = 0.45 # Intersection-over-Union (IoU) threshold for non-maximum suppression
+ self.org_frame = None # Container for the original frame to be displayed
+ self.ann_frame = None # Container for the annotated frame to be displayed
+ self.vid_file_name = None # Holds the name of the video file
+ self.selected_ind = [] # List of selected classes for detection or tracking
+ self.model = None # Container for the loaded model instance
+
+ self.temp_dict = {"model": None, **kwargs}
+ self.model_path = None # Store model file name with path
+ if self.temp_dict["model"] is not None:
+ self.model_path = self.temp_dict["model"]
+
+ LOGGER.info(f"Ultralytics Solutions: โ {self.temp_dict}")
+
+ def web_ui(self):
+ """Sets up the Streamlit web interface with custom HTML elements."""
+ menu_style_cfg = """""" # Hide main menu style
+
+ # Main title of streamlit application
+ main_title_cfg = """
Experience real-time object detection on your webcam with the power
+            of Ultralytics YOLO! 🚀
"""
+
+ # Set html page configuration and append custom HTML
+ self.st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide")
+ self.st.markdown(menu_style_cfg, unsafe_allow_html=True)
+ self.st.markdown(main_title_cfg, unsafe_allow_html=True)
+ self.st.markdown(sub_title_cfg, unsafe_allow_html=True)
+
+ def sidebar(self):
+ """Configures the Streamlit sidebar for model and inference settings."""
+ with self.st.sidebar: # Add Ultralytics LOGO
+ logo = "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg"
+ self.st.image(logo, width=250)
+
+ self.st.sidebar.title("User Configuration") # Add elements to vertical setting menu
+ self.source = self.st.sidebar.selectbox(
+ "Video",
+ ("webcam", "video"),
+ ) # Add source selection dropdown
+ self.enable_trk = self.st.sidebar.radio("Enable Tracking", ("Yes", "No")) # Enable object tracking
+ self.conf = float(
+ self.st.sidebar.slider("Confidence Threshold", 0.0, 1.0, self.conf, 0.01)
+ ) # Slider for confidence
+ self.iou = float(self.st.sidebar.slider("IoU Threshold", 0.0, 1.0, self.iou, 0.01)) # Slider for NMS threshold
+
+ col1, col2 = self.st.columns(2)
+ self.org_frame = col1.empty()
+ self.ann_frame = col2.empty()
+
+ def source_upload(self):
+ """Handles video file uploads through the Streamlit interface."""
+ self.vid_file_name = ""
+ if self.source == "video":
+ vid_file = self.st.sidebar.file_uploader("Upload Video File", type=["mp4", "mov", "avi", "mkv"])
+ if vid_file is not None:
+ g = io.BytesIO(vid_file.read()) # BytesIO Object
+ with open("ultralytics.mp4", "wb") as out: # Open temporary file as bytes
+ out.write(g.read()) # Read bytes into file
+ self.vid_file_name = "ultralytics.mp4"
+ elif self.source == "webcam":
+ self.vid_file_name = 0
+
+ def configure(self):
+ """Configures the model and loads selected classes for inference."""
+ # Add dropdown menu for model selection
+ available_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolo11")]
+ if self.model_path: # If user provided the custom model, insert model without suffix as *.pt is added later
+ available_models.insert(0, self.model_path.split(".pt")[0])
+ selected_model = self.st.sidebar.selectbox("Model", available_models)
+
+ with self.st.spinner("Model is downloading..."):
+ self.model = YOLO(f"{selected_model.lower()}.pt") # Load the YOLO model
+ class_names = list(self.model.names.values()) # Convert dictionary to list of class names
+ self.st.success("Model loaded successfully!")
+
+ # Multiselect box with class names and get indices of selected classes
+ selected_classes = self.st.sidebar.multiselect("Classes", class_names, default=class_names[:3])
+ self.selected_ind = [class_names.index(option) for option in selected_classes]
+
+ if not isinstance(self.selected_ind, list): # Ensure selected_options is a list
+ self.selected_ind = list(self.selected_ind)
+
+ def inference(self):
+ """Performs real-time object detection inference."""
+ self.web_ui() # Initialize the web interface
+ self.sidebar() # Create the sidebar
+ self.source_upload() # Upload the video source
+ self.configure() # Configure the app
+
+ if self.st.sidebar.button("Start"):
+ stop_button = self.st.button("Stop") # Button to stop the inference
+ cap = cv2.VideoCapture(self.vid_file_name) # Capture the video
+ if not cap.isOpened():
+ self.st.error("Could not open webcam.")
+ while cap.isOpened():
+ success, frame = cap.read()
+ if not success:
+ self.st.warning("Failed to read frame from webcam. Please verify the webcam is connected properly.")
+ break
+
+ # Store model predictions
+ if self.enable_trk == "Yes":
+ results = self.model.track(
+ frame, conf=self.conf, iou=self.iou, classes=self.selected_ind, persist=True
+ )
+ else:
+ results = self.model(frame, conf=self.conf, iou=self.iou, classes=self.selected_ind)
+ annotated_frame = results[0].plot() # Add annotations on frame
+
+ if stop_button:
+ cap.release() # Release the capture
+ self.st.stop() # Stop streamlit app
+
+ self.org_frame.image(frame, channels="BGR") # Display original frame
+ self.ann_frame.image(annotated_frame, channels="BGR") # Display processed frame
+
+ cap.release() # Release the capture
+ cv2.destroyAllWindows() # Destroy window
- # Add elements to vertical setting menu
- st.sidebar.title("User Configuration")
- # Add video source selection dropdown
- source = st.sidebar.selectbox(
- "Video",
- ("webcam", "video"),
- )
-
- vid_file_name = ""
- if source == "video":
- vid_file = st.sidebar.file_uploader("Upload Video File", type=["mp4", "mov", "avi", "mkv"])
- if vid_file is not None:
- g = io.BytesIO(vid_file.read()) # BytesIO Object
- vid_location = "ultralytics.mp4"
- with open(vid_location, "wb") as out: # Open temporary file as bytes
- out.write(g.read()) # Read bytes into file
- vid_file_name = "ultralytics.mp4"
- elif source == "webcam":
- vid_file_name = 0
-
- # Add dropdown menu for model selection
- available_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolov8")]
- if model:
- available_models.insert(0, model.split(".pt")[0]) # insert model without suffix as *.pt is added later
-
- selected_model = st.sidebar.selectbox("Model", available_models)
- with st.spinner("Model is downloading..."):
- model = YOLO(f"{selected_model.lower()}.pt") # Load the YOLO model
- class_names = list(model.names.values()) # Convert dictionary to list of class names
- st.success("Model loaded successfully!")
-
- # Multiselect box with class names and get indices of selected classes
- selected_classes = st.sidebar.multiselect("Classes", class_names, default=class_names[:3])
- selected_ind = [class_names.index(option) for option in selected_classes]
-
- if not isinstance(selected_ind, list): # Ensure selected_options is a list
- selected_ind = list(selected_ind)
-
- enable_trk = st.sidebar.radio("Enable Tracking", ("Yes", "No"))
- conf = float(st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.25, 0.01))
- iou = float(st.sidebar.slider("IoU Threshold", 0.0, 1.0, 0.45, 0.01))
-
- col1, col2 = st.columns(2)
- org_frame = col1.empty()
- ann_frame = col2.empty()
-
- fps_display = st.sidebar.empty() # Placeholder for FPS display
-
- if st.sidebar.button("Start"):
- videocapture = cv2.VideoCapture(vid_file_name) # Capture the video
-
- if not videocapture.isOpened():
- st.error("Could not open webcam.")
-
- stop_button = st.button("Stop") # Button to stop the inference
-
- while videocapture.isOpened():
- success, frame = videocapture.read()
- if not success:
- st.warning("Failed to read frame from webcam. Please make sure the webcam is connected properly.")
- break
-
- prev_time = time.time()
-
- # Store model predictions
- if enable_trk == "Yes":
- results = model.track(frame, conf=conf, iou=iou, classes=selected_ind, persist=True)
- else:
- results = model(frame, conf=conf, iou=iou, classes=selected_ind)
- annotated_frame = results[0].plot() # Add annotations on frame
-
- # Calculate model FPS
- curr_time = time.time()
- fps = 1 / (curr_time - prev_time)
- prev_time = curr_time
-
- # display frame
- org_frame.image(frame, channels="BGR")
- ann_frame.image(annotated_frame, channels="BGR")
-
- if stop_button:
- videocapture.release() # Release the capture
- torch.cuda.empty_cache() # Clear CUDA memory
- st.stop() # Stop streamlit app
-
- # Display FPS in sidebar
- fps_display.metric("FPS", f"{fps:.2f}")
-
- # Release the capture
- videocapture.release()
-
- # Clear CUDA memory
- torch.cuda.empty_cache()
-
- # Destroy window
- cv2.destroyAllWindows()
-
-
-# Main function call
if __name__ == "__main__":
- inference()
+ import sys # Import the sys module for accessing command-line arguments
+
+ # Check if a model name is provided as a command-line argument
+ args = len(sys.argv)
+ model = sys.argv[1] if args > 1 else None # assign first argument as the model name
+ # Create an instance of the Inference class and run inference
+ Inference(model=model).inference()
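The refactored app can also be launched programmatically, as in the `__main__` block above; a sketch follows. The model name is illustrative and optional, and the script is meant to be executed through Streamlit (for example as the target of `streamlit run`) rather than as a plain Python process.

```python
from ultralytics.solutions.streamlit_inference import Inference

# Launch the Streamlit UI; omit the model argument to pick one from the sidebar instead.
Inference(model="yolo11n.pt").inference()
```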
diff --git a/ultralytics/solutions/trackzone.py b/ultralytics/solutions/trackzone.py
new file mode 100644
index 00000000000..b1d32f2d8e5
--- /dev/null
+++ b/ultralytics/solutions/trackzone.py
@@ -0,0 +1,68 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+import cv2
+import numpy as np
+
+from ultralytics.solutions.solutions import BaseSolution
+from ultralytics.utils.plotting import Annotator, colors
+
+
+class TrackZone(BaseSolution):
+ """
+ A class to manage region-based object tracking in a video stream.
+
+ This class extends the BaseSolution class and provides functionality for tracking objects within a specific region
+ defined by a polygonal area. Objects outside the region are excluded from tracking. It supports dynamic initialization
+ of the region, allowing either a default region or a user-specified polygon.
+
+ Attributes:
+ region (ndarray): The polygonal region for tracking, represented as a convex hull.
+
+ Methods:
+ trackzone: Processes each frame of the video, applying region-based tracking.
+
+ Examples:
+ >>> tracker = TrackZone()
+ >>> frame = cv2.imread("frame.jpg")
+ >>> processed_frame = tracker.trackzone(frame)
+ >>> cv2.imshow("Tracked Frame", processed_frame)
+ """
+
+ def __init__(self, **kwargs):
+ """Initializes the TrackZone class for tracking objects within a defined region in video streams."""
+ super().__init__(**kwargs)
+ default_region = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
+ self.region = cv2.convexHull(np.array(self.region or default_region, dtype=np.int32))
+
+ def trackzone(self, im0):
+ """
+ Processes the input frame to track objects within a defined region.
+
+ This method initializes the annotator, creates a mask for the specified region, extracts tracks
+ only from the masked area, and updates tracking information. Objects outside the region are ignored.
+
+ Args:
+ im0 (numpy.ndarray): The input image or frame to be processed.
+
+ Returns:
+ (numpy.ndarray): The processed image with tracking id and bounding boxes annotations.
+
+ Examples:
+ >>> tracker = TrackZone()
+ >>> frame = cv2.imread("path/to/image.jpg")
+ >>> tracker.trackzone(frame)
+ """
+ self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator
+ # Create a mask for the region and extract tracks from the masked image
+ masked_frame = cv2.bitwise_and(im0, im0, mask=cv2.fillPoly(np.zeros_like(im0[:, :, 0]), [self.region], 255))
+ self.extract_tracks(masked_frame)
+
+ cv2.polylines(im0, [self.region], isClosed=True, color=(255, 255, 255), thickness=self.line_width * 2)
+
+ # Iterate over boxes, track ids, classes indexes list and draw bounding boxes
+ for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+ self.annotator.box_label(box, label=f"{self.names[cls]}:{track_id}", color=colors(track_id, True))
+
+ self.display_output(im0) # display output with base class function
+
+        return im0  # Return output image for further use
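A sketch for `TrackZone`, following its docstring; the video path and polygon are illustrative, and omitting `region` falls back to the default rectangle defined in `__init__` above.

```python
import cv2

from ultralytics.solutions.trackzone import TrackZone

cap = cv2.VideoCapture("street.mp4")  # illustrative input video
tracker = TrackZone(model="yolo11n.pt", region=[(150, 150), (1130, 150), (1130, 570), (150, 570)])

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = tracker.trackzone(frame)  # only objects inside the polygon are tracked and annotated

cap.release()
```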
diff --git a/ultralytics/trackers/README.md b/ultralytics/trackers/README.md
index d7bc855814d..3743d5374c5 100644
--- a/ultralytics/trackers/README.md
+++ b/ultralytics/trackers/README.md
@@ -13,7 +13,7 @@ The output from Ultralytics trackers is consistent with standard object detectio
- **Ease of Use:** Simple Python API and CLI options for quick integration and deployment.
- **Customizability:** Easy to use with custom trained YOLO models, allowing integration into domain-specific applications.
-**Video Tutorial:** [Object Detection and Tracking with Ultralytics YOLOv8](https://www.youtube.com/embed/hHyHmOtmEgs?si=VNZtXmm45Nb9s-N-).
+**Video Tutorial:** [Object Detection and Tracking with Ultralytics YOLO](https://www.youtube.com/embed/hHyHmOtmEgs?si=VNZtXmm45Nb9s-N-).
## Features at a Glance
@@ -34,7 +34,7 @@ The default tracker is BoT-SORT.
## Tracking
-To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLOv8n, YOLOv8n-seg and YOLOv8n-pose.
+To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLO11n, YOLO11n-seg and YOLO11n-pose.
#### Python
@@ -42,9 +42,9 @@ To run the tracker on video streams, use a trained Detect, Segment or Pose model
from ultralytics import YOLO
# Load an official or custom model
-model = YOLO("yolov8n.pt") # Load an official Detect model
-model = YOLO("yolov8n-seg.pt") # Load an official Segment model
-model = YOLO("yolov8n-pose.pt") # Load an official Pose model
+model = YOLO("yolo11n.pt") # Load an official Detect model
+model = YOLO("yolo11n-seg.pt") # Load an official Segment model
+model = YOLO("yolo11n-pose.pt") # Load an official Pose model
model = YOLO("path/to/best.pt") # Load a custom trained model
# Perform tracking with the model
@@ -58,9 +58,9 @@ results = model.track(
```bash
# Perform tracking with various models using the command line interface
-yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model
-yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model
-yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model
+yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model
+yolo track model=yolo11n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model
+yolo track model=yolo11n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model
yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Custom trained model
# Track using ByteTrack tracker
@@ -81,7 +81,7 @@ Tracking configuration shares properties with Predict mode, such as `conf`, `iou
from ultralytics import YOLO
# Configure the tracking parameters and run the tracker
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)
```
@@ -89,7 +89,7 @@ results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5,
```bash
# Configure tracking parameters and run the tracker using the command line interface
-yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show
+yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show
```
### Tracker Selection
@@ -102,7 +102,7 @@ Ultralytics also allows you to use a modified tracker configuration file. To do
from ultralytics import YOLO
# Load the model and run the tracker with a custom configuration file
-model = YOLO("yolov8n.pt")
+model = YOLO("yolo11n.pt")
results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml")
```
@@ -110,7 +110,7 @@ results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tra
```bash
# Load the model and run the tracker with a custom configuration file using the command line interface
-yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml'
+yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml'
```
For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) page.
@@ -119,7 +119,7 @@ For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/tr
### Persisting Tracks Loop
-Here is a Python script using OpenCV (`cv2`) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker than the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image.
+Here is a Python script using OpenCV (`cv2`) and YOLO11 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image.
#### Python
@@ -128,8 +128,8 @@ import cv2
from ultralytics import YOLO
-# Load the YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load the YOLO11 model
+model = YOLO("yolo11n.pt")
# Open the video file
video_path = "path/to/video.mp4"
@@ -141,14 +141,14 @@ while cap.isOpened():
success, frame = cap.read()
if success:
- # Run YOLOv8 tracking on the frame, persisting tracks between frames
+ # Run YOLO11 tracking on the frame, persisting tracks between frames
results = model.track(frame, persist=True)
# Visualize the results on the frame
annotated_frame = results[0].plot()
# Display the annotated frame
- cv2.imshow("YOLOv8 Tracking", annotated_frame)
+ cv2.imshow("YOLO11 Tracking", annotated_frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord("q"):
@@ -166,9 +166,9 @@ Please note the change from `model(frame)` to `model.track(frame)`, which enable
### Plotting Tracks Over Time
-Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process.
+Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLO11, plotting these tracks is a seamless and efficient process.
-In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects.
+In the following example, we demonstrate how to utilize YOLO11's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects.
#### Python
@@ -180,8 +180,8 @@ import numpy as np
from ultralytics import YOLO
-# Load the YOLOv8 model
-model = YOLO("yolov8n.pt")
+# Load the YOLO11 model
+model = YOLO("yolo11n.pt")
# Open the video file
video_path = "path/to/video.mp4"
@@ -196,7 +196,7 @@ while cap.isOpened():
success, frame = cap.read()
if success:
- # Run YOLOv8 tracking on the frame, persisting tracks between frames
+ # Run YOLO11 tracking on the frame, persisting tracks between frames
results = model.track(frame, persist=True)
# Get the boxes and track IDs
@@ -225,7 +225,7 @@ while cap.isOpened():
)
# Display the annotated frame
- cv2.imshow("YOLOv8 Tracking", annotated_frame)
+ cv2.imshow("YOLO11 Tracking", annotated_frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord("q"):
@@ -247,7 +247,7 @@ In the provided Python script, we make use of Python's `threading` module to run
To ensure that each thread receives the correct parameters (the video file and the model to use), we define a function `run_tracker_in_thread` that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results.
-Two different models are used in this example: `yolov8n.pt` and `yolov8n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`.
+Two different models are used in this example: `yolo11n.pt` and `yolo11n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`.
The `daemon=True` parameter in `threading.Thread` means that these threads will be closed as soon as the main program finishes. We then start the threads with `start()` and use `join()` to make the main thread wait until both tracker threads have finished.
@@ -278,8 +278,8 @@ def run_tracker_in_thread(filename, model):
# Load the models
-model1 = YOLO("yolov8n.pt")
-model2 = YOLO("yolov8n-seg.pt")
+model1 = YOLO("yolo11n.pt")
+model2 = YOLO("yolo11n-seg.pt")
# Define the video files for the trackers
video_file1 = "path/to/video1.mp4"
diff --git a/ultralytics/trackers/__init__.py b/ultralytics/trackers/__init__.py
index bf51b8df699..2919511ba50 100644
--- a/ultralytics/trackers/__init__.py
+++ b/ultralytics/trackers/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
diff --git a/ultralytics/trackers/basetrack.py b/ultralytics/trackers/basetrack.py
index f3baaf4e4be..47b27269e2a 100644
--- a/ultralytics/trackers/basetrack.py
+++ b/ultralytics/trackers/basetrack.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Module defines the base classes and structures for object tracking in YOLO."""
from collections import OrderedDict
@@ -44,7 +44,7 @@ class BaseTrack:
start_frame (int): The frame number where tracking started.
frame_id (int): The most recent frame ID processed by the track.
time_since_update (int): Frames passed since the last update.
- location (Tuple): The location of the object in the context of multi-camera tracking.
+ location (tuple): The location of the object in the context of multi-camera tracking.
Methods:
end_frame: Returns the ID of the last frame where the object was tracked.
diff --git a/ultralytics/trackers/bot_sort.py b/ultralytics/trackers/bot_sort.py
index 1f10dc7f59e..8a2d02e0e14 100644
--- a/ultralytics/trackers/bot_sort.py
+++ b/ultralytics/trackers/bot_sort.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from collections import deque
diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py
index 31637de920c..807f4ad6678 100644
--- a/ultralytics/trackers/byte_tracker.py
+++ b/ultralytics/trackers/byte_tracker.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import numpy as np
diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py
index b0103cf98e2..6e422f0db8a 100644
--- a/ultralytics/trackers/track.py
+++ b/ultralytics/trackers/track.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from functools import partial
from pathlib import Path
@@ -31,6 +31,9 @@ def on_predict_start(predictor: object, persist: bool = False) -> None:
>>> predictor = SomePredictorClass()
>>> on_predict_start(predictor, persist=True)
"""
+ if predictor.args.task == "classify":
+        raise ValueError("❌ Classification doesn't support 'mode=track'")
+
if hasattr(predictor, "trackers") and persist:
return
diff --git a/ultralytics/trackers/utils/__init__.py b/ultralytics/trackers/utils/__init__.py
index 9e68dc12245..77a19dcf0f8 100644
--- a/ultralytics/trackers/utils/__init__.py
+++ b/ultralytics/trackers/utils/__init__.py
@@ -1 +1 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py
index 3619057fa72..e3cd2dc88ca 100644
--- a/ultralytics/trackers/utils/gmc.py
+++ b/ultralytics/trackers/utils/gmc.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import copy
@@ -26,9 +26,9 @@ class GMC:
Methods:
__init__: Initializes a GMC object with the specified method and downscale factor.
apply: Applies the chosen method to a raw frame and optionally uses provided detections.
- applyEcc: Applies the ECC algorithm to a raw frame.
- applyFeatures: Applies feature-based methods like ORB or SIFT to a raw frame.
- applySparseOptFlow: Applies the Sparse Optical Flow method to a raw frame.
+ apply_ecc: Applies the ECC algorithm to a raw frame.
+ apply_features: Applies feature-based methods like ORB or SIFT to a raw frame.
+ apply_sparseoptflow: Applies the Sparse Optical Flow method to a raw frame.
reset_params: Resets the internal parameters of the GMC object.
Examples:
@@ -108,15 +108,15 @@ def apply(self, raw_frame: np.array, detections: list = None) -> np.array:
(480, 640, 3)
"""
if self.method in {"orb", "sift"}:
- return self.applyFeatures(raw_frame, detections)
+ return self.apply_features(raw_frame, detections)
elif self.method == "ecc":
- return self.applyEcc(raw_frame)
+ return self.apply_ecc(raw_frame)
elif self.method == "sparseOptFlow":
- return self.applySparseOptFlow(raw_frame)
+ return self.apply_sparseoptflow(raw_frame)
else:
return np.eye(2, 3)
- def applyEcc(self, raw_frame: np.array) -> np.array:
+ def apply_ecc(self, raw_frame: np.array) -> np.array:
"""
Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation.
@@ -128,7 +128,7 @@ def applyEcc(self, raw_frame: np.array) -> np.array:
Examples:
>>> gmc = GMC(method="ecc")
- >>> processed_frame = gmc.applyEcc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+ >>> processed_frame = gmc.apply_ecc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
>>> print(processed_frame)
[[1. 0. 0.]
[0. 1. 0.]]
@@ -161,7 +161,7 @@ def applyEcc(self, raw_frame: np.array) -> np.array:
return H
- def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.array:
+ def apply_features(self, raw_frame: np.array, detections: list = None) -> np.array:
"""
Apply feature-based methods like ORB or SIFT to a raw frame.
@@ -175,7 +175,7 @@ def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.arra
Examples:
>>> gmc = GMC(method="orb")
>>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
- >>> processed_frame = gmc.applyFeatures(raw_frame)
+ >>> processed_frame = gmc.apply_features(raw_frame)
>>> print(processed_frame.shape)
(2, 3)
"""
@@ -304,7 +304,7 @@ def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.arra
return H
- def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
+ def apply_sparseoptflow(self, raw_frame: np.array) -> np.array:
"""
Apply Sparse Optical Flow method to a raw frame.
@@ -316,7 +316,7 @@ def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
Examples:
>>> gmc = GMC()
- >>> result = gmc.applySparseOptFlow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+ >>> result = gmc.apply_sparseoptflow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
>>> print(result)
[[1. 0. 0.]
[0. 1. 0.]]
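The GMC hunks above are a pure camelCase-to-snake_case rename, with apply() dispatching to the renamed methods. A small sketch of that dispatch pattern, assuming only NumPy (the real class also carries OpenCV feature-tracking state):

    import numpy as np

    class MiniGMC:
        """Toy motion-compensation dispatcher; returns a 2x3 identity warp when no method matches."""

        def __init__(self, method: str = "sparseOptFlow"):
            self.method = method

        def apply(self, raw_frame: np.ndarray) -> np.ndarray:
            if self.method in {"orb", "sift"}:
                return self.apply_features(raw_frame)
            elif self.method == "ecc":
                return self.apply_ecc(raw_frame)
            elif self.method == "sparseOptFlow":
                return self.apply_sparseoptflow(raw_frame)
            return np.eye(2, 3)

        def apply_ecc(self, raw_frame):  # placeholder for the real estimator
            return np.eye(2, 3)

        apply_features = apply_sparseoptflow = apply_ecc  # placeholders share one implementation

    print(MiniGMC("ecc").apply(np.zeros((4, 4, 3))))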
diff --git a/ultralytics/trackers/utils/kalman_filter.py b/ultralytics/trackers/utils/kalman_filter.py
index d103d0bb112..8a212ba63a9 100644
--- a/ultralytics/trackers/utils/kalman_filter.py
+++ b/ultralytics/trackers/utils/kalman_filter.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import numpy as np
import scipy.linalg
diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py
index f969f1126af..f15f64df185 100644
--- a/ultralytics/trackers/utils/matching.py
+++ b/ultralytics/trackers/utils/matching.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import numpy as np
import scipy
@@ -13,7 +13,7 @@
except (ImportError, AssertionError, AttributeError):
from ultralytics.utils.checks import check_requirements
- check_requirements("lapx>=0.5.2") # update to lap package from https://github.com/rathaROG/lapx
+ check_requirements("lap>=0.5.12") # https://github.com/gatagat/lap
import lap
@@ -27,10 +27,9 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr
use_lap (bool): Use lap.lapjv for the assignment. If False, scipy.optimize.linear_sum_assignment is used.
Returns:
- (tuple): A tuple containing:
- - matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches.
- - unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,).
- - unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,).
+ matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches.
+ unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,).
+ unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,).
Examples:
>>> cost_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
@@ -56,8 +55,8 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr
unmatched_a = list(np.arange(cost_matrix.shape[0]))
unmatched_b = list(np.arange(cost_matrix.shape[1]))
else:
- unmatched_a = list(set(np.arange(cost_matrix.shape[0])) - set(matches[:, 0]))
- unmatched_b = list(set(np.arange(cost_matrix.shape[1])) - set(matches[:, 1]))
+ unmatched_a = list(frozenset(np.arange(cost_matrix.shape[0])) - frozenset(matches[:, 0]))
+ unmatched_b = list(frozenset(np.arange(cost_matrix.shape[1])) - frozenset(matches[:, 1]))
return matches, unmatched_a, unmatched_b
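The linear_assignment hunks above flatten the Returns docstring and switch the unmatched-index bookkeeping to frozenset differences. A self-contained sketch of the same idea using only SciPy's Hungarian solver (the real code prefers the lap package when available):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    def assign(cost: np.ndarray, thresh: float):
        """Match rows to columns, drop pairs whose cost exceeds thresh, report unmatched indices."""
        rows, cols = linear_sum_assignment(cost)
        matches = np.asarray([[r, c] for r, c in zip(rows, cols) if cost[r, c] <= thresh], dtype=int)
        matched_a = frozenset(matches[:, 0]) if matches.size else frozenset()
        matched_b = frozenset(matches[:, 1]) if matches.size else frozenset()
        unmatched_a = sorted(frozenset(range(cost.shape[0])) - matched_a)
        unmatched_b = sorted(frozenset(range(cost.shape[1])) - matched_b)
        return matches, unmatched_a, unmatched_b

    cost = np.array([[1.0, 9.0], [9.0, 2.0], [9.0, 9.0]])
    print(assign(cost, thresh=5.0))  # rows 0 and 1 match; row 2 stays unmatched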
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 02610b88764..3afa07a973d 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import importlib.metadata
@@ -12,19 +12,20 @@
import sys
import threading
import time
-import urllib
import uuid
+import warnings
from pathlib import Path
from threading import Lock
from types import SimpleNamespace
from typing import Union
+from urllib.parse import unquote
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
+import tqdm
import yaml
-from tqdm import tqdm as tqdm_original
from ultralytics import __version__
@@ -37,7 +38,9 @@
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLO
ASSETS = ROOT / "assets" # default images
+ASSETS_URL = "https://github.com/ultralytics/assets/releases/download/v0.0.0" # assets GitHub URL
DEFAULT_CFG_PATH = ROOT / "cfg/default.yaml"
+DEFAULT_SOL_CFG_PATH = ROOT / "cfg/solutions/default.yaml" # Ultralytics solutions yaml path
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLO multiprocessing threads
AUTOINSTALL = str(os.getenv("YOLO_AUTOINSTALL", True)).lower() == "true" # global auto-install mode
VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true" # global verbose mode
@@ -49,6 +52,20 @@
TORCH_VERSION = torch.__version__
TORCHVISION_VERSION = importlib.metadata.version("torchvision") # faster than importing torchvision
IS_VSCODE = os.environ.get("TERM_PROGRAM", False) == "vscode"
+RKNN_CHIPS = frozenset(
+ {
+ "rk3588",
+ "rk3576",
+ "rk3566",
+ "rk3568",
+ "rk3562",
+ "rv1103",
+ "rv1106",
+ "rv1103b",
+ "rv1106b",
+ "rk2118",
+ }
+) # Rockchip processors available for export
HELP_MSG = """
Examples for running Ultralytics:
@@ -61,8 +78,8 @@
from ultralytics import YOLO
# Load a model
- model = YOLO("yolov8n.yaml") # build a new model from scratch
- model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training)
+ model = YOLO("yolo11n.yaml") # build a new model from scratch
+ model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training)
# Use the model
results = model.train(data="coco8.yaml", epochs=3) # train the model
@@ -77,21 +94,21 @@
yolo TASK MODE ARGS
Where TASK (optional) is one of [detect, segment, classify, pose, obb]
- MODE (required) is one of [train, val, predict, export, benchmark]
+ MODE (required) is one of [train, val, predict, export, track, benchmark]
ARGS (optional) are any number of custom "arg=value" pairs like "imgsz=320" that override defaults.
See all ARGS at https://docs.ultralytics.com/usage/cfg or with "yolo cfg"
- Train a detection model for 10 epochs with an initial learning_rate of 0.01
- yolo detect train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01
+ yolo detect train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01
- Predict a YouTube video using a pretrained segmentation model at image size 320:
- yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320
+ yolo segment predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320
- Val a pretrained detection model at batch-size 1 and image size 640:
- yolo detect val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640
+ yolo detect val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640
- - Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
- yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128
+ - Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required)
+ yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128
- Run special commands:
yolo help
@@ -111,12 +128,16 @@
np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5
cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ["NUMEXPR_MAX_THREADS"] = str(NUM_THREADS) # NumExpr max threads
+os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # for deterministic training to avoid CUDA warning
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # suppress verbose TF compiler warnings in Colab
os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR" # suppress "NNPACK.cpp could not initialize NNPACK" warnings
os.environ["KINETO_LOG_LEVEL"] = "5" # suppress verbose PyTorch profiler output when computing FLOPs
+if TQDM_RICH := str(os.getenv("YOLO_TQDM_RICH", False)).lower() == "true":
+ from tqdm import rich
+
-class TQDM(tqdm_original):
+class TQDM(rich.tqdm if TQDM_RICH else tqdm.tqdm):
"""
A custom TQDM progress bar class that extends the original tqdm functionality.
@@ -159,7 +180,8 @@ def __init__(self, *args, **kwargs):
... # Your code here
... pass
"""
- kwargs["disable"] = not VERBOSE or kwargs.get("disable", False) # logical 'and' with default value if passed
+ warnings.filterwarnings("ignore", category=tqdm.TqdmExperimentalWarning) # suppress tqdm.rich warning
+ kwargs["disable"] = not VERBOSE or kwargs.get("disable", False)
kwargs.setdefault("bar_format", TQDM_BAR_FORMAT) # override default value if passed
super().__init__(*args, **kwargs)
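The TQDM hunk above picks the progress-bar base class once, at import time, from the YOLO_TQDM_RICH environment variable. A dependency-free sketch of that pattern with stand-in bar classes (the env var name below is hypothetical):

    import os

    class _PlainBar:
        """Minimal stand-in for tqdm.tqdm."""

        def __init__(self, iterable=(), **kwargs):
            self.iterable = iterable

        def __iter__(self):
            return iter(self.iterable)

    class _RichBar(_PlainBar):
        """Minimal stand-in for tqdm.rich.tqdm."""

    # The base class is chosen once, at import time, from an environment variable
    USE_RICH = str(os.getenv("DEMO_TQDM_RICH", False)).lower() == "true"

    class ProgressBar(_RichBar if USE_RICH else _PlainBar):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("bar_format", "{l_bar}{bar}")  # apply defaults before delegating
            super().__init__(*args, **kwargs)

    print(list(ProgressBar(range(3))))  # [0, 1, 2]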
@@ -507,6 +529,7 @@ def yaml_print(yaml_file: Union[str, Path, dict]) -> None:
# Default configuration
DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH)
+DEFAULT_SOL_DICT = yaml_load(DEFAULT_SOL_CFG_PATH) # Ultralytics solutions configuration
for k, v in DEFAULT_CFG_DICT.items():
if isinstance(v, str) and v.lower() == "none":
DEFAULT_CFG_DICT[k] = None
@@ -520,12 +543,9 @@ def read_device_model() -> str:
is_raspberrypi().
Returns:
- (str): Model file contents if read successfully or empty string otherwise.
+ (str): Kernel release information.
"""
- with contextlib.suppress(Exception):
- with open("/proc/device-tree/model") as f:
- return f.read()
- return ""
+ return platform.release().lower()
def is_ubuntu() -> bool:
@@ -535,10 +555,11 @@ def is_ubuntu() -> bool:
Returns:
(bool): True if OS is Ubuntu, False otherwise.
"""
- with contextlib.suppress(FileNotFoundError):
+ try:
with open("/etc/os-release") as f:
return "ID=ubuntu" in f.read()
- return False
+ except FileNotFoundError:
+ return False
def is_colab():
@@ -563,16 +584,26 @@ def is_kaggle():
def is_jupyter():
"""
- Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+ Check if the current script is running inside a Jupyter Notebook.
Returns:
(bool): True if running inside a Jupyter Notebook, False otherwise.
+
+ Note:
+ - Only works on Colab and Kaggle; other environments like JupyterLab and Paperspace are not reliably detectable.
+ - The "get_ipython" in globals() check suffers false positives when the IPython package is installed manually.
"""
- with contextlib.suppress(Exception):
- from IPython import get_ipython
+ return IS_COLAB or IS_KAGGLE
- return get_ipython() is not None
- return False
+
+def is_runpod():
+ """
+ Check if the current script is running inside a RunPod container.
+
+ Returns:
+ (bool): True if running in RunPod, False otherwise.
+ """
+ return "RUNPOD_POD_ID" in os.environ
def is_docker() -> bool:
@@ -582,10 +613,11 @@ def is_docker() -> bool:
Returns:
(bool): True if the script is running inside a Docker container, False otherwise.
"""
- with contextlib.suppress(Exception):
+ try:
with open("/proc/self/cgroup") as f:
return "docker" in f.read()
- return False
+ except Exception:
+ return False
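Several environment checks above swap contextlib.suppress for an explicit try/except so every code path returns a bool. A side-by-side sketch of the two shapes (the probed path is only an example and may not exist on every system):

    import contextlib

    def is_docker_suppress() -> bool:
        # Old pattern: errors fall through silently to the trailing return
        with contextlib.suppress(Exception):
            with open("/proc/self/cgroup") as f:
                return "docker" in f.read()
        return False

    def is_docker_explicit() -> bool:
        # New pattern: the failure path is spelled out and the return type is always bool
        try:
            with open("/proc/self/cgroup") as f:
                return "docker" in f.read()
        except Exception:
            return False

    print(is_docker_suppress() == is_docker_explicit())  # both behave the same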
def is_raspberrypi() -> bool:
@@ -595,18 +627,17 @@ def is_raspberrypi() -> bool:
Returns:
(bool): True if running on a Raspberry Pi, False otherwise.
"""
- return "Raspberry Pi" in PROC_DEVICE_MODEL
+ return "rpi" in DEVICE_MODEL
def is_jetson() -> bool:
"""
- Determines if the Python environment is running on a Jetson Nano or Jetson Orin device by checking the device model
- information.
+ Determines if the Python environment is running on an NVIDIA Jetson device by checking the device model information.
Returns:
- (bool): True if running on a Jetson Nano or Jetson Orin, False otherwise.
+ (bool): True if running on an NVIDIA Jetson device, False otherwise.
"""
- return "NVIDIA" in PROC_DEVICE_MODEL # i.e. "NVIDIA Jetson Nano" or "NVIDIA Orin NX"
+ return "tegra" in DEVICE_MODEL
def is_online() -> bool:
@@ -616,14 +647,15 @@ def is_online() -> bool:
Returns:
(bool): True if connection is successful, False otherwise.
"""
- with contextlib.suppress(Exception):
+ try:
assert str(os.getenv("YOLO_OFFLINE", "")).lower() != "true" # check if ENV var YOLO_OFFLINE="True"
import socket
for dns in ("1.1.1.1", "8.8.8.8"): # check Cloudflare and Google DNS
socket.create_connection(address=(dns, 80), timeout=2.0).close()
return True
- return False
+ except Exception:
+ return False
def is_pip_package(filepath: str = __name__) -> bool:
@@ -710,9 +742,11 @@ def get_git_origin_url():
(str | None): The origin URL of the git repository or None if not git directory.
"""
if IS_GIT_DIR:
- with contextlib.suppress(subprocess.CalledProcessError):
+ try:
origin = subprocess.check_output(["git", "config", "--get", "remote.origin.url"])
return origin.decode().strip()
+ except subprocess.CalledProcessError:
+ return None
def get_git_branch():
@@ -723,9 +757,11 @@ def get_git_branch():
(str | None): The current git branch name or None if not a git directory.
"""
if IS_GIT_DIR:
- with contextlib.suppress(subprocess.CalledProcessError):
+ try:
origin = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
return origin.decode().strip()
+ except subprocess.CalledProcessError:
+ return None
def get_default_args(func):
@@ -750,9 +786,11 @@ def get_ubuntu_version():
(str): Ubuntu version or None if not an Ubuntu OS.
"""
if is_ubuntu():
- with contextlib.suppress(FileNotFoundError, AttributeError):
+ try:
with open("/etc/os-release") as f:
return re.search(r'VERSION_ID="(\d+\.\d+)"', f.read())[1]
+ except (FileNotFoundError, AttributeError):
+ return None
def get_user_config_dir(sub_dir="Ultralytics"):
@@ -789,13 +827,13 @@ def get_user_config_dir(sub_dir="Ultralytics"):
# Define constants (required below)
-PROC_DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant
+DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant
ONLINE = is_online()
IS_COLAB = is_colab()
+IS_KAGGLE = is_kaggle()
IS_DOCKER = is_docker()
IS_JETSON = is_jetson()
IS_JUPYTER = is_jupyter()
-IS_KAGGLE = is_kaggle()
IS_PIP_PACKAGE = is_pip_package()
IS_RASPBERRYPI = is_raspberrypi()
GIT_DIR = get_git_dir()
@@ -970,7 +1008,7 @@ def wrapper(*args, **kwargs):
def set_sentry():
"""
Initialize the Sentry SDK for error tracking and reporting. Only used if sentry_sdk package is installed and
- sync=True in settings. Run 'yolo settings' to see and update settings YAML file.
+ sync=True in settings. Run 'yolo settings' to see and update settings.
Conditions required to send errors (ALL conditions must be met or no errors will be reported):
- sentry_sdk package is installed
@@ -982,11 +1020,26 @@ def set_sentry():
- online environment
- CLI used to run package (checked with 'yolo' as the name of the main CLI command)
- The function also configures Sentry SDK to ignore KeyboardInterrupt and FileNotFoundError
- exceptions and to exclude events with 'out of memory' in their exception message.
+ The function also configures Sentry SDK to ignore KeyboardInterrupt and FileNotFoundError exceptions and to exclude
+ events with 'out of memory' in their exception message.
Additionally, the function sets custom tags and user information for Sentry events.
"""
+ if (
+ not SETTINGS["sync"]
+ or RANK not in {-1, 0}
+ or Path(ARGV[0]).name != "yolo"
+ or TESTS_RUNNING
+ or not ONLINE
+ or not IS_PIP_PACKAGE
+ or IS_GIT_DIR
+ ):
+ return
+ # If sentry_sdk package is not installed then return and do not use Sentry
+ try:
+ import sentry_sdk # noqa
+ except ImportError:
+ return
def before_send(event, hint):
"""
@@ -1000,7 +1053,7 @@ def before_send(event, hint):
dict: The modified event or None if the event should not be sent to Sentry.
"""
if "exc_info" in hint:
- exc_type, exc_value, tb = hint["exc_info"]
+ exc_type, exc_value, _ = hint["exc_info"]
if exc_type in {KeyboardInterrupt, FileNotFoundError} or "out of memory" in str(exc_value):
return None # do not send event
@@ -1012,31 +1065,17 @@ def before_send(event, hint):
}
return event
- if (
- SETTINGS["sync"]
- and RANK in {-1, 0}
- and Path(ARGV[0]).name == "yolo"
- and not TESTS_RUNNING
- and ONLINE
- and IS_PIP_PACKAGE
- and not IS_GIT_DIR
- ):
- # If sentry_sdk package is not installed then return and do not use Sentry
- try:
- import sentry_sdk # noqa
- except ImportError:
- return
-
- sentry_sdk.init(
- dsn="https://5ff1556b71594bfea135ff0203a0d290@o4504521589325824.ingest.sentry.io/4504521592406016",
- debug=False,
- traces_sample_rate=1.0,
- release=__version__,
- environment="production", # 'dev' or 'production'
- before_send=before_send,
- ignore_errors=[KeyboardInterrupt, FileNotFoundError],
- )
- sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash
+ sentry_sdk.init(
+ dsn="https://888e5a0778212e1d0314c37d4b9aae5d@o4504521589325824.ingest.us.sentry.io/4504521592406016",
+ debug=False,
+ auto_enabling_integrations=False,
+ traces_sample_rate=1.0,
+ release=__version__,
+ environment="runpod" if is_runpod() else "production",
+ before_send=before_send,
+ ignore_errors=[KeyboardInterrupt, FileNotFoundError],
+ )
+ sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash
class JSONDict(dict):
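The set_sentry() rewrite above replaces one large if-block with early returns, so the initialization code is no longer nested. A compressed sketch of that guard-clause shape; the settings keys here are illustrative, not the real SETTINGS schema:

    def configure_error_reporting(settings: dict) -> None:
        """Bail out early on every disqualifying condition instead of nesting the body in one big if."""
        if not settings.get("sync") or settings.get("tests_running") or not settings.get("online"):
            return
        try:
            import sentry_sdk  # noqa: F401  # optional dependency; skip silently when absent
        except ImportError:
            return
        print("error reporting would be initialized here")

    configure_error_reporting({"sync": True, "online": False})  # returns without printing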
@@ -1116,7 +1155,8 @@ def __delitem__(self, key):
def __str__(self):
"""Return a pretty-printed JSON string representation of the dictionary."""
- return f'JSONDict("{self.file_path}"):\n{json.dumps(dict(self), indent=2, ensure_ascii=False, default=self._json_default)}'
+ contents = json.dumps(dict(self), indent=2, ensure_ascii=False, default=self._json_default)
+ return f'JSONDict("{self.file_path}"):\n{contents}'
def update(self, *args, **kwargs):
"""Update the dictionary and persist changes."""
@@ -1169,25 +1209,26 @@ def __init__(self, file=SETTINGS_FILE, version="0.0.6"):
self.file = Path(file)
self.version = version
self.defaults = {
- "settings_version": version,
- "datasets_dir": str(datasets_root / "datasets"),
- "weights_dir": str(root / "weights"),
- "runs_dir": str(root / "runs"),
- "uuid": hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(),
- "sync": True,
- "api_key": "",
- "openai_api_key": "",
- "clearml": True, # integrations
- "comet": True,
- "dvc": True,
- "hub": True,
- "mlflow": True,
- "neptune": True,
- "raytune": True,
- "tensorboard": True,
- "wandb": True,
- "vscode_msg": True,
+ "settings_version": version, # Settings schema version
+ "datasets_dir": str(datasets_root / "datasets"), # Datasets directory
+ "weights_dir": str(root / "weights"), # Model weights directory
+ "runs_dir": str(root / "runs"), # Experiment runs directory
+ "uuid": hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(), # SHA-256 anonymized UUID hash
+ "sync": True, # Enable synchronization
+ "api_key": "", # Ultralytics API Key
+ "openai_api_key": "", # OpenAI API Key
+ "clearml": True, # ClearML integration
+ "comet": True, # Comet integration
+ "dvc": True, # DVC integration
+ "hub": True, # Ultralytics HUB integration
+ "mlflow": True, # MLflow integration
+ "neptune": True, # Neptune integration
+ "raytune": True, # Ray Tune integration
+ "tensorboard": True, # TensorBoard logging
+ "wandb": False, # Weights & Biases logging
+ "vscode_msg": True, # VSCode messaging
}
+
self.help_msg = (
f"\nView Ultralytics Settings with 'yolo settings' or at '{self.file}'"
"\nUpdate Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. "
@@ -1205,7 +1246,7 @@ def __init__(self, file=SETTINGS_FILE, version="0.0.6"):
def _validate_settings(self):
"""Validate the current settings and reset if necessary."""
- correct_keys = set(self.keys()) == set(self.defaults.keys())
+ correct_keys = frozenset(self.keys()) == frozenset(self.defaults.keys())
correct_types = all(isinstance(self.get(k), type(v)) for k, v in self.defaults.items())
correct_version = self.get("settings_version", "") == self.version
@@ -1223,14 +1264,23 @@ def _validate_settings(self):
f"Please change one to avoid possible issues during training. {self.help_msg}"
)
+ def __setitem__(self, key, value):
+ """Updates one key: value pair."""
+ self.update({key: value})
+
def update(self, *args, **kwargs):
"""Updates settings, validating keys and types."""
+ for arg in args:
+ if isinstance(arg, dict):
+ kwargs.update(arg)
for k, v in kwargs.items():
if k not in self.defaults:
raise KeyError(f"No Ultralytics setting '{k}'. {self.help_msg}")
t = type(self.defaults[k])
if not isinstance(v, t):
- raise TypeError(f"Ultralytics setting '{k}' must be of type '{t}', not '{type(v)}'. {self.help_msg}")
+ raise TypeError(
+ f"Ultralytics setting '{k}' must be '{t.__name__}' type, not '{type(v).__name__}'. {self.help_msg}"
+ )
super().update(*args, **kwargs)
def reset(self):
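The SettingsManager hunk above adds __setitem__ and dict-style positional updates while keeping key and type validation. A self-contained sketch of that validated-update pattern, using a made-up defaults schema:

    class TypedSettings(dict):
        """Dict whose updates are validated against a defaults schema (keys and value types)."""

        defaults = {"sync": True, "runs_dir": "runs", "settings_version": "0.0.6"}

        def __setitem__(self, key, value):
            self.update({key: value})  # route single assignments through the validating update()

        def update(self, *args, **kwargs):
            for arg in args:  # accept dict positional arguments, mirroring the hunk above
                if isinstance(arg, dict):
                    kwargs.update(arg)
            for k, v in kwargs.items():
                if k not in self.defaults:
                    raise KeyError(f"No setting '{k}'")
                t = type(self.defaults[k])
                if not isinstance(v, t):
                    raise TypeError(f"Setting '{k}' must be '{t.__name__}' type, not '{type(v).__name__}'")
            super().update(*args, **kwargs)

    s = TypedSettings(TypedSettings.defaults)
    s["sync"] = False      # OK: validated assignment
    # s["sync"] = "no"     # would raise TypeError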
@@ -1239,15 +1289,18 @@ def reset(self):
self.update(self.defaults)
-def deprecation_warn(arg, new_arg):
+def deprecation_warn(arg, new_arg=None):
"""Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument."""
- LOGGER.warning(f"WARNING โ ๏ธ '{arg}' is deprecated and will be removed in in the future. Use '{new_arg}' instead.")
+ msg = f"WARNING โ ๏ธ '{arg}' is deprecated and will be removed in in the future."
+ if new_arg is not None:
+ msg += f" Use '{new_arg}' instead."
+ LOGGER.warning(msg)
def clean_url(url):
"""Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt."""
url = Path(url).as_posix().replace(":/", "://") # Pathlib turns :// -> :/, as_posix() for Windows
- return urllib.parse.unquote(url).split("?")[0] # '%2F' to '/', split https://url.com/file.txt?auth
+ return unquote(url).split("?")[0] # '%2F' to '/', split https://url.com/file.txt?auth
def url2file(url):
diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py
index 2d09c5d894e..085001a153c 100644
--- a/ultralytics/utils/autobatch.py
+++ b/ultralytics/utils/autobatch.py
@@ -1,6 +1,7 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch."""
+import os
from copy import deepcopy
import numpy as np
@@ -10,7 +11,7 @@
from ultralytics.utils.torch_utils import autocast, profile
-def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
+def check_train_batch_size(model, imgsz=640, amp=True, batch=-1, max_num_obj=1):
"""
Compute optimal YOLO training batch size using the autobatch() function.
@@ -19,6 +20,7 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
imgsz (int, optional): Image size used for training.
amp (bool, optional): Use automatic mixed precision if True.
batch (float, optional): Fraction of GPU memory to use. If -1, use default.
+ max_num_obj (int, optional): The maximum number of objects from the dataset.
Returns:
(int): Optimal batch size computed using the autobatch() function.
@@ -28,10 +30,12 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
Otherwise, a default fraction of 0.6 is used.
"""
with autocast(enabled=amp):
- return autobatch(deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6)
+ return autobatch(
+ deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6, max_num_obj=max_num_obj
+ )
-def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
+def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch, max_num_obj=1):
"""
Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory.
@@ -40,6 +44,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640.
fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60.
batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16.
+ max_num_obj (int, optional): The maximum number of objects from the dataset.
Returns:
(int): The optimal batch size.
@@ -57,7 +62,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
# Inspect CUDA memory
gb = 1 << 30 # bytes to GiB (1024 ** 3)
- d = str(device).upper() # 'CUDA:0'
+ d = f"CUDA:{os.getenv('CUDA_VISIBLE_DEVICES', '0').strip()[0]}" # 'CUDA:0'
properties = torch.cuda.get_device_properties(device) # device properties
t = properties.total_memory / gb # GiB total
r = torch.cuda.memory_reserved(device) / gb # GiB reserved
@@ -66,26 +71,36 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free")
# Profile batch sizes
- batch_sizes = [1, 2, 4, 8, 16]
+ batch_sizes = [1, 2, 4, 8, 16] if t < 16 else [1, 2, 4, 8, 16, 32, 64]
try:
img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
- results = profile(img, model, n=3, device=device)
+ results = profile(img, model, n=1, device=device, max_num_obj=max_num_obj)
# Fit a solution
- y = [x[2] for x in results if x] # memory [2]
- p = np.polyfit(batch_sizes[: len(y)], y, deg=1) # first degree polynomial fit
- b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size)
+ xy = [
+ [x, y[2]]
+ for i, (x, y) in enumerate(zip(batch_sizes, results))
+ if y # valid result
+ and isinstance(y[2], (int, float)) # is numeric
+ and 0 < y[2] < t # between 0 and GPU limit
+ and (i == 0 or not results[i - 1] or y[2] > results[i - 1][2]) # first item or increasing memory
+ ]
+ fit_x, fit_y = zip(*xy) if xy else ([], [])
+ p = np.polyfit(np.log(fit_x), np.log(fit_y), deg=1) # first-degree polynomial fit in log space
+ b = int(round(np.exp((np.log(f * fraction) - p[1]) / p[0]))) # y intercept (optimal batch size)
if None in results: # some sizes failed
i = results.index(None) # first fail index
if b >= batch_sizes[i]: # y intercept above failure point
b = batch_sizes[max(i - 1, 0)] # select prior safe point
if b < 1 or b > 1024: # b outside of safe range
+ LOGGER.info(f"{prefix}WARNING โ ๏ธ batch={b} outside safe range, using default batch-size {batch_size}.")
b = batch_size
- LOGGER.info(f"{prefix}WARNING โ ๏ธ CUDA anomaly detected, using default batch-size {batch_size}.")
- fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted
+ fraction = (np.exp(np.polyval(p, np.log(b))) + r + a) / t # predicted fraction
LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) โ ")
return b
except Exception as e:
LOGGER.warning(f"{prefix}WARNING โ ๏ธ error detected: {e}, using default batch-size {batch_size}.")
return batch_size
+ finally:
+ torch.cuda.empty_cache()
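The autobatch hunk above filters the profiled points and fits memory versus batch size in log-log space, then inverts the fit at the memory budget. A NumPy-only sketch of that fit with made-up measurements:

    import numpy as np

    # Hypothetical profiling results: (batch size, GiB used); memory grows roughly linearly with batch
    sizes = np.array([1, 2, 4, 8, 16])
    mem = np.array([0.5, 0.9, 1.7, 3.3, 6.5])

    p = np.polyfit(np.log(sizes), np.log(mem), deg=1)  # first-degree fit in log space
    budget = 0.6 * 8.0  # e.g. target 60% of an 8 GiB card
    batch = int(round(np.exp((np.log(budget) - p[1]) / p[0])))  # invert the fit at the budget
    predicted = np.exp(np.polyval(p, np.log(batch)))  # predicted memory at the chosen batch
    print(batch, round(float(predicted), 2))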
diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py
index d5850a181a1..280a31d54f5 100644
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@@ -1,27 +1,29 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Benchmark YOLO model formats for speed and accuracy.
Usage:
from ultralytics.utils.benchmarks import ProfileModels, benchmark
- ProfileModels(['yolov8n.yaml', 'yolov8s.yaml']).profile()
- benchmark(model='yolov8n.pt', imgsz=160)
+ ProfileModels(['yolo11n.yaml', 'yolov8s.yaml']).profile()
+ benchmark(model='yolo11n.pt', imgsz=160)
Format | `format=argument` | Model
--- | --- | ---
-PyTorch | - | yolov8n.pt
-TorchScript | `torchscript` | yolov8n.torchscript
-ONNX | `onnx` | yolov8n.onnx
-OpenVINO | `openvino` | yolov8n_openvino_model/
-TensorRT | `engine` | yolov8n.engine
-CoreML | `coreml` | yolov8n.mlpackage
-TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/
-TensorFlow GraphDef | `pb` | yolov8n.pb
-TensorFlow Lite | `tflite` | yolov8n.tflite
-TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite
-TensorFlow.js | `tfjs` | yolov8n_web_model/
-PaddlePaddle | `paddle` | yolov8n_paddle_model/
-NCNN | `ncnn` | yolov8n_ncnn_model/
+PyTorch | - | yolo11n.pt
+TorchScript | `torchscript` | yolo11n.torchscript
+ONNX | `onnx` | yolo11n.onnx
+OpenVINO | `openvino` | yolo11n_openvino_model/
+TensorRT | `engine` | yolo11n.engine
+CoreML | `coreml` | yolo11n.mlpackage
+TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/
+TensorFlow GraphDef | `pb` | yolo11n.pb
+TensorFlow Lite | `tflite` | yolo11n.tflite
+TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite
+TensorFlow.js | `tfjs` | yolo11n_web_model/
+PaddlePaddle | `paddle` | yolo11n_paddle_model/
+MNN | `mnn` | yolo11n.mnn
+NCNN | `ncnn` | yolo11n_ncnn_model/
+RKNN | `rknn` | yolo11n_rknn_model/
"""
import glob
@@ -39,15 +41,15 @@
from ultralytics import YOLO, YOLOWorld
from ultralytics.cfg import TASK2DATA, TASK2METRIC
from ultralytics.engine.exporter import export_formats
-from ultralytics.utils import ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR
-from ultralytics.utils.checks import IS_PYTHON_3_12, check_requirements, check_yolo
+from ultralytics.utils import ARM64, ASSETS, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR
+from ultralytics.utils.checks import IS_PYTHON_3_12, check_imgsz, check_requirements, check_yolo, is_rockchip
from ultralytics.utils.downloads import safe_download
from ultralytics.utils.files import file_size
from ultralytics.utils.torch_utils import get_cpu_info, select_device
def benchmark(
- model=WEIGHTS_DIR / "yolov8n.pt",
+ model=WEIGHTS_DIR / "yolo11n.pt",
data=None,
imgsz=160,
half=False,
@@ -57,6 +59,7 @@ def benchmark(
separate_outputs=False,
export_hw_optimized=False,
eps=1e-3,
+ format="",
):
"""
Benchmark a YOLO model across different formats for speed and accuracy.
@@ -70,6 +73,7 @@ def benchmark(
device (str): Device to run the benchmark on, either 'cpu' or 'cuda'.
verbose (bool | float): If True or a float, assert benchmarks pass with given metric.
eps (float): Epsilon value for divide by zero prevention.
+ format (str): Export format for benchmarking. If not supplied, all formats are benchmarked.
Returns:
(pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size, metric,
@@ -78,8 +82,11 @@ def benchmark(
Examples:
Benchmark a YOLO model with default settings:
>>> from ultralytics.utils.benchmarks import benchmark
- >>> benchmark(model="yolov8n.pt", imgsz=640)
+ >>> benchmark(model="yolo11n.pt", imgsz=640)
"""
+ imgsz = check_imgsz(imgsz)
+ assert imgsz[0] == imgsz[1] if isinstance(imgsz, list) else True, "benchmark() only supports square imgsz."
+
import pandas as pd # scope for faster 'import ultralytics'
pd.options.display.max_columns = 10
@@ -91,30 +98,50 @@ def benchmark(
y = []
t0 = time.time()
- for i, (name, format, suffix, cpu, gpu) in enumerate(zip(*export_formats().values())):
+
+ format_arg = format.lower()
+ if format_arg:
+ formats = frozenset(export_formats()["Argument"])
+ assert format in formats, f"Expected format to be one of {formats}, but got '{format_arg}'."
+ for i, (name, format, suffix, cpu, gpu, _) in enumerate(zip(*export_formats().values())):
emoji, filename = "โ", None # export defaults
try:
+ if format_arg and format_arg != format:
+ continue
+
# Checks
if i == 7: # TF GraphDef
assert model.task != "obb", "TensorFlow GraphDef not supported for OBB task"
elif i == 9: # Edge TPU
assert LINUX and not ARM64, "Edge TPU export only supported on non-aarch64 Linux"
elif i in {5, 10}: # CoreML and TF.js
- assert MACOS or LINUX, "CoreML and TF.js export only supported on macOS and Linux"
- assert not IS_RASPBERRYPI, "CoreML and TF.js export not supported on Raspberry Pi"
- assert not IS_JETSON, "CoreML and TF.js export not supported on NVIDIA Jetson"
+ assert MACOS or (LINUX and not ARM64), (
+ "CoreML and TF.js export only supported on macOS and non-aarch64 Linux"
+ )
if i in {5}: # CoreML
assert not IS_PYTHON_3_12, "CoreML not supported on Python 3.12"
if i in {6, 7, 8}: # TF SavedModel, TF GraphDef, and TFLite
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
if i in {9, 10}: # TF EdgeTPU and TF.js
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
- if i in {11}: # Paddle
+ if i == 11: # Paddle
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
assert LINUX or MACOS, "Windows Paddle exports not supported yet"
- if i in {12}: # NCNN
+ if i == 12: # MNN
+ assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
+ if i == 13: # NCNN
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
+ if i == 14: # IMX
+ assert not is_end2end
+ assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
+ assert model.task == "detect", "IMX only supported for detection task"
+ assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"
+ if i == 15: # RKNN
+ assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
+ assert not is_end2end, "End-to-end models not supported by RKNN yet"
+ assert LINUX, "RKNN only supported on Linux"
+ assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
if "cpu" in device.type:
assert cpu, "inference not supported on CPU"
if "cuda" in device.type:
@@ -126,7 +153,7 @@ def benchmark(
# Export
if format == "-":
- filename = model.ckpt_path or model.cfg
+ filename = model.pt_path or model.ckpt_path or model.model_name
exported_model = model # PyTorch format
else:
filename = model.export(imgsz=imgsz, format=format, half=half, int8=int8, device=device, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized)
@@ -138,16 +165,16 @@ def benchmark(
assert model.task != "pose" or i != 7, "GraphDef Pose inference is not supported"
assert i not in {9, 10}, "inference not supported" # Edge TPU and TF.js are unsupported
assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13" # CoreML
- if i in {12}:
+ if i in {13}:
assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
- exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized)
+ exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized)
# Validate
data = data or TASK2DATA[model.task] # task to dataset, i.e. coco8.yaml for task=detect
- key = TASK2METRIC[model.task] # task to metric, i.e. metrics/mAP50-95(B) for task=detect
results = exported_model.val(
data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, int8=int8, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized
)
+ key = TASK2METRIC[model.task] # task to metric, i.e. metrics/mAP50-95(B) for task=detect
metric, speed = results.results_dict[key], results.speed["inference"]
fps = round(1000 / (speed + eps), 2) # frames per second
y.append([name, "โ ", round(file_size(filename), 1), round(metric, 4), round(speed, 2), fps])
@@ -161,8 +188,10 @@ def benchmark(
check_yolo(device=device) # print system info
df = pd.DataFrame(y, columns=["Format", "Status❔", "Size (MB)", key, "Inference time (ms/im)", "FPS"])
- name = Path(model.ckpt_path).name
- s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz}, separate_outputs={separate_outputs}, export_hw_optimized={export_hw_optimized} ({time.time() - t0:.2f}s)\n{df}\n"
+ name = model.model_name
+ dt = time.time() - t0
+ legend = "Benchmarks legend: - โ Success - โ Export passed but validation failed - โ๏ธ Export failed"
+ s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz}, separate_outputs={separate_outputs}, export_hw_optimized={export_hw_optimized} ({dt:.2f}s)\n{legend}\n{df.fillna('-')}\n"
LOGGER.info(s)
with open("benchmarks.log", "a", errors="ignore", encoding="utf-8") as f:
f.write(s)
@@ -332,7 +361,7 @@ class ProfileModels:
Examples:
Profile models and print results
>>> from ultralytics.utils.benchmarks import ProfileModels
- >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640)
+ >>> profiler = ProfileModels(["yolo11n.yaml", "yolov8s.yaml"], imgsz=640)
>>> profiler.profile()
"""
@@ -366,7 +395,7 @@ def __init__(
Examples:
Initialize and profile models
>>> from ultralytics.utils.benchmarks import ProfileModels
- >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640)
+ >>> profiler = ProfileModels(["yolo11n.yaml", "yolov8s.yaml"], imgsz=640)
>>> profiler.profile()
"""
self.paths = paths
@@ -438,7 +467,8 @@ def get_files(self):
print(f"Profiling: {sorted(files)}")
return [Path(file) for file in sorted(files)]
- def get_onnx_model_info(self, onnx_file: str):
+ @staticmethod
+ def get_onnx_model_info(onnx_file: str):
"""Extracts metadata from an ONNX model file including parameters, GFLOPs, and input shape."""
return 0.0, 0.0, 0.0, 0.0 # return (num_layers, num_params, num_gradients, num_flops)
@@ -461,7 +491,7 @@ def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
# Model and input
model = YOLO(engine_file)
- input_data = np.random.rand(self.imgsz, self.imgsz, 3).astype(np.float32) # must be FP32
+ input_data = np.zeros((self.imgsz, self.imgsz, 3), dtype=np.uint8) # use uint8 for Classify
# Warmup runs
elapsed = 0.0
@@ -542,8 +572,8 @@ def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
"""Generates a table row string with model performance metrics including inference times and model details."""
layers, params, gradients, flops = model_info
return (
- f"| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ยฑ {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ยฑ "
- f"{t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |"
+ f"| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.1f}ยฑ{t_onnx[1]:.1f} ms | {t_engine[0]:.1f}ยฑ"
+ f"{t_engine[1]:.1f} ms | {params / 1e6:.1f} | {flops:.1f} |"
)
@staticmethod
diff --git a/ultralytics/utils/callbacks/__init__.py b/ultralytics/utils/callbacks/__init__.py
index 116babe9b7f..920cc4fad9d 100644
--- a/ultralytics/utils/callbacks/__init__.py
+++ b/ultralytics/utils/callbacks/__init__.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .base import add_integration_callbacks, default_callbacks, get_default_callbacks
diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py
index 98b20256e52..11e0a8979e0 100644
--- a/ultralytics/utils/callbacks/base.py
+++ b/ultralytics/utils/callbacks/base.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Base callbacks."""
from collections import defaultdict
diff --git a/ultralytics/utils/callbacks/clearml.py b/ultralytics/utils/callbacks/clearml.py
index e076e55fa74..5afc7a3659f 100644
--- a/ultralytics/utils/callbacks/clearml.py
+++ b/ultralytics/utils/callbacks/clearml.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING
@@ -68,9 +68,9 @@ def on_pretrain_routine_start(trainer):
PatchedMatplotlib.update_current_task(None)
else:
task = Task.init(
- project_name=trainer.args.project or "YOLOv8",
+ project_name=trainer.args.project or "Ultralytics",
task_name=trainer.args.name,
- tags=["YOLOv8"],
+ tags=["Ultralytics"],
output_uri=True,
reuse_last_task_id=False,
auto_connect_frameworks={"pytorch": False, "matplotlib": False},
diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py
index 7e90a538638..910e3c424d2 100644
--- a/ultralytics/utils/callbacks/comet.py
+++ b/ultralytics/utils/callbacks/comet.py
@@ -1,6 +1,7 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops
+from ultralytics.utils.metrics import ClassifyMetrics, DetMetrics, OBBMetrics, PoseMetrics, SegmentMetrics
try:
assert not TESTS_RUNNING # do not log pytest
@@ -15,9 +16,12 @@
# Ensures certain logging functions only run for supported tasks
COMET_SUPPORTED_TASKS = ["detect"]
- # Names of plots created by YOLOv8 that are logged to Comet
- EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve", "confusion_matrix"
+ # Names of plots created by Ultralytics that are logged to Comet
+ CONFUSION_MATRIX_PLOT_NAMES = "confusion_matrix", "confusion_matrix_normalized"
+ EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve"
LABEL_PLOT_NAMES = "labels", "labels_correlogram"
+ SEGMENT_METRICS_PLOT_PREFIX = "Box", "Mask"
+ POSE_METRICS_PLOT_PREFIX = "Box", "Pose"
_comet_image_prediction_count = 0
@@ -31,8 +35,8 @@ def _get_comet_mode():
def _get_comet_model_name():
- """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'."""
- return os.getenv("COMET_MODEL_NAME", "YOLOv8")
+ """Returns the model name for Comet from the environment variable COMET_MODEL_NAME or defaults to 'Ultralytics'."""
+ return os.getenv("COMET_MODEL_NAME", "Ultralytics")
def _get_eval_batch_logging_interval():
@@ -86,7 +90,7 @@ def _create_experiment(args):
"max_image_predictions": _get_max_image_predictions_to_log(),
}
)
- experiment.log_other("Created from", "yolov8")
+ experiment.log_other("Created from", "ultralytics")
except Exception as e:
LOGGER.warning(f"WARNING โ ๏ธ Comet installed but not initialized correctly, not logging this run. {e}")
@@ -110,7 +114,7 @@ def _fetch_trainer_metadata(trainer):
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
"""
- YOLOv8 resizes images during training and the label values are normalized based on this resized shape.
+ YOLO resizes images during training and the label values are normalized based on this resized shape.
This function rescales the bounding box labels to the original image shape.
"""
@@ -274,11 +278,31 @@ def _log_image_predictions(experiment, validator, curr_step):
def _log_plots(experiment, trainer):
"""Logs evaluation plots and label plots for the experiment."""
- plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES]
- _log_images(experiment, plot_filenames, None)
-
- label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES]
- _log_images(experiment, label_plot_filenames, None)
+ plot_filenames = None
+ if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment":
+ plot_filenames = [
+ trainer.save_dir / f"{prefix}{plots}.png"
+ for plots in EVALUATION_PLOT_NAMES
+ for prefix in SEGMENT_METRICS_PLOT_PREFIX
+ ]
+ elif isinstance(trainer.validator.metrics, PoseMetrics):
+ plot_filenames = [
+ trainer.save_dir / f"{prefix}{plots}.png"
+ for plots in EVALUATION_PLOT_NAMES
+ for prefix in POSE_METRICS_PLOT_PREFIX
+ ]
+ elif isinstance(trainer.validator.metrics, (DetMetrics, OBBMetrics)):
+ plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES]
+
+ if plot_filenames is not None:
+ _log_images(experiment, plot_filenames, None)
+
+ confusion_matrix_filenames = [trainer.save_dir / f"{plots}.png" for plots in CONFUSION_MATRIX_PLOT_NAMES]
+ _log_images(experiment, confusion_matrix_filenames, None)
+
+ if not isinstance(trainer.validator.metrics, ClassifyMetrics):
+ label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES]
+ _log_images(experiment, label_plot_filenames, None)
def _log_model(experiment, trainer):
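The Comet hunk above now picks which plot files to upload from the type of the validator's metrics object. A type-dispatch sketch with stand-in metric classes (the real names come from ultralytics.utils.metrics, and the file naming is assumed for illustration):

    class DetMetrics: ...
    class SegmentMetrics: ...
    class PoseMetrics: ...
    class ClassifyMetrics: ...

    CURVES = ("F1_curve", "P_curve", "R_curve", "PR_curve")

    def evaluation_plot_names(metrics) -> list:
        """Expand curve names with the per-task prefixes used for saved plot files."""
        if isinstance(metrics, SegmentMetrics):
            prefixes = ("Box", "Mask")
        elif isinstance(metrics, PoseMetrics):
            prefixes = ("Box", "Pose")
        elif isinstance(metrics, ClassifyMetrics):
            return []  # classification has no curve plots to upload
        else:
            prefixes = ("",)
        return [f"{p}{c}.png" for c in CURVES for p in prefixes]

    print(evaluation_plot_names(PoseMetrics()))  # BoxF1_curve.png, PoseF1_curve.png, ...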
@@ -307,9 +331,6 @@ def on_train_epoch_end(trainer):
experiment.log_metrics(trainer.label_loss_items(trainer.tloss, prefix="train"), step=curr_step, epoch=curr_epoch)
- if curr_epoch == 1:
- _log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step)
-
def on_fit_epoch_end(trainer):
"""Logs model assets at the end of each epoch."""
@@ -356,6 +377,8 @@ def on_train_end(trainer):
_log_confusion_matrix(experiment, trainer, curr_step, curr_epoch)
_log_image_predictions(experiment, trainer.validator, curr_step)
+ _log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step)
+ _log_images(experiment, trainer.save_dir.glob("val_batch*.jpg"), curr_step)
experiment.end()
global _comet_image_prediction_count
diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py
index ab51dc52946..1cc0c632ecb 100644
--- a/ultralytics/utils/callbacks/dvc.py
+++ b/ultralytics/utils/callbacks/dvc.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, checks
diff --git a/ultralytics/utils/callbacks/hub.py b/ultralytics/utils/callbacks/hub.py
index fbcd1667efd..4709fbea8ba 100644
--- a/ultralytics/utils/callbacks/hub.py
+++ b/ultralytics/utils/callbacks/hub.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import json
from time import time
@@ -15,16 +15,14 @@ def on_pretrain_routine_start(trainer):
def on_pretrain_routine_end(trainer):
"""Logs info before starting timer for upload rate limit."""
- session = getattr(trainer, "hub_session", None)
- if session:
+ if session := getattr(trainer, "hub_session", None):
# Start timer for upload rate limit
session.timers = {"metrics": time(), "ckpt": time()} # start timer on session.rate_limit
def on_fit_epoch_end(trainer):
"""Uploads training progress metrics at the end of each epoch."""
- session = getattr(trainer, "hub_session", None)
- if session:
+ if session := getattr(trainer, "hub_session", None):
# Upload metrics after val end
all_plots = {
**trainer.label_loss_items(trainer.tloss, prefix="train"),
@@ -49,8 +47,7 @@ def on_fit_epoch_end(trainer):
def on_model_save(trainer):
"""Saves checkpoints to Ultralytics HUB with rate limiting."""
- session = getattr(trainer, "hub_session", None)
- if session:
+ if session := getattr(trainer, "hub_session", None):
# Upload checkpoints with rate limiting
is_best = trainer.best_fitness == trainer.fitness
if time() - session.timers["ckpt"] > session.rate_limits["ckpt"]:
@@ -61,8 +58,7 @@ def on_model_save(trainer):
def on_train_end(trainer):
"""Upload final model and metrics to Ultralytics HUB at the end of training."""
- session = getattr(trainer, "hub_session", None)
- if session:
+ if session := getattr(trainer, "hub_session", None):
# Upload final model and metrics with exponential standoff
LOGGER.info(f"{PREFIX}Syncing final model...")
session.upload_model(
@@ -72,7 +68,7 @@ def on_train_end(trainer):
final=True,
)
session.alive = False # stop heartbeats
- LOGGER.info(f"{PREFIX}Done โ \n" f"{PREFIX}View model at {session.model_url} ๐")
+ LOGGER.info(f"{PREFIX}Done โ \n{PREFIX}View model at {session.model_url} ๐")
def on_train_start(trainer):
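The HUB callback edits above collapse the fetch-then-check pattern into a single walrus-operator expression. A tiny sketch; Trainer and hub_session here are stand-ins, not the real classes:

    from types import SimpleNamespace

    def on_model_save(trainer) -> None:
        # `session := getattr(...)` fetches the attribute and tests it in one expression (Python 3.8+)
        if session := getattr(trainer, "hub_session", None):
            print(f"uploading checkpoint via {session.name}")

    on_model_save(SimpleNamespace())  # no session attribute: does nothing
    on_model_save(SimpleNamespace(hub_session=SimpleNamespace(name="demo")))  # prints the upload message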
diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py
index bbae4cc0cca..9d5dc2f16f6 100644
--- a/ultralytics/utils/callbacks/mlflow.py
+++ b/ultralytics/utils/callbacks/mlflow.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
MLflow Logging for Ultralytics YOLO.
@@ -69,7 +69,7 @@ def on_pretrain_routine_end(trainer):
mlflow.set_tracking_uri(uri)
# Set experiment and run names
- experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8"
+ experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/Ultralytics"
run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name
mlflow.set_experiment(experiment_name)
@@ -82,7 +82,7 @@ def on_pretrain_routine_end(trainer):
LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
mlflow.log_params(dict(trainer.args))
except Exception as e:
- LOGGER.warning(f"{PREFIX}WARNING โ ๏ธ Failed to initialize: {e}\n" f"{PREFIX}WARNING โ ๏ธ Not tracking this run")
+ LOGGER.warning(f"{PREFIX}WARNING โ ๏ธ Failed to initialize: {e}\n{PREFIX}WARNING โ ๏ธ Not tracking this run")
def on_train_epoch_end(trainer):
diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py
index 6be8a821f5d..7adfdad1fdb 100644
--- a/ultralytics/utils/callbacks/neptune.py
+++ b/ultralytics/utils/callbacks/neptune.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING
@@ -52,7 +52,11 @@ def on_pretrain_routine_start(trainer):
"""Callback function called before the training routine starts."""
try:
global run
- run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"])
+ run = neptune.init_run(
+ project=trainer.args.project or "Ultralytics",
+ name=trainer.args.name,
+ tags=["Ultralytics"],
+ )
run["Configuration/Hyperparameters"] = {k: "" if v is None else v for k, v in vars(trainer.args).items()}
except Exception as e:
LOGGER.warning(f"WARNING โ ๏ธ NeptuneAI installed but not initialized correctly, not logging this run. {e}")
diff --git a/ultralytics/utils/callbacks/raytune.py b/ultralytics/utils/callbacks/raytune.py
index 1a368db6637..e7e01d0985f 100644
--- a/ultralytics/utils/callbacks/raytune.py
+++ b/ultralytics/utils/callbacks/raytune.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import SETTINGS
@@ -16,8 +16,7 @@ def on_fit_epoch_end(trainer):
"""Sends training metrics to Ray Tune at end of each epoch."""
if ray.train._internal.session._get_session(): # replacement for deprecated ray.tune.is_session_enabled()
metrics = trainer.metrics
- metrics["epoch"] = trainer.epoch
- session.report(metrics)
+ session.report({**metrics, **{"epoch": trainer.epoch + 1}})
callbacks = (
diff --git a/ultralytics/utils/callbacks/tensorboard.py b/ultralytics/utils/callbacks/tensorboard.py
index 2aa114b53b7..2920fa23bf1 100644
--- a/ultralytics/utils/callbacks/tensorboard.py
+++ b/ultralytics/utils/callbacks/tensorboard.py
@@ -1,6 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-
-import contextlib
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
@@ -45,26 +43,27 @@ def _log_tensorboard_graph(trainer):
warnings.simplefilter("ignore", category=torch.jit.TracerWarning) # suppress jit trace warning
# Try simple method first (YOLO)
- with contextlib.suppress(Exception):
+ try:
trainer.model.eval() # place in .eval() mode to avoid BatchNorm statistics changes
WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), [])
LOGGER.info(f"{PREFIX}model graph visualization added โ ")
return
- # Fallback to TorchScript export steps (RTDETR)
- try:
- model = deepcopy(de_parallel(trainer.model))
- model.eval()
- model = model.fuse(verbose=False)
- for m in model.modules():
- if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class)
- m.export = True
- m.format = "torchscript"
- model(im) # dry run
- WRITER.add_graph(torch.jit.trace(model, im, strict=False), [])
- LOGGER.info(f"{PREFIX}model graph visualization added โ ")
- except Exception as e:
- LOGGER.warning(f"{PREFIX}WARNING โ ๏ธ TensorBoard graph visualization failure {e}")
+ except Exception:
+ # Fallback to TorchScript export steps (RTDETR)
+ try:
+ model = deepcopy(de_parallel(trainer.model))
+ model.eval()
+ model = model.fuse(verbose=False)
+ for m in model.modules():
+ if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class)
+ m.export = True
+ m.format = "torchscript"
+ model(im) # dry run
+ WRITER.add_graph(torch.jit.trace(model, im, strict=False), [])
+ LOGGER.info(f"{PREFIX}model graph visualization added โ ")
+ except Exception as e:
+ LOGGER.warning(f"{PREFIX}WARNING โ ๏ธ TensorBoard graph visualization failure {e}")
def on_pretrain_routine_start(trainer):
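The TensorBoard hunk above moves the RTDETR fallback into the except branch of the first attempt, so the heavier export path only runs when the simple jit trace fails. A sketch of that try-then-fallback shape with placeholder functions:

    def trace_simple():
        raise RuntimeError("pretend the plain torch.jit.trace failed")  # hypothetical failure

    def trace_with_export_mode():
        return "fallback graph"

    def log_graph():
        """Try the cheap path first; only run the heavier fallback when it raises."""
        try:
            return trace_simple()
        except Exception:
            try:
                return trace_with_export_mode()
            except Exception as e:
                print(f"WARNING: graph visualization failure {e}")

    print(log_graph())  # "fallback graph"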
diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py
index 7b69b7a45a8..7242d51e3d8 100644
--- a/ultralytics/utils/callbacks/wb.py
+++ b/ultralytics/utils/callbacks/wb.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import SETTINGS, TESTS_RUNNING
from ultralytics.utils.torch_utils import model_info_for_loggers
@@ -109,7 +109,12 @@ def _log_plots(plots, step):
def on_pretrain_routine_start(trainer):
"""Initiate and start project if module is present."""
- wb.run or wb.init(project=trainer.args.project or "YOLOv8", name=trainer.args.name, config=vars(trainer.args))
+ if not wb.run:
+ wb.init(
+ project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
+ name=str(trainer.args.name).replace("/", "-"),
+ config=vars(trainer.args),
+ )
def on_fit_epoch_end(trainer):
@@ -137,17 +142,19 @@ def on_train_end(trainer):
if trainer.best.exists():
art.add_file(trainer.best)
wb.run.log_artifact(art, aliases=["best"])
- for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
- x, y, x_title, y_title = curve_values
- _plot_curve(
- x,
- y,
- names=list(trainer.validator.metrics.names.values()),
- id=f"curves/{curve_name}",
- title=curve_name,
- x_title=x_title,
- y_title=y_title,
- )
+ # Check if we actually have plots to save
+ if trainer.args.plots and hasattr(trainer.validator.metrics, "curves_results"):
+ for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
+ x, y, x_title, y_title = curve_values
+ _plot_curve(
+ x,
+ y,
+ names=list(trainer.validator.metrics.names.values()),
+ id=f"curves/{curve_name}",
+ title=curve_name,
+ x_title=x_title,
+ y_title=y_title,
+ )
wb.run.finish() # required or run continues on dashboard
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 383c8562538..52f60f89c07 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import glob
import inspect
import math
@@ -20,11 +19,11 @@
import torch
from ultralytics.utils import (
+ ARM64,
ASSETS,
AUTOINSTALL,
IS_COLAB,
IS_GIT_DIR,
- IS_JUPYTER,
IS_KAGGLE,
IS_PIP_PACKAGE,
LINUX,
@@ -32,6 +31,7 @@
MACOS,
ONLINE,
PYTHON_VERSION,
+ RKNN_CHIPS,
ROOT,
TORCHVISION_VERSION,
USER_CONFIG_DIR,
@@ -77,8 +77,7 @@ def parse_requirements(file_path=ROOT.parent / "requirements.txt", package=""):
line = line.strip()
if line and not line.startswith("#"):
line = line.split("#")[0].strip() # ignore inline comments
- match = re.match(r"([a-zA-Z0-9-_]+)\s*([<>!=~]+.*)?", line)
- if match:
+ if match := re.match(r"([a-zA-Z0-9-_]+)\s*([<>!=~]+.*)?", line):
requirements.append(SimpleNamespace(name=match[1], specifier=match[2].strip() if match[2] else ""))
return requirements
@@ -239,12 +238,14 @@ def check_version(
c = parse_version(current) # '1.2.3' -> (1, 2, 3)
for r in required.strip(",").split(","):
op, version = re.match(r"([^0-9]*)([\d.]+)", r).groups() # split '>=22.04' -> ('>=', '22.04')
+ if not op:
+ op = ">=" # assume >= if no op passed
v = parse_version(version) # '1.2.3' -> (1, 2, 3)
if op == "==" and c != v:
result = False
elif op == "!=" and c == v:
result = False
- elif op in {">=", ""} and not (c >= v): # if no constraint passed assume '>=required'
+ elif op == ">=" and not (c >= v):
result = False
elif op == "<=" and not (c <= v):
result = False
@@ -271,11 +272,13 @@ def check_latest_pypi_version(package_name="ultralytics"):
Returns:
(str): The latest version of the package.
"""
- with contextlib.suppress(Exception):
+ try:
requests.packages.urllib3.disable_warnings() # Disable the InsecureRequestWarning
response = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=3)
if response.status_code == 200:
return response.json()["info"]["version"]
+ except Exception:
+ return None
def check_pip_update_available():
@@ -286,7 +289,7 @@ def check_pip_update_available():
(bool): True if an update is available, False otherwise.
"""
if ONLINE and IS_PIP_PACKAGE:
- with contextlib.suppress(Exception):
+ try:
from ultralytics import __version__
latest = check_latest_pypi_version()
@@ -296,6 +299,8 @@ def check_pip_update_available():
f"Update with 'pip install -U ultralytics'"
)
return True
+ except Exception:
+ pass
return False
@@ -330,18 +335,19 @@ def check_font(font="Arial.ttf"):
return file
-def check_python(minimum: str = "3.8.0", hard: bool = True) -> bool:
+def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = False) -> bool:
"""
Check current python version against the required minimum version.
Args:
minimum (str): Required minimum version of python.
hard (bool, optional): If True, raise an AssertionError if the requirement is not met.
+ verbose (bool, optional): If True, print warning message if requirement is not met.
Returns:
(bool): Whether the installed Python version meets the minimum constraints.
"""
- return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard)
+ return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard, verbose=verbose)
@TryExcept()
@@ -371,8 +377,6 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=()
```
"""
prefix = colorstr("red", "bold", "requirements:")
- check_python() # check python version
- check_torchvision() # check torch-torchvision compatibility
if isinstance(requirements, Path): # requirements.txt file
file = requirements.resolve()
assert file.exists(), f"{prefix} {file} not found, check failed."
@@ -429,8 +433,9 @@ def check_torchvision():
The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
Torchvision versions.
"""
- # Compatibility table
compatibility_table = {
+ "2.6": ["0.21"],
+ "2.5": ["0.20"],
"2.4": ["0.19"],
"2.3": ["0.18"],
"2.2": ["0.17"],
@@ -440,7 +445,7 @@ def check_torchvision():
"1.12": ["0.13"],
}
- # Extract only the major and minor versions
+ # Check major and minor versions
v_torch = ".".join(torch.__version__.split("+")[0].split(".")[:2])
if v_torch in compatibility_table:
compatible_versions = compatibility_table[v_torch]
@@ -454,7 +459,7 @@ def check_torchvision():
)
-def check_suffix(file="yolov8n.pt", suffix=".pt", msg=""):
+def check_suffix(file="yolo11n.pt", suffix=".pt", msg=""):
"""Check file(s) for acceptable suffix."""
if file and suffix:
if isinstance(suffix, str):
@@ -484,10 +489,10 @@ def check_yolov5u_filename(file: str, verbose: bool = True):
return file
-def check_model_file_from_stem(model="yolov8n"):
+def check_model_file_from_stem(model="yolo11n"):
"""Return a model filename from a valid model stem."""
if model and not Path(model).suffix and Path(model).stem in downloads.GITHUB_ASSETS_STEMS:
- return Path(model).with_suffix(".pt") # add suffix, i.e. yolov8n -> yolov8n.pt
+ return Path(model).with_suffix(".pt") # add suffix, i.e. yolo11n -> yolo11n.pt
else:
return model
@@ -565,11 +570,8 @@ def check_yolo(verbose=True, device=""):
from ultralytics.utils.torch_utils import select_device
- if IS_JUPYTER:
- if check_requirements("wandb", install=False):
- os.system("pip uninstall -y wandb") # uninstall wandb: unwanted account creation prompt with infinite hang
- if IS_COLAB:
- shutil.rmtree("sample_data", ignore_errors=True) # remove colab /sample_data directory
+ if IS_COLAB:
+ shutil.rmtree("sample_data", ignore_errors=True) # remove colab /sample_data directory
if verbose:
# System info
@@ -577,10 +579,12 @@ def check_yolo(verbose=True, device=""):
ram = psutil.virtual_memory().total
total, used, free = shutil.disk_usage("/")
s = f"({os.cpu_count()} CPUs, {ram / gib:.1f} GB RAM, {(total - free) / gib:.1f}/{total / gib:.1f} GB disk)"
- with contextlib.suppress(Exception): # clear display if ipython is installed
+ try:
from IPython import display
- display.clear_output()
+ display.clear_output() # clear display if notebook
+ except ImportError:
+ pass
else:
s = ""
@@ -593,38 +597,54 @@ def collect_system_info():
import psutil
from ultralytics.utils import ENVIRONMENT # scope to avoid circular import
- from ultralytics.utils.torch_utils import get_cpu_info
+ from ultralytics.utils.torch_utils import get_cpu_info, get_gpu_info
- ram_info = psutil.virtual_memory().total / (1024**3) # Convert bytes to GB
+ gib = 1 << 30 # bytes per GiB
+ cuda = torch and torch.cuda.is_available()
check_yolo()
- LOGGER.info(
- f"\n{'OS':<20}{platform.platform()}\n"
- f"{'Environment':<20}{ENVIRONMENT}\n"
- f"{'Python':<20}{PYTHON_VERSION}\n"
- f"{'Install':<20}{'git' if IS_GIT_DIR else 'pip' if IS_PIP_PACKAGE else 'other'}\n"
- f"{'RAM':<20}{ram_info:.2f} GB\n"
- f"{'CPU':<20}{get_cpu_info()}\n"
- f"{'CUDA':<20}{torch.version.cuda if torch and torch.cuda.is_available() else None}\n"
- )
+ total, used, free = shutil.disk_usage("/")
+
+ info_dict = {
+ "OS": platform.platform(),
+ "Environment": ENVIRONMENT,
+ "Python": PYTHON_VERSION,
+ "Install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other",
+ "RAM": f"{psutil.virtual_memory().total / gib:.2f} GB",
+ "Disk": f"{(total - free) / gib:.1f}/{total / gib:.1f} GB",
+ "CPU": get_cpu_info(),
+ "CPU count": os.cpu_count(),
+ "GPU": get_gpu_info(index=0) if cuda else None,
+ "GPU count": torch.cuda.device_count() if cuda else None,
+ "CUDA": torch.version.cuda if cuda else None,
+ }
+ LOGGER.info("\n" + "\n".join(f"{k:<20}{v}" for k, v in info_dict.items()) + "\n")
+ package_info = {}
for r in parse_requirements(package="ultralytics"):
try:
current = metadata.version(r.name)
- is_met = "✅ " if check_version(current, str(r.specifier), hard=True) else "❌ "
+ is_met = "✅ " if check_version(current, str(r.specifier), name=r.name, hard=True) else "❌ "
except metadata.PackageNotFoundError:
current = "(not installed)"
is_met = "❌ "
- LOGGER.info(f"{r.name:<20}{is_met}{current}{r.specifier}")
+ package_info[r.name] = f"{is_met}{current}{r.specifier}"
+ LOGGER.info(f"{r.name:<20}{package_info[r.name]}")
+
+ info_dict["Package Info"] = package_info
if is_github_action_running():
- LOGGER.info(
- f"\nRUNNER_OS: {os.getenv('RUNNER_OS')}\n"
- f"GITHUB_EVENT_NAME: {os.getenv('GITHUB_EVENT_NAME')}\n"
- f"GITHUB_WORKFLOW: {os.getenv('GITHUB_WORKFLOW')}\n"
- f"GITHUB_ACTOR: {os.getenv('GITHUB_ACTOR')}\n"
- f"GITHUB_REPOSITORY: {os.getenv('GITHUB_REPOSITORY')}\n"
- f"GITHUB_REPOSITORY_OWNER: {os.getenv('GITHUB_REPOSITORY_OWNER')}\n"
- )
+ github_info = {
+ "RUNNER_OS": os.getenv("RUNNER_OS"),
+ "GITHUB_EVENT_NAME": os.getenv("GITHUB_EVENT_NAME"),
+ "GITHUB_WORKFLOW": os.getenv("GITHUB_WORKFLOW"),
+ "GITHUB_ACTOR": os.getenv("GITHUB_ACTOR"),
+ "GITHUB_REPOSITORY": os.getenv("GITHUB_REPOSITORY"),
+ "GITHUB_REPOSITORY_OWNER": os.getenv("GITHUB_REPOSITORY_OWNER"),
+ }
+ LOGGER.info("\n" + "\n".join(f"{k}: {v}" for k, v in github_info.items()))
+ info_dict["GitHub Info"] = github_info
+
+ return info_dict
def check_amp(model):
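
Because `collect_system_info` now returns the assembled dictionary instead of only logging it, callers can serialize the environment report. A hedged usage sketch; the JSON dump is illustrative, not something the patch adds.

```python
# Usage sketch: the returned dict nests "Package Info" and, on CI, "GitHub Info".
import json

from ultralytics.utils.checks import collect_system_info

info = collect_system_info()  # logs the report and returns it as a dict
print(json.dumps(info, indent=2, default=str))
```
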
@@ -651,21 +671,35 @@ def check_amp(model):
from ultralytics.utils.torch_utils import autocast
device = next(model.parameters()).device # get model device
+ prefix = colorstr("AMP: ")
if device.type in {"cpu", "mps"}:
return False # AMP only used on CUDA devices
+ else:
+ # GPUs that have issues with AMP
+ pattern = re.compile(
+ r"(nvidia|geforce|quadro|tesla).*?(1660|1650|1630|t400|t550|t600|t1000|t1200|t2000|k40m)", re.IGNORECASE
+ )
+
+ gpu = torch.cuda.get_device_name(device)
+ if bool(pattern.search(gpu)):
+ LOGGER.warning(
+ f"{prefix}checks failed โ. AMP training on {gpu} GPU may cause "
+ f"NaN losses or zero-mAP results, so AMP will be disabled during training."
+ )
+ return False
def amp_allclose(m, im):
"""All close FP32 vs AMP results."""
batch = [im] * 8
- a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # FP32 inference
+ imgsz = max(256, int(model.stride.max() * 4)) # max stride P5-32 and P6-64
+ a = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data # FP32 inference
with autocast(enabled=True):
- b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # AMP inference
+ b = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data # AMP inference
del m
return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance
im = ASSETS / "bus.jpg" # image to check
- prefix = colorstr("AMP: ")
- LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks with YOLO11n...")
+ LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks...")
warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False."
try:
from ultralytics import YOLO
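
The new early-exit in `check_amp` matches GPU names known to misbehave with AMP. A quick sketch of the regex behavior; the device names below are examples only.

```python
# Sketch of the GPU-name filter added to check_amp(); example device names only.
import re

pattern = re.compile(
    r"(nvidia|geforce|quadro|tesla).*?(1660|1650|1630|t400|t550|t600|t1000|t1200|t2000|k40m)", re.IGNORECASE
)

assert pattern.search("NVIDIA GeForce GTX 1660 Ti")   # AMP disabled for this GPU
assert pattern.search("Tesla K40m")                   # AMP disabled for this GPU
assert not pattern.search("NVIDIA GeForce RTX 3090")  # AMP check proceeds normally
```
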
@@ -673,11 +707,13 @@ def amp_allclose(m, im):
assert amp_allclose(YOLO("yolo11n.pt"), im)
LOGGER.info(f"{prefix}checks passed โ ")
except ConnectionError:
- LOGGER.warning(f"{prefix}checks skipped โ ๏ธ, offline and unable to download YOLO11n. {warning_msg}")
+ LOGGER.warning(
+ f"{prefix}checks skipped โ ๏ธ. Offline and unable to download YOLO11n for AMP checks. {warning_msg}"
+ )
except (AttributeError, ModuleNotFoundError):
LOGGER.warning(
f"{prefix}checks skipped โ ๏ธ. "
- f"Unable to load YOLO11n due to possible Ultralytics package modifications. {warning_msg}"
+ f"Unable to load YOLO11n for AMP checks due to possible Ultralytics package modifications. {warning_msg}"
)
except AssertionError:
LOGGER.warning(
@@ -690,9 +726,10 @@ def amp_allclose(m, im):
def git_describe(path=ROOT): # path must be a directory
"""Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe."""
- with contextlib.suppress(Exception):
+ try:
return subprocess.check_output(f"git -C {path} describe --tags --long --always", shell=True).decode()[:-1]
- return ""
+ except Exception:
+ return ""
def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
@@ -747,6 +784,38 @@ def cuda_is_available() -> bool:
return cuda_device_count() > 0
+def is_rockchip():
+ """Check if the current environment is running on a Rockchip SoC."""
+ if LINUX and ARM64:
+ try:
+ with open("/proc/device-tree/compatible") as f:
+ dev_str = f.read()
+ *_, soc = dev_str.split(",")
+ if soc.replace("\x00", "") in RKNN_CHIPS:
+ return True
+ except OSError:
+ return False
+ else:
+ return False
+
+
+def is_sudo_available() -> bool:
+ """
+ Check if the sudo command is available in the environment.
+
+ Returns:
+ (bool): True if the sudo command is available, False otherwise.
+ """
+ if WINDOWS:
+ return False
+ cmd = "sudo --version"
+ return subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).returncode == 0
+
+
+# Run checks and define constants
+check_python("3.8", hard=False, verbose=True) # check python version
+check_torchvision() # check torch-torchvision compatibility
+
# Define constants
IS_PYTHON_MINIMUM_3_10 = check_python("3.10", hard=False)
IS_PYTHON_3_12 = PYTHON_VERSION.startswith("3.12")
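
A short illustration of the device-tree parsing in the new `is_rockchip`; the `compatible` string below is a made-up example of the NUL-separated `vendor,soc` entries found on ARM boards.

```python
# Illustrative parsing of /proc/device-tree/compatible; dev_str is a hypothetical
# example, and RKNN_CHIPS is the allow-list imported at the top of checks.py.
dev_str = "xunlong,orangepi-5\x00rockchip,rk3588s\x00"
*_, soc = dev_str.split(",")        # keep the last vendor,SoC pair
print(soc.replace("\x00", ""))      # -> "rk3588s", compared against RKNN_CHIPS
```
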
diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py
index ff980967fb9..8b7e5bbe4ce 100644
--- a/ultralytics/utils/dist.py
+++ b/ultralytics/utils/dist.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
import shutil
@@ -37,7 +37,7 @@ def generate_ddp_file(trainer):
cfg = DEFAULT_CFG_DICT.copy()
cfg.update(save_dir='') # handle the extra key 'save_dir'
trainer = {name}(cfg=cfg, overrides=overrides)
- trainer.args.model = "{getattr(trainer.hub_session, 'model_url', trainer.args.model)}"
+ trainer.args.model = "{getattr(trainer.hub_session, "model_url", trainer.args.model)}"
results = trainer.train()
"""
(USER_CONFIG_DIR / "DDP").mkdir(exist_ok=True)
diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py
index 5cbc868ab69..be33ae8a114 100644
--- a/ultralytics/utils/downloads.py
+++ b/ultralytics/utils/downloads.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import re
import shutil
import subprocess
@@ -53,7 +52,7 @@ def is_url(url, check=False):
valid = is_url("https://www.example.com")
```
"""
- with contextlib.suppress(Exception):
+ try:
url = str(url)
result = parse.urlparse(url)
assert all([result.scheme, result.netloc]) # check if is url
@@ -61,7 +60,8 @@ def is_url(url, check=False):
with request.urlopen(url) as response:
return response.getcode() == 200 # check if exists online
return True
- return False
+ except Exception:
+ return False
def delete_dsstore(path, files_to_delete=(".DS_Store", "__MACOSX")):
@@ -138,7 +138,7 @@ def unzip_file(file, path=None, exclude=(".DS_Store", "__MACOSX"), exist_ok=Fals
If a path is not provided, the function will use the parent directory of the zipfile as the default path.
Args:
- file (str): The path to the zipfile to be extracted.
+ file (str | Path): The path to the zipfile to be extracted.
path (str, optional): The path to extract the zipfile to. Defaults to None.
exclude (tuple, optional): A tuple of filename strings to be excluded. Defaults to ('.DS_Store', '__MACOSX').
exist_ok (bool, optional): Whether to overwrite existing contents if they exist. Defaults to False.
@@ -269,8 +269,7 @@ def get_google_drive_file_info(link):
for k, v in response.cookies.items():
if k.startswith("download_warning"):
drive_url += f"&confirm={v}" # v is token
- cd = response.headers.get("content-disposition")
- if cd:
+ if cd := response.headers.get("content-disposition"):
filename = re.findall('filename="(.+)"', cd)[0]
return drive_url, filename
@@ -406,7 +405,7 @@ def get_github_assets(repo="ultralytics/assets", version="latest", retry=False):
LOGGER.warning(f"โ ๏ธ GitHub assets check failure for {url}: {r.status_code} {r.reason}")
return "", []
data = r.json()
- return data["tag_name"], [x["name"] for x in data["assets"]] # tag, assets i.e. ['yolov8n.pt', 'yolov8s.pt', ...]
+ return data["tag_name"], [x["name"] for x in data["assets"]] # tag, assets i.e. ['yolo11n.pt', 'yolov8s.pt', ...]
def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", **kwargs):
@@ -425,7 +424,7 @@ def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", **
Example:
```python
- file_path = attempt_download_asset("yolov8n.pt", repo="ultralytics/assets", release="latest")
+ file_path = attempt_download_asset("yolo11n.pt", repo="ultralytics/assets", release="latest")
```
"""
from ultralytics.utils import SETTINGS # scoped for circular import
diff --git a/ultralytics/utils/errors.py b/ultralytics/utils/errors.py
index 86aee1d90aa..8cb7aae13f1 100644
--- a/ultralytics/utils/errors.py
+++ b/ultralytics/utils/errors.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from ultralytics.utils import emojis
diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py
index d0953c748e2..0af6b0c2332 100644
--- a/ultralytics/utils/files.py
+++ b/ultralytics/utils/files.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import glob
@@ -183,7 +183,7 @@ def get_latest_run(search_dir="."):
return max(last_list, key=os.path.getctime) if last_list else ""
-def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_names=False):
+def update_models(model_names=("yolo11n.pt",), source_dir=Path("."), update_names=False):
"""
Updates and re-saves specified YOLO models in an 'updated_models' subdirectory.
@@ -195,7 +195,7 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name
Examples:
Update specified YOLO models and save them in 'updated_models' subdirectory:
>>> from ultralytics.utils.files import update_models
- >>> model_names = ("yolov8n.pt", "yolov8s.pt")
+ >>> model_names = ("yolo11n.pt", "yolov8s.pt")
>>> update_models(model_names, source_dir=Path("/models"), update_names=True)
"""
from ultralytics import YOLO
@@ -219,4 +219,4 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name
# Save model using model.save()
print(f"Re-saving {model_name} model to {save_path}")
- model.save(save_path, use_dill=False)
+ model.save(save_path)
diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py
index f8838957198..71ce36269f8 100644
--- a/ultralytics/utils/instance.py
+++ b/ultralytics/utils/instance.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from collections import abc
from itertools import repeat
@@ -7,7 +7,7 @@
import numpy as np
-from .ops import ltwh2xywh, ltwh2xyxy, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
+from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
def _ntuple(n):
@@ -28,7 +28,7 @@ def parse(x):
# `ltwh` means left top and width, height(COCO format)
_formats = ["xyxy", "xywh", "ltwh"]
-__all__ = ("Bboxes",) # tuple or list
+__all__ = ("Bboxes", "Instances") # tuple or list
class Bboxes:
@@ -176,7 +176,7 @@ def __getitem__(self, index) -> "Bboxes":
length as the number of bounding boxes.
"""
if isinstance(index, int):
- return Bboxes(self.bboxes[index].view(1, -1))
+ return Bboxes(self.bboxes[index].reshape(1, -1))
b = self.bboxes[index]
assert b.ndim == 2, f"Indexing on Bboxes with {index} failed to return a matrix!"
return Bboxes(b)
@@ -406,7 +406,20 @@ def concatenate(cls, instances_list: List["Instances"], axis=0) -> "Instances":
normalized = instances_list[0].normalized
cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis)
- cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis)
+ seg_len = [b.segments.shape[1] for b in instances_list]
+ if len(frozenset(seg_len)) > 1: # resample segments if there's different length
+ max_len = max(seg_len)
+ cat_segments = np.concatenate(
+ [
+ resample_segments(list(b.segments), max_len)
+ if len(b.segments)
+ else np.zeros((0, max_len, 2), dtype=np.float32) # re-generating empty segments
+ for b in instances_list
+ ],
+ axis=axis,
+ )
+ else:
+ cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis)
cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None
return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized)
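
A standalone sketch of what the updated `Instances.concatenate` does when segment lengths differ: shorter segments are resampled to the longest length so `np.concatenate` succeeds. The random arrays below are placeholders.

```python
# Sketch of the resample-then-concatenate path; arrays are random placeholders.
import numpy as np

from ultralytics.utils.ops import resample_segments

a = np.random.rand(2, 500, 2).astype(np.float32)   # two 500-point segments
b = np.random.rand(3, 1000, 2).astype(np.float32)  # three 1000-point segments
a = np.stack(resample_segments(list(a), n=1000))   # resample to the longest length
print(np.concatenate([a, b], axis=0).shape)        # (5, 1000, 2)
```
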
diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
index aa1c02d6188..311b3071e81 100644
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
import torch.nn as nn
@@ -189,8 +189,7 @@ def preprocess(self, targets, batch_size, scale_tensor):
out = torch.zeros(batch_size, counts.max(), ne - 1, device=self.device)
for j in range(batch_size):
matches = i == j
- n = matches.sum()
- if n:
+ if n := matches.sum():
out[j, :n] = targets[matches, 1:]
out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
return out
@@ -298,7 +297,7 @@ def __call__(self, preds, batch):
raise TypeError(
"ERROR โ segment dataset incorrectly formatted or not a segment dataset.\n"
"This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, "
- "i.e. 'yolo train model=yolov8n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
+ "i.e. 'yolo train model=yolo11n-seg.pt data=coco8.yaml'.\nVerify your dataset is a "
"correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
"as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help."
) from e
@@ -552,9 +551,8 @@ def calculate_keypoints_loss(
pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim).
Returns:
- (tuple): Returns a tuple containing:
- - kpts_loss (torch.Tensor): The keypoints loss.
- - kpts_obj_loss (torch.Tensor): The keypoints object loss.
+ kpts_loss (torch.Tensor): The keypoints loss.
+ kpts_obj_loss (torch.Tensor): The keypoints object loss.
"""
batch_idx = batch_idx.flatten()
batch_size = len(masks)
@@ -605,6 +603,7 @@ class v8ClassificationLoss:
def __call__(self, preds, batch):
"""Compute the classification loss between predictions and true labels."""
+ preds = preds[1] if isinstance(preds, (list, tuple)) else preds
loss = F.cross_entropy(preds, batch["cls"], reduction="mean")
loss_items = loss.detach()
return loss, loss_items
@@ -630,8 +629,7 @@ def preprocess(self, targets, batch_size, scale_tensor):
out = torch.zeros(batch_size, counts.max(), 6, device=self.device)
for j in range(batch_size):
matches = i == j
- n = matches.sum()
- if n:
+ if n := matches.sum():
bboxes = targets[matches, 2:]
bboxes[..., :4].mul_(scale_tensor)
out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1)
@@ -668,7 +666,7 @@ def __call__(self, preds, batch):
raise TypeError(
"ERROR โ OBB dataset incorrectly formatted or not a OBB dataset.\n"
"This error can occur when incorrectly training a 'OBB' model on a 'detect' dataset, "
- "i.e. 'yolo train model=yolov8n-obb.pt data=dota8.yaml'.\nVerify your dataset is a "
+ "i.e. 'yolo train model=yolo11n-obb.pt data=dota8.yaml'.\nVerify your dataset is a "
"correctly formatted 'OBB' dataset using 'data=dota8.yaml' "
"as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help."
) from e
diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py
index 37a06b43a12..4c9755a4af0 100644
--- a/ultralytics/utils/metrics.py
+++ b/ultralytics/utils/metrics.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Model validation metrics."""
import math
@@ -74,11 +74,16 @@ def box_iou(box1, box2, eps=1e-7):
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
"""
- Calculate Intersection over Union (IoU) of box1(1, 4) to box2(n, 4).
+ Calculates the Intersection over Union (IoU) between bounding boxes.
+
+ This function supports various shapes for `box1` and `box2` as long as the last dimension is 4.
+ For instance, you may pass tensors shaped like (4,), (N, 4), (B, N, 4), or (B, N, 1, 4).
+ Internally, the code will split the last dimension into (x, y, w, h) if `xywh=True`,
+ or (x1, y1, x2, y2) if `xywh=False`.
Args:
- box1 (torch.Tensor): A tensor representing a single bounding box with shape (1, 4).
- box2 (torch.Tensor): A tensor representing n bounding boxes with shape (n, 4).
+ box1 (torch.Tensor): A tensor representing one or more bounding boxes, with the last dimension being 4.
+ box2 (torch.Tensor): A tensor representing one or more bounding boxes, with the last dimension being 4.
xywh (bool, optional): If True, input boxes are in (x, y, w, h) format. If False, input boxes are in
(x1, y1, x2, y2) format. Defaults to True.
GIoU (bool, optional): If True, calculate Generalized IoU. Defaults to False.
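
A usage sketch of the broadcasting described in the rewritten docstring; the trailing singleton dimension comes from the coordinate split inside `bbox_iou`.

```python
# Usage sketch of the broadcast-friendly shapes documented above.
import torch

from ultralytics.utils.metrics import bbox_iou

box1 = torch.rand(8, 1, 4)   # 8 boxes, broadcast against...
box2 = torch.rand(1, 16, 4)  # ...16 boxes
iou = bbox_iou(box1, box2, xywh=True)
print(iou.shape)  # torch.Size([8, 16, 1])
```
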
@@ -271,7 +276,7 @@ def batch_probiou(obb1, obb2, eps=1e-7):
return 1 - hd
-def smooth_BCE(eps=0.1):
+def smooth_bce(eps=0.1):
"""
Computes smoothed positive and negative Binary Cross-Entropy targets.
@@ -373,10 +378,9 @@ def process_batch(self, detections, gt_bboxes, gt_cls):
else:
self.matrix[self.nc, gc] += 1 # true background
- if n:
- for i, dc in enumerate(detection_classes):
- if not any(m1 == i):
- self.matrix[dc, self.nc] += 1 # predicted background
+ for i, dc in enumerate(detection_classes):
+ if not any(m1 == i):
+ self.matrix[dc, self.nc] += 1 # predicted background
def matrix(self):
"""Returns the confusion matrix."""
@@ -429,7 +433,7 @@ def plot(self, normalize=True, save_dir="", names=(), on_plot=None):
ax.set_xlabel("True")
ax.set_ylabel("Predicted")
ax.set_title(title)
- plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png'
+ plot_fname = Path(save_dir) / f"{title.lower().replace(' ', '_')}.png"
fig.savefig(plot_fname, dpi=250)
plt.close(fig)
if on_plot:
@@ -550,19 +554,18 @@ def ap_per_class(
prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string.
Returns:
- (tuple): A tuple of six arrays and one array of unique classes, where:
- tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,).
- fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
- p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
- r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
- f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
- ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
- unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
- p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
- r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
- f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
- x (np.ndarray): X-axis values for the curves. Shape: (1000,).
- prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
+ tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
+ r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
+ f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
+ ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
+ unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
+ p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
+ r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
+ f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
+ x (np.ndarray): X-axis values for the curves. Shape: (1000,).
+ prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
"""
# Sort by objectness
i = np.argsort(-conf)
@@ -599,7 +602,7 @@ def ap_per_class(
# AP from recall-precision curve
for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
- if plot and j == 0:
+ if j == 0:
prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5
prec_values = np.array(prec_values) # (nc, 1000)
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index b76168f95e5..af41ffee3d0 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import contextlib
import math
@@ -75,6 +75,10 @@ def segment2box(segment, width=640, height=640):
(np.ndarray): the minimum and maximum x and y values of the segment.
"""
x, y = segment.T # segment xy
+ # if any 3 of the 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294
+ if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
+ x = x.clip(0, width)
+ y = y.clip(0, height)
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
x = x[inside]
y = y[inside]
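
A small numeric sketch of the new guard in `segment2box`: when a polygon exceeds at least three image borders, its coordinates are clipped first so the subsequent inside-mask does not discard every point. The polygon below is a made-up example.

```python
# Sketch of the clip-first guard; the polygon is a hypothetical example.
import numpy as np

width = height = 640
x = np.array([-10.0, 700.0, 700.0, -10.0])
y = np.array([-5.0, -5.0, 700.0, 700.0])
if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
    x, y = x.clip(0, width), y.clip(0, height)
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
print(inside.all())  # True: no points are dropped after clipping
```
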
@@ -139,7 +143,7 @@ def make_divisible(x, divisor):
return math.ceil(x / divisor) * divisor
-def nms_rotated(boxes, scores, threshold=0.45):
+def nms_rotated(boxes, scores, threshold=0.45, use_triu=True):
"""
NMS for oriented bounding boxes using probiou and fast-nms.
@@ -147,16 +151,30 @@ def nms_rotated(boxes, scores, threshold=0.45):
boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr.
scores (torch.Tensor): Confidence scores, shape (N,).
threshold (float, optional): IoU threshold. Defaults to 0.45.
+ use_triu (bool, optional): Whether to use the `torch.triu` operator. Disable it when exporting OBB
+ models to formats that do not support `torch.triu`.
Returns:
(torch.Tensor): Indices of boxes to keep after NMS.
"""
- if len(boxes) == 0:
- return np.empty((0,), dtype=np.int8)
sorted_idx = torch.argsort(scores, descending=True)
boxes = boxes[sorted_idx]
- ious = batch_probiou(boxes, boxes).triu_(diagonal=1)
- pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
+ ious = batch_probiou(boxes, boxes)
+ if use_triu:
+ ious = ious.triu_(diagonal=1)
+ # pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1)
+ # NOTE: handles the len(boxes) == 0 case without an if-else branch, keeping the op exportable
+ pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
+ else:
+ n = boxes.shape[0]
+ row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
+ col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
+ upper_mask = row_idx < col_idx
+ ious = ious * upper_mask
+ # Zeroing these scores ensures the extra indices do not affect the final results
+ scores[~((ious >= threshold).sum(0) <= 0)] = 0
+ # NOTE: return indices with fixed length to avoid TFLite reshape error
+ pick = torch.topk(scores, scores.shape[0]).indices
return sorted_idx[pick]
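
A minimal check that the export-friendly masking in the `use_triu=False` branch reproduces `torch.triu` through index comparisons; a random matrix stands in for the pairwise IoUs.

```python
# Sketch: build a strict upper-triangular mask without torch.triu, as in the
# use_triu=False branch of nms_rotated().
import torch

n = 5
ious = torch.rand(n, n)
row_idx = torch.arange(n).view(-1, 1).expand(-1, n)
col_idx = torch.arange(n).view(1, -1).expand(n, -1)
upper_mask = row_idx < col_idx  # True strictly above the diagonal
assert torch.equal(ious * upper_mask, ious.triu(diagonal=1))
```
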
@@ -175,6 +193,7 @@ def non_max_suppression(
max_wh=7680,
in_place=True,
rotated=False,
+ end2end=False,
):
"""
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
@@ -201,6 +220,7 @@ def non_max_suppression(
max_wh (int): The maximum box width and height in pixels.
in_place (bool): If True, the input prediction tensor will be modified in place.
rotated (bool): If Oriented Bounding Boxes (OBB) are being passed for NMS.
+ end2end (bool): Whether the model is end-to-end and does not require NMS.
Returns:
(List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
@@ -217,7 +237,7 @@ def non_max_suppression(
if classes is not None:
classes = torch.tensor(classes, device=prediction.device)
- if prediction.shape[-1] == 6: # end-to-end model (BNC, i.e. 1,300,6)
+ if prediction.shape[-1] == 6 or end2end: # end-to-end model (BNC, i.e. 1,300,6)
output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
if classes is not None:
output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
@@ -317,11 +337,11 @@ def clip_boxes(boxes, shape):
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
Args:
- boxes (torch.Tensor): the bounding boxes to clip
- shape (tuple): the shape of the image
+ boxes (torch.Tensor): The bounding boxes to clip.
+ shape (tuple): The shape of the image.
Returns:
- (torch.Tensor | numpy.ndarray): Clipped boxes
+ (torch.Tensor | numpy.ndarray): The clipped boxes.
"""
if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1
@@ -359,9 +379,9 @@ def scale_image(masks, im0_shape, ratio_pad=None):
Takes a mask, and resizes it to the original image size.
Args:
- masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
- im0_shape (tuple): the original image shape
- ratio_pad (tuple): the ratio of the padding to the original image.
+ masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3].
+ im0_shape (tuple): The original image shape.
+ ratio_pad (tuple): The ratio of the padding to the original image.
Returns:
masks (np.ndarray): The masks that are being returned with shape [h, w, num].
@@ -401,7 +421,7 @@ def xyxy2xywh(x):
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
"""
assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
- y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
+ y = empty_like(x) # faster than clone/copy
y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center
y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center
y[..., 2] = x[..., 2] - x[..., 0] # width
@@ -421,7 +441,7 @@ def xywh2xyxy(x):
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
"""
assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
- y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
+ y = empty_like(x) # faster than clone/copy
xy = x[..., :2] # centers
wh = x[..., 2:] / 2 # half width-height
y[..., :2] = xy - wh # top left xy
@@ -444,7 +464,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
"""
assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
- y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
+ y = empty_like(x) # faster than clone/copy
y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x
@@ -470,7 +490,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
if clip:
x = clip_boxes(x, (h - eps, w - eps))
assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
- y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
+ y = empty_like(x) # faster than clone/copy
y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center
y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center
y[..., 2] = (x[..., 2] - x[..., 0]) / w # width
@@ -625,9 +645,12 @@ def resample_segments(segments, n=1000):
segments (list): the resampled segments.
"""
for i, s in enumerate(segments):
+ if len(s) == n:
+ continue
s = np.concatenate((s, s[0:1, :]), axis=0)
- x = np.linspace(0, len(s) - 1, n)
+ x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n)
xp = np.arange(len(s))
+ x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x
segments[i] = (
np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
) # segment xy
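
A quick usage sketch of the updated `resample_segments`: segments already at the target length are returned untouched, and shorter polygons keep their original vertices because those are re-inserted into the interpolation grid.

```python
# Usage sketch: a 4-point polygon resampled to 20 points.
import numpy as np

from ultralytics.utils.ops import resample_segments

seg = np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.float32)
out = resample_segments([seg.copy()], n=20)[0]
print(out.shape)  # (20, 2); the 4 original vertices are kept in the output
```
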
@@ -692,12 +715,12 @@ def process_mask_native(protos, masks_in, bboxes, shape):
Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
- masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
- bboxes (torch.Tensor): [n, 4], n is number of masks after nms
- shape (tuple): the size of the input image (h,w)
+ masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms.
+ bboxes (torch.Tensor): [n, 4], n is number of masks after nms.
+ shape (tuple): The size of the input image (h,w).
Returns:
- masks (torch.Tensor): The returned masks with dimensions [h, w, n]
+ masks (torch.Tensor): The returned masks with dimensions [h, w, n].
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
@@ -783,23 +806,29 @@ def regularize_rboxes(rboxes):
return torch.stack([x, y, w_, h_, t], dim=-1) # regularized boxes
-def masks2segments(masks, strategy="largest"):
+def masks2segments(masks, strategy="all"):
"""
It takes a list of masks(n,h,w) and returns a list of segments(n,xy).
Args:
masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160)
- strategy (str): 'concat' or 'largest'. Defaults to largest
+ strategy (str): 'all' or 'largest'. Defaults to 'all'.
Returns:
segments (List): list of segment masks
"""
+ from ultralytics.data.converter import merge_multi_segment
+
segments = []
for x in masks.int().cpu().numpy().astype("uint8"):
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
if c:
- if strategy == "concat": # concatenate all segments
- c = np.concatenate([x.reshape(-1, 2) for x in c])
+ if strategy == "all": # merge and concatenate all segments
+ c = (
+ np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c]))
+ if len(c) > 1
+ else c[0].reshape(-1, 2)
+ )
elif strategy == "largest": # select largest segment
c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
else:
@@ -832,3 +861,10 @@ def clean_str(s):
(str): a string with special characters replaced by an underscore _
"""
return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
+
+
+def empty_like(x):
+ """Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype."""
+ return (
+ torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32)
+ )
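
The box-conversion helpers now allocate through the new `empty_like`, so their outputs are float32 regardless of input dtype. A short usage sketch:

```python
# Usage sketch of a converter that now allocates via empty_like(); outputs are float32.
import numpy as np
import torch

from ultralytics.utils.ops import xywh2xyxy

print(xywh2xyxy(torch.tensor([[50.0, 50.0, 20.0, 10.0]])))    # tensor([[40., 45., 60., 55.]])
print(xywh2xyxy(np.array([[50.0, 50.0, 20.0, 10.0]])).dtype)  # float32, even from a float64 input
```
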
diff --git a/ultralytics/utils/patches.py b/ultralytics/utils/patches.py
index d918e0efeac..1531cd7f8f6 100644
--- a/ultralytics/utils/patches.py
+++ b/ultralytics/utils/patches.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Monkey patches to update/extend functionality of existing functions."""
import time
@@ -86,25 +86,15 @@ def torch_load(*args, **kwargs):
return _torch_load(*args, **kwargs)
-def torch_save(*args, use_dill=True, **kwargs):
+def torch_save(*args, **kwargs):
"""
Optionally use dill to serialize lambda functions where pickle does not, adding robustness with 3 retries and
exponential standoff in case of save failure.
Args:
*args (tuple): Positional arguments to pass to torch.save.
- use_dill (bool): Whether to try using dill for serialization if available. Defaults to True.
**kwargs (Any): Keyword arguments to pass to torch.save.
"""
- try:
- assert use_dill
- import dill as pickle
- except (AssertionError, ImportError):
- import pickle
-
- if "pickle_module" not in kwargs:
- kwargs["pickle_module"] = pickle
-
for i in range(4): # 3 retries
try:
return _torch_save(*args, **kwargs)
diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py
index a70d8c259c6..2c211df4880 100644
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import math
import warnings
from pathlib import Path
@@ -13,7 +12,7 @@
from PIL import Image, ImageDraw, ImageFont
from PIL import __version__ as pil_version
-from ultralytics.utils import LOGGER, TryExcept, ops, plt_settings, threaded
+from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, TryExcept, ops, plt_settings, threaded
from ultralytics.utils.checks import check_font, check_version, is_ascii
from ultralytics.utils.files import increment_path
@@ -215,7 +214,16 @@ def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=Fa
self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
def get_txt_color(self, color=(128, 128, 128), txt_color=(255, 255, 255)):
- """Assign text color based on background color."""
+ """
+ Assign text color based on background color.
+
+ Args:
+ color (tuple, optional): The background color of the rectangle for text (B, G, R).
+ txt_color (tuple, optional): The color of the text (R, G, B).
+
+ Returns:
+ txt_color (tuple): Text color for label
+ """
if color in self.dark_colors:
return 104, 31, 17
elif color in self.light_colors:
@@ -500,13 +508,21 @@ def result(self):
def show(self, title=None):
"""Show the annotated image."""
- Image.fromarray(np.asarray(self.im)[..., ::-1]).show(title)
+ im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert numpy array to PIL Image with RGB to BGR
+ if IS_COLAB or IS_KAGGLE: # can not use IS_JUPYTER as will run for all ipython environments
+ try:
+ display(im) # noqa - display() function only available in ipython environments
+ except ImportError as e:
+ LOGGER.warning(f"Unable to display image in Jupyter notebooks: {e}")
+ else:
+ im.show(title=title)
def save(self, filename="image.jpg"):
"""Save the annotated image to 'filename'."""
cv2.imwrite(filename, np.asarray(self.im))
- def get_bbox_dimension(self, bbox=None):
+ @staticmethod
+ def get_bbox_dimension(bbox=None):
"""
Calculate the area of a bounding box.
@@ -514,7 +530,9 @@ def get_bbox_dimension(self, bbox=None):
bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
Returns:
- angle (degree): Degree value of angle between three points
+ width (float): Width of the bounding box.
+ height (float): Height of the bounding box.
+ area (float): Area enclosed by the bounding box.
"""
x_min, y_min, x_max, y_max = bbox
width = x_max - x_min
@@ -554,10 +572,10 @@ def queue_counts_display(self, label, points=None, region_color=(255, 255, 255),
Displays queue counts on an image centered at the points with customizable font size and colors.
Args:
- label (str): queue counts label
- points (tuple): region points for center point calculation to display text
- region_color (RGB): queue region color
- txt_color (RGB): text display color
+ label (str): Queue counts label.
+ points (tuple): Region points for center point calculation to display text.
+ region_color (tuple): RGB queue region color.
+ txt_color (tuple): RGB text display color.
"""
x_values = [point[0] for point in points]
y_values = [point[1] for point in points]
@@ -594,13 +612,13 @@ def display_objects_labels(self, im0, text, txt_color, bg_color, x_center, y_cen
Display the bounding boxes labels in parking management app.
Args:
- im0 (ndarray): inference image
- text (str): object/class name
- txt_color (bgr color): display color for text foreground
- bg_color (bgr color): display color for text background
- x_center (float): x position center point for bounding box
- y_center (float): y position center point for bounding box
- margin (int): gap between text and rectangle for better display
+ im0 (ndarray): Inference image.
+ text (str): Object/class name.
+ txt_color (tuple): Display color for text foreground.
+ bg_color (tuple): Display color for text background.
+ x_center (float): The x position center point for bounding box.
+ y_center (float): The y position center point for bounding box.
+ margin (int): The gap between text and rectangle for better display.
"""
text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]
text_x = x_center - text_size[0] // 2
@@ -618,11 +636,11 @@ def display_analytics(self, im0, text, txt_color, bg_color, margin):
Display the overall statistics for parking lots.
Args:
- im0 (ndarray): inference image
- text (dict): labels dictionary
- txt_color (bgr color): display color for text foreground
- bg_color (bgr color): display color for text background
- margin (int): gap between text and rectangle for better display
+ im0 (ndarray): Inference image.
+ text (dict): Labels dictionary.
+ txt_color (tuple): Display color for text foreground.
+ bg_color (tuple): Display color for text background.
+ margin (int): Gap between text and rectangle for better display.
"""
horizontal_gap = int(im0.shape[1] * 0.02)
vertical_gap = int(im0.shape[0] * 0.01)
@@ -662,14 +680,13 @@ def estimate_pose_angle(a, b, c):
angle = 360 - angle
return angle
- def draw_specific_points(self, keypoints, indices=None, shape=(640, 640), radius=2, conf_thres=0.25):
+ def draw_specific_points(self, keypoints, indices=None, radius=2, conf_thres=0.25):
"""
Draw specific keypoints for gym steps counting.
Args:
keypoints (list): Keypoints data to be plotted.
indices (list, optional): Keypoint indices to be plotted. Defaults to [2, 5, 7].
- shape (tuple, optional): Image size for model inference. Defaults to (640, 640).
radius (int, optional): Keypoint radius. Defaults to 2.
conf_thres (float, optional): Confidence threshold for keypoints. Defaults to 0.25.
@@ -680,142 +697,157 @@ def draw_specific_points(self, keypoints, indices=None, shape=(640, 640), radius
Keypoint format: [x, y] or [x, y, confidence].
Modifies self.im in-place.
"""
- if indices is None:
- indices = [2, 5, 7]
- for i, k in enumerate(keypoints):
- if i in indices:
- x_coord, y_coord = k[0], k[1]
- if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
- if len(k) == 3:
- conf = k[2]
- if conf < conf_thres:
- continue
- cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, (0, 255, 0), -1, lineType=cv2.LINE_AA)
+ indices = indices or [2, 5, 7]
+ points = [(int(k[0]), int(k[1])) for i, k in enumerate(keypoints) if i in indices and k[2] >= conf_thres]
+
+ # Draw lines between consecutive points
+ for start, end in zip(points[:-1], points[1:]):
+ cv2.line(self.im, start, end, (0, 255, 0), 2, lineType=cv2.LINE_AA)
+
+ # Draw circles for keypoints
+ for pt in points:
+ cv2.circle(self.im, pt, radius, (0, 0, 255), -1, lineType=cv2.LINE_AA)
+
return self.im
- def plot_angle_and_count_and_stage(
- self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255)
- ):
+ def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)):
"""
- Plot the pose angle, count value and step stage.
+ Draw text with a background on the image.
Args:
- angle_text (str): angle value for workout monitoring
- count_text (str): counts value for workout monitoring
- stage_text (str): stage decision for workout monitoring
- center_kpt (list): centroid pose index for workout monitoring
- color (tuple): text background color for workout monitoring
- txt_color (tuple): text foreground color for workout monitoring
+ display_text (str): The text to be displayed.
+ position (tuple): Coordinates (x, y) on the image where the text will be placed.
+ color (tuple, optional): Text background color
+ txt_color (tuple, optional): Text foreground color
"""
- angle_text, count_text, stage_text = (f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}")
+ (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf)
- # Draw angle
- (angle_text_width, angle_text_height), _ = cv2.getTextSize(angle_text, 0, self.sf, self.tf)
- angle_text_position = (int(center_kpt[0]), int(center_kpt[1]))
- angle_background_position = (angle_text_position[0], angle_text_position[1] - angle_text_height - 5)
- angle_background_size = (angle_text_width + 2 * 5, angle_text_height + 2 * 5 + (self.tf * 2))
+ # Draw background rectangle
cv2.rectangle(
self.im,
- angle_background_position,
- (
- angle_background_position[0] + angle_background_size[0],
- angle_background_position[1] + angle_background_size[1],
- ),
+ (position[0], position[1] - text_height - 5),
+ (position[0] + text_width + 10, position[1] - text_height - 5 + text_height + 10 + self.tf),
color,
-1,
)
- cv2.putText(self.im, angle_text, angle_text_position, 0, self.sf, txt_color, self.tf)
-
- # Draw Counts
- (count_text_width, count_text_height), _ = cv2.getTextSize(count_text, 0, self.sf, self.tf)
- count_text_position = (angle_text_position[0], angle_text_position[1] + angle_text_height + 20)
- count_background_position = (
- angle_background_position[0],
- angle_background_position[1] + angle_background_size[1] + 5,
- )
- count_background_size = (count_text_width + 10, count_text_height + 10 + self.tf)
+ # Draw text
+ cv2.putText(self.im, display_text, position, 0, self.sf, txt_color, self.tf)
- cv2.rectangle(
- self.im,
- count_background_position,
- (
- count_background_position[0] + count_background_size[0],
- count_background_position[1] + count_background_size[1],
- ),
- color,
- -1,
- )
- cv2.putText(self.im, count_text, count_text_position, 0, self.sf, txt_color, self.tf)
+ return text_height
- # Draw Stage
- (stage_text_width, stage_text_height), _ = cv2.getTextSize(stage_text, 0, self.sf, self.tf)
- stage_text_position = (int(center_kpt[0]), int(center_kpt[1]) + angle_text_height + count_text_height + 40)
- stage_background_position = (stage_text_position[0], stage_text_position[1] - stage_text_height - 5)
- stage_background_size = (stage_text_width + 10, stage_text_height + 10)
+ def plot_angle_and_count_and_stage(
+ self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255)
+ ):
+ """
+ Plot the pose angle, count value, and step stage.
- cv2.rectangle(
- self.im,
- stage_background_position,
- (
- stage_background_position[0] + stage_background_size[0],
- stage_background_position[1] + stage_background_size[1],
- ),
- color,
- -1,
+ Args:
+ angle_text (str): Angle value for workout monitoring
+ count_text (str): Counts value for workout monitoring
+ stage_text (str): Stage decision for workout monitoring
+ center_kpt (list): Centroid pose index for workout monitoring
+ color (tuple, optional): Text background color
+ txt_color (tuple, optional): Text foreground color
+ """
+ # Format text
+ angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}"
+
+ # Draw angle, count and stage text
+ angle_height = self.plot_workout_information(
+ angle_text, (int(center_kpt[0]), int(center_kpt[1])), color, txt_color
+ )
+ count_height = self.plot_workout_information(
+ count_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + 20), color, txt_color
+ )
+ self.plot_workout_information(
+ stage_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + count_height + 40), color, txt_color
)
- cv2.putText(self.im, stage_text, stage_text_position, 0, self.sf, txt_color, self.tf)
def seg_bbox(self, mask, mask_color=(255, 0, 255), label=None, txt_color=(255, 255, 255)):
"""
Function for drawing segmented object in bounding box shape.
Args:
- mask (list): masks data list for instance segmentation area plotting
- mask_color (RGB): mask foreground color
- label (str): Detection label text
- txt_color (RGB): text color
+ mask (np.ndarray): A 2D array of shape (N, 2) containing the contour points of the segmented object.
+ mask_color (tuple): RGB color for the contour and label background.
+ label (str, optional): Text label for the object. If None, no label is drawn.
+ txt_color (tuple): RGB color for the label text.
"""
+ if mask.size == 0: # no masks to plot
+ return
+
cv2.polylines(self.im, [np.int32([mask])], isClosed=True, color=mask_color, thickness=2)
- text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf)
+ if label:
+ text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf)
+ cv2.rectangle(
+ self.im,
+ (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
+ (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
+ mask_color,
+ -1,
+ )
+ cv2.putText(
+ self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf
+ )
- cv2.rectangle(
- self.im,
- (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
- (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
- mask_color,
- -1,
- )
+ def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
+ """
+ Function for drawing a sweep annotation line and an optional label.
+
+ Args:
+ line_x (int): The x-coordinate of the sweep line.
+ line_y (int): The y-coordinate limit of the sweep line.
+ label (str, optional): Text label to be drawn in center of sweep line. If None, no label is drawn.
+ color (tuple): RGB color for the line and label background.
+ txt_color (tuple): RGB color for the label text.
+ """
+ # Draw the sweep line
+ cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2)
+ # Draw label, if provided
if label:
+ (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
+ cv2.rectangle(
+ self.im,
+ (line_x - text_width // 2 - 10, line_y // 2 - text_height // 2 - 10),
+ (line_x + text_width // 2 + 10, line_y // 2 + text_height // 2 + 10),
+ color,
+ -1,
+ )
cv2.putText(
- self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf
+ self.im,
+ label,
+ (line_x - text_width // 2, line_y // 2 + text_height // 2),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ self.sf,
+ txt_color,
+ self.tf,
)
- def plot_distance_and_line(self, pixels_distance, centroids, line_color, centroid_color):
+ def plot_distance_and_line(
+ self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
+ ):
"""
Plot the distance and line on frame.
Args:
pixels_distance (float): Pixels distance between two bbox centroids.
centroids (list): Bounding box centroids data.
- line_color (RGB): Distance line color.
- centroid_color (RGB): Bounding box centroid color.
+ line_color (tuple, optional): Distance line color.
+ centroid_color (tuple, optional): Bounding box centroid color.
"""
# Get the text size
- (text_width_m, text_height_m), _ = cv2.getTextSize(
- f"Pixels Distance: {pixels_distance:.2f}", 0, self.sf, self.tf
- )
+ text = f"Pixels Distance: {pixels_distance:.2f}"
+ (text_width_m, text_height_m), _ = cv2.getTextSize(text, 0, self.sf, self.tf)
# Define corners with 10-pixel margin and draw rectangle
- top_left = (15, 25)
- bottom_right = (15 + text_width_m + 20, 25 + text_height_m + 20)
- cv2.rectangle(self.im, top_left, bottom_right, centroid_color, -1)
+ cv2.rectangle(self.im, (15, 25), (15 + text_width_m + 20, 25 + text_height_m + 20), line_color, -1)
# Calculate the position for the text with a 10-pixel margin and draw text
- text_position = (top_left[0] + 10, top_left[1] + text_height_m + 10)
+ text_position = (25, 25 + text_height_m + 10)
cv2.putText(
self.im,
- f"Pixels Distance: {pixels_distance:.2f}",
+ text,
text_position,
0,
self.sf,
@@ -1101,10 +1133,12 @@ def plot_images(
mask = mask.astype(bool)
else:
mask = image_masks[j].astype(bool)
- with contextlib.suppress(Exception):
+ try:
im[y : y + h, x : x + w, :][mask] = (
im[y : y + h, x : x + w, :][mask] * 0.4 + np.array(color) * 0.6
)
+ except Exception:
+ pass
annotator.fromarray(im)
if not save:
return np.asarray(annotator.im)
@@ -1141,19 +1175,19 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False,
save_dir = Path(file).parent if file else Path(dir)
if classify:
fig, ax = plt.subplots(2, 2, figsize=(6, 6), tight_layout=True)
- index = [1, 4, 2, 3]
+ index = [2, 5, 3, 4]
elif segment:
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
- index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
+ index = [2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 8, 9, 12, 13]
elif pose:
fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True)
- index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
+ index = [2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 16, 17, 18, 19, 9, 10, 13, 14]
elif regress:
fig, ax = plt.subplots(2, 2, figsize=(6, 6), tight_layout=True)
index = [1, 4, 2, 3]
else:
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
- index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]
+ index = [2, 3, 4, 5, 6, 9, 10, 11, 7, 8]
ax = ax.ravel()
files = list(save_dir.glob("results*.csv"))
assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."
@@ -1213,7 +1247,7 @@ def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none
def plot_tune_results(csv_file="tune_results.csv"):
"""
- Plot the evolution results stored in an 'tune_results.csv' file. The function generates a scatter plot for each key
+ Plot the evolution results stored in a 'tune_results.csv' file. The function generates a scatter plot for each key
in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on the plots.
Args:
diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py
index 74604eda23c..e4a40f5e241 100644
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@@ -1,8 +1,9 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import torch
import torch.nn as nn
+from . import LOGGER
from .checks import check_version
from .metrics import bbox_iou, probiou
from .ops import xywhr2xyxyxyxy
@@ -58,17 +59,46 @@ def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_g
"""
self.bs = pd_scores.shape[0]
self.n_max_boxes = gt_bboxes.shape[1]
+ device = gt_bboxes.device
if self.n_max_boxes == 0:
- device = gt_bboxes.device
return (
- torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
- torch.zeros_like(pd_bboxes).to(device),
- torch.zeros_like(pd_scores).to(device),
- torch.zeros_like(pd_scores[..., 0]).to(device),
- torch.zeros_like(pd_scores[..., 0]).to(device),
+ torch.full_like(pd_scores[..., 0], self.bg_idx),
+ torch.zeros_like(pd_bboxes),
+ torch.zeros_like(pd_scores),
+ torch.zeros_like(pd_scores[..., 0]),
+ torch.zeros_like(pd_scores[..., 0]),
)
+ try:
+ return self._forward(pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)
+ except torch.OutOfMemoryError:
+ # Move tensors to CPU, compute, then move back to original device
+ LOGGER.warning("WARNING: CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU")
+ cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)]
+ result = self._forward(*cpu_tensors)
+ return tuple(t.to(device) for t in result)
+
+ def _forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
+ """
+ Compute the task-aligned assignment. Reference code is available at
+ https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py.
+
+ Args:
+ pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ anc_points (Tensor): shape(num_total_anchors, 2)
+ gt_labels (Tensor): shape(bs, n_max_boxes, 1)
+ gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
+ mask_gt (Tensor): shape(bs, n_max_boxes, 1)
+
+ Returns:
+ target_labels (Tensor): shape(bs, num_total_anchors)
+ target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ fg_mask (Tensor): shape(bs, num_total_anchors)
+ target_gt_idx (Tensor): shape(bs, num_total_anchors)
+ """
mask_pos, align_metric, overlaps = self.get_pos_mask(
pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt
)
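The assigner now retries the whole assignment on CPU when CUDA runs out of memory and moves the results back to the original device afterwards. A stripped-down sketch of that fallback pattern; the helper name and tuple handling are illustrative, and `torch.cuda.OutOfMemoryError` is used here as the widely available exception class:

```python
import torch

def run_with_cpu_fallback(fn, *tensors):
    """Run fn on the tensors' device; on CUDA OOM, retry on CPU and move results back (illustrative helper)."""
    device = tensors[0].device
    try:
        return fn(*tensors)
    except torch.cuda.OutOfMemoryError:
        result = fn(*(t.cpu() for t in tensors))
        return tuple(r.to(device) for r in result) if isinstance(result, tuple) else result.to(device)
```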
@@ -306,7 +336,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
assert feats is not None
dtype, device = feats[0].dtype, feats[0].device
for i, stride in enumerate(strides):
- _, _, h, w = feats[i].shape
+ h, w = feats[i].shape[2:] if isinstance(feats, list) else (int(feats[i][0]), int(feats[i][1]))
sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x
sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y
sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
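The `make_anchors` change lets the loop accept either real feature maps or plain (h, w) shape pairs; either way the per-level grid construction is the same. A self-contained sketch of that construction, assuming the usual 0.5 cell offset (the function name here is illustrative):

```python
import torch

def make_anchor_grid(h, w, stride, grid_cell_offset=0.5):
    """Return (h*w, 2) anchor centers and matching strides for one feature level."""
    sx = torch.arange(w, dtype=torch.float32) + grid_cell_offset  # x cell centers
    sy = torch.arange(h, dtype=torch.float32) + grid_cell_offset  # y cell centers
    sy, sx = torch.meshgrid(sy, sx, indexing="ij")  # torch>=1.10 for the indexing kwarg
    points = torch.stack((sx, sy), dim=-1).view(-1, 2)
    strides = torch.full((h * w, 1), float(stride))
    return points, strides

pts, strd = make_anchor_grid(80, 80, 8)  # e.g. a P3 level of a 640x640 input
print(pts.shape, strd.shape)  # torch.Size([6400, 2]) torch.Size([6400, 1])
```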
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index e7fcca0ad70..1f87ec79388 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -1,6 +1,5 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import contextlib
import gc
import math
import os
@@ -13,6 +12,7 @@
from typing import Union
import numpy as np
+import thop
import torch
import torch.distributed as dist
import torch.nn as nn
@@ -31,11 +31,6 @@
)
from ultralytics.utils.checks import check_version
-try:
- import thop
-except ImportError:
- thop = None
-
# Version checks (all default to version>=min_version)
TORCH_1_9 = check_version(torch.__version__, "1.9.0")
TORCH_1_13 = check_version(torch.__version__, "1.13.0")
@@ -113,16 +108,24 @@ def get_cpu_info():
from ultralytics.utils import PERSISTENT_CACHE # avoid circular import error
if "cpu_info" not in PERSISTENT_CACHE:
- with contextlib.suppress(Exception):
+ try:
import cpuinfo # pip install py-cpuinfo
k = "brand_raw", "hardware_raw", "arch_string_raw" # keys sorted by preference
info = cpuinfo.get_cpu_info() # info dict
string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
PERSISTENT_CACHE["cpu_info"] = string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
+ except Exception:
+ pass
return PERSISTENT_CACHE.get("cpu_info", "unknown")
+def get_gpu_info(index):
+ """Return a string with system GPU information, i.e. 'Tesla T4, 15102MiB'."""
+ properties = torch.cuda.get_device_properties(index)
+ return f"{properties.name}, {properties.total_memory / (1 << 20):.0f}MiB"
+
+
def select_device(device="", batch=0, newline=False, verbose=True):
"""
Selects the appropriate PyTorch device based on the provided arguments.
@@ -156,7 +159,7 @@ def select_device(device="", batch=0, newline=False, verbose=True):
Note:
Sets the 'CUDA_VISIBLE_DEVICES' environment variable for specifying which GPUs to use.
"""
- if isinstance(device, torch.device):
+ if isinstance(device, torch.device) or str(device).startswith("tpu"):
return device
s = f"Ultralytics {__version__} 🚀 Python-{PYTHON_VERSION} torch-{torch.__version__} "
@@ -170,6 +173,8 @@ def select_device(device="", batch=0, newline=False, verbose=True):
elif device: # non-cpu device requested
if device == "cuda":
device = "0"
+ if "," in device:
+ device = ",".join([x for x in device.split(",") if x]) # remove sequential commas, i.e. "0,,1" -> "0,1"
visible = os.environ.get("CUDA_VISIBLE_DEVICES", None)
os.environ["CUDA_VISIBLE_DEVICES"] = device # set environment variable - must be before assert is_available()
if not (torch.cuda.is_available() and torch.cuda.device_count() >= len(device.split(","))):
@@ -191,7 +196,7 @@ def select_device(device="", batch=0, newline=False, verbose=True):
)
if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available
- devices = device.split(",") if device else "0" # range(torch.cuda.device_count()) # i.e. 0,1,6,7
+ devices = device.split(",") if device else "0" # i.e. "0,1" -> ["0", "1"]
n = len(devices) # device count
if n > 1: # multi-GPU
if batch < 1:
@@ -206,8 +211,7 @@ def select_device(device="", batch=0, newline=False, verbose=True):
)
space = " " * (len(s) + 1)
for i, d in enumerate(devices):
- p = torch.cuda.get_device_properties(i)
- s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
+ s += f"{'' if i == 0 else space}CUDA:{d} ({get_gpu_info(i)})\n" # bytes to MB
arg = "cuda:0"
elif mps and TORCH_2_0 and torch.backends.mps.is_available():
# Prefer MPS if available
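With the comma cleanup above, device strings such as "0,,1" are normalized before `CUDA_VISIBLE_DEVICES` is set. A short usage sketch; it assumes a machine with at least two visible GPUs, and on a CPU-only box the multi-GPU call raises a descriptive error instead:

```python
from ultralytics.utils.torch_utils import select_device

# Device strings with stray commas are normalized ("0,,1" -> "0,1") before
# CUDA_VISIBLE_DEVICES is set; assumes at least two visible GPUs.
multi = select_device("0,,1", batch=32)  # batch must be divisible by the GPU count
cpu = select_device("cpu")               # always available, skips the CUDA checks
```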
@@ -293,28 +297,22 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640):
- """
- Model information.
-
- imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
- """
+ """Print and return detailed model information layer by layer."""
if not verbose:
return
n_p = get_num_params(model) # number of parameters
n_g = get_num_gradients(model) # number of gradients
n_l = len(list(model.modules())) # number of layers
if detailed:
- LOGGER.info(
- f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}"
- )
+ LOGGER.info(f"{'layer':>5}{'name':>40}{'gradient':>10}{'parameters':>12}{'shape':>20}{'mu':>10}{'sigma':>10}")
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace("module_list.", "")
LOGGER.info(
- "%5g %40s %9s %12g %20s %10.3g %10.3g %10s"
- % (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std(), p.dtype)
+ f"{i:>5g}{name:>40s}{p.requires_grad!r:>10}{p.numel():>12g}{str(list(p.shape)):>20s}"
+ f"{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype):>15s}"
)
- flops = get_flops(model, imgsz)
+ flops = get_flops(model, imgsz) # imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]
fused = " (fused)" if getattr(model, "is_fused", lambda: False)() else ""
fs = f", {flops:.1f} GFLOPs" if flops else ""
yaml_file = getattr(model, "yaml_file", "") or getattr(model, "yaml", {}).get("yaml_file", "")
@@ -365,9 +363,6 @@ def model_info_for_loggers(trainer):
def get_flops(model, imgsz=640):
"""Return a YOLO model's FLOPs."""
- if not thop:
- return 0.0 # if not installed return 0.0 GFLOPs
-
try:
model = de_parallel(model)
p = next(model.parameters())
@@ -595,7 +590,7 @@ def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: dict
# Save
combined = {**metadata, **x, **(updates or {})}
- torch.save(combined, s or f, use_dill=False) # combine dicts (prefer to the right)
+ torch.save(combined, s or f) # combine dicts (prefer to the right)
mb = os.path.getsize(s or f) / 1e6 # file size
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
return combined
@@ -615,7 +610,33 @@ def convert_optimizer_state_dict_to_fp16(state_dict):
return state_dict
-def profile(input, ops, n=10, device=None):
+@contextmanager
+def cuda_memory_usage(device=None):
+ """
+ Monitor and manage CUDA memory usage.
+
+ This function checks if CUDA is available and, if so, empties the CUDA cache to free up unused memory.
+ It then yields a dictionary containing memory usage information, which can be updated by the caller.
+ Finally, it updates the dictionary with the amount of memory reserved by CUDA on the specified device.
+
+ Args:
+ device (torch.device, optional): The CUDA device to query memory usage for. Defaults to None.
+
+ Yields:
+ (dict): A dictionary with a key 'memory' initialized to 0, which will be updated with the reserved memory.
+ """
+ cuda_info = dict(memory=0)
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ try:
+ yield cuda_info
+ finally:
+ cuda_info["memory"] = torch.cuda.memory_reserved(device)
+ else:
+ yield cuda_info
+
+
+def profile(input, ops, n=10, device=None, max_num_obj=0):
"""
Ultralytics speed, memory and FLOPs profiler.
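The new `cuda_memory_usage` context manager empties the CUDA cache on entry and records `torch.cuda.memory_reserved` into the yielded dict on exit, staying at 0 on CPU-only machines. A small usage sketch with an arbitrary workload:

```python
import torch
from ultralytics.utils.torch_utils import cuda_memory_usage

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
with cuda_memory_usage(device) as info:
    x = torch.randn(64, 3, 224, 224, device=device)  # arbitrary workload
    y = (x * 2).sum()
print(f"reserved after block: {info['memory'] / 1e9:.3f} GB")  # stays 0.000 on CPU
```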
@@ -636,7 +657,8 @@ def profile(input, ops, n=10, device=None):
f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
f"{'input':>24s}{'output':>24s}"
)
-
+ gc.collect() # attempt to free unused memory
+ torch.cuda.empty_cache()
for x in input if isinstance(input, list) else [input]:
x = x.to(device)
x.requires_grad = True
@@ -645,24 +667,36 @@ def profile(input, ops, n=10, device=None):
m = m.half() if hasattr(m, "half") and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
try:
- flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs
+ flops = thop.profile(deepcopy(m), inputs=[x], verbose=False)[0] / 1e9 * 2 # GFLOPs
except Exception:
flops = 0
try:
+ mem = 0
for _ in range(n):
- t[0] = time_sync()
- y = m(x)
- t[1] = time_sync()
- try:
- (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
- t[2] = time_sync()
- except Exception: # no backward method
- # print(e) # for debug
- t[2] = float("nan")
+ with cuda_memory_usage(device) as cuda_info:
+ t[0] = time_sync()
+ y = m(x)
+ t[1] = time_sync()
+ try:
+ (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
+ t[2] = time_sync()
+ except Exception: # no backward method
+ # print(e) # for debug
+ t[2] = float("nan")
+ mem += cuda_info["memory"] / 1e9 # (GB)
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
- mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # (GB)
+ if max_num_obj: # simulate training with predictions per image grid (for AutoBatch)
+ with cuda_memory_usage(device) as cuda_info:
+ torch.randn(
+ x.shape[0],
+ max_num_obj,
+ int(sum((x.shape[-1] / s) * (x.shape[-2] / s) for s in m.stride.tolist())),
+ device=device,
+ dtype=torch.float32,
+ )
+ mem += cuda_info["memory"] / 1e9 # (GB)
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else "list" for x in (x, y)) # shapes
p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
LOGGER.info(f"{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}")
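With `max_num_obj` set, `profile` additionally allocates a dummy per-image predictions tensor sized from the module's strides, which is how AutoBatch can account for training-time memory. A hedged call sketch; the weights and sizes are assumptions, and the ops are chosen to expose the `stride` attribute the simulation relies on:

```python
import torch
from ultralytics import YOLO
from ultralytics.utils.torch_utils import profile

# Profile two detection models; max_num_obj=100 also reserves a dummy per-image
# predictions buffer sized from each model's strides, mirroring AutoBatch usage.
# Assumes the weight files are available locally or can be auto-downloaded.
x = torch.randn(1, 3, 640, 640)
ops = [YOLO(w).model for w in ("yolo11n.pt", "yolo11s.pt")]
results = profile(x, ops, n=3, max_num_obj=100)
```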
@@ -670,8 +704,9 @@ def profile(input, ops, n=10, device=None):
except Exception as e:
LOGGER.info(e)
results.append(None)
- gc.collect() # attempt to free unused memory
- torch.cuda.empty_cache()
+ finally:
+ gc.collect() # attempt to free unused memory
+ torch.cuda.empty_cache()
return results
@@ -719,3 +754,48 @@ def __call__(self, epoch, fitness):
f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping."
)
return stop
+
+
+class FXModel(nn.Module):
+ """
+ A custom model class for torch.fx compatibility.
+
+ This class extends `torch.nn.Module` and is designed to ensure compatibility with torch.fx for tracing and graph manipulation.
+ It copies attributes from an existing model and explicitly sets the model attribute to ensure proper copying.
+
+ Args:
+ model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+ """
+
+ def __init__(self, model):
+ """
+ Initialize the FXModel.
+
+ Args:
+ model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+ """
+ super().__init__()
+ copy_attr(self, model)
+ # Explicitly set `model` since `copy_attr` does not copy it.
+ self.model = model.model
+
+ def forward(self, x):
+ """
+ Forward pass through the model.
+
+ This method performs the forward pass through the model, handling the dependencies between layers and saving intermediate outputs.
+
+ Args:
+ x (torch.Tensor): The input tensor to the model.
+
+ Returns:
+ (torch.Tensor): The output tensor from the model.
+ """
+ y = [] # outputs
+ for m in self.model:
+ if m.f != -1: # if not from previous layer
+ # from earlier layers
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
+ x = m(x) # run
+ y.append(x) # save output
+ return x
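`FXModel` replays the same `m.f` layer routing as the original model but as a flat loop over submodules, which is the shape `torch.fx`-based capture generally needs. A hedged sketch of wrapping a detection model and checking that the forward pass still runs; the weight file is an assumption, and actual fx tracing of the wrapped model may still depend on the torch version and the export path using it:

```python
import torch
from ultralytics import YOLO
from ultralytics.utils.torch_utils import FXModel

# Wrap a detection model so its layer routing becomes a flat, fx-friendly loop.
det_model = YOLO("yolo11n.pt").model.eval()  # assumes weights are available
fx_wrapped = FXModel(det_model)

x = torch.randn(1, 3, 640, 640)
with torch.no_grad():
    y = fx_wrapped(x)  # same routing as the original model's forward
```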
diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py
index 3f873a6fafc..e8b97d89f07 100644
--- a/ultralytics/utils/triton.py
+++ b/ultralytics/utils/triton.py
@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import List
from urllib.parse import urlsplit
@@ -66,6 +66,7 @@ def __init__(self, url: str, endpoint: str = "", scheme: str = ""):
self.np_input_formats = [type_map[x] for x in self.input_formats]
self.input_names = [x["name"] for x in config["input"]]
self.output_names = [x["name"] for x in config["output"]]
+ self.metadata = eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None"))
def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]:
"""
diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py
index 1329bfe6ecc..831f0fa0f2d 100644
--- a/ultralytics/utils/tuner.py
+++ b/ultralytics/utils/tuner.py
@@ -1,13 +1,16 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-import subprocess
-
-from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_save_dir
+from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_cfg, get_save_dir
from ultralytics.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, NUM_THREADS, checks
def run_ray_tune(
- model, space: dict = None, grace_period: int = 10, gpu_per_trial: int = None, max_samples: int = 10, **train_args
+ model,
+ space: dict = None,
+ grace_period: int = 10,
+ gpu_per_trial: int = None,
+ max_samples: int = 10,
+ **train_args,
):
"""
Runs hyperparameter tuning using Ray Tune.
@@ -27,10 +30,10 @@ def run_ray_tune(
```python
from ultralytics import YOLO
- # Load a YOLOv8n model
- model = YOLO("yolov8n.pt")
+ # Load a YOLO11n model
+ model = YOLO("yolo11n.pt")
- # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset
+ # Start tuning hyperparameters for YOLO11n training on the COCO8 dataset
result_grid = model.tune(data="coco8.yaml", use_ray=True)
```
"""
@@ -39,7 +42,7 @@ def run_ray_tune(
train_args = {}
try:
- subprocess.run("pip install ray[tune]".split(), check=True) # do not add single quotes here
+ checks.check_requirements("ray[tune]")
import ray
from ray import tune
@@ -131,7 +134,9 @@ def _tune(config):
tuner_callbacks = [WandbLoggerCallback(project="YOLOv8-tune")] if wandb else []
# Create the Ray Tune hyperparameter search tuner
- tune_dir = get_save_dir(DEFAULT_CFG, name="tune").resolve() # must be absolute dir
+ tune_dir = get_save_dir(
+ get_cfg(DEFAULT_CFG, train_args), name=train_args.pop("name", "tune")
+ ).resolve() # must be absolute dir
tune_dir.mkdir(parents=True, exist_ok=True)
tuner = tune.Tuner(
trainable_with_resources,