From 0a0759b56bf9cf3b6b0968dec8a741479ded6d0e Mon Sep 17 00:00:00 2001 From: Mehrdad Date: Wed, 5 Feb 2025 13:20:21 -0800 Subject: [PATCH] Merge remote-tracking branch 'upstream/main' Staged --- .dockerignore | 35 + .github/ISSUE_TEMPLATE/bug-report.yml | 18 +- .github/ISSUE_TEMPLATE/config.yml | 4 +- .github/ISSUE_TEMPLATE/feature-request.yml | 16 +- .github/ISSUE_TEMPLATE/question.yml | 6 +- .github/dependabot.yml | 3 +- .github/workflows/{ci.yaml => ci.yml} | 96 +- .github/workflows/cla.yml | 5 +- .github/workflows/codeql.yaml | 42 - .github/workflows/{docker.yaml => docker.yml} | 96 +- .github/workflows/docs.yml | 40 +- .github/workflows/format.yml | 26 +- .github/workflows/links.yml | 33 +- .github/workflows/merge-main-into-prs.yml | 5 +- .github/workflows/publish.yml | 190 ++-- .github/workflows/stale.yml | 6 +- .gitignore | 4 + CONTRIBUTING.md | 6 +- README.md | 145 +-- README.zh-CN.md | 141 +-- docker/Dockerfile | 16 +- docker/Dockerfile-arm64 | 4 +- docker/Dockerfile-cpu | 16 +- docker/Dockerfile-jetson-jetpack4 | 7 +- docker/Dockerfile-jetson-jetpack5 | 29 +- docker/Dockerfile-jetson-jetpack6 | 25 +- docker/Dockerfile-jupyter | 33 + docker/Dockerfile-python | 6 +- docker/Dockerfile-runner | 7 +- docs/README.md | 12 +- docs/build_docs.py | 128 ++- docs/build_reference.py | 2 +- docs/en/datasets/classify/caltech101.md | 8 +- docs/en/datasets/classify/caltech256.md | 12 +- docs/en/datasets/classify/cifar10.md | 10 +- docs/en/datasets/classify/cifar100.md | 19 +- docs/en/datasets/classify/fashion-mnist.md | 14 +- docs/en/datasets/classify/imagenet.md | 20 +- docs/en/datasets/classify/imagenet10.md | 8 +- docs/en/datasets/classify/imagenette.md | 20 +- docs/en/datasets/classify/imagewoof.md | 12 +- docs/en/datasets/classify/index.md | 13 +- docs/en/datasets/classify/mnist.md | 14 +- docs/en/datasets/detect/african-wildlife.md | 27 +- docs/en/datasets/detect/argoverse.md | 12 +- docs/en/datasets/detect/brain-tumor.md | 20 +- docs/en/datasets/detect/coco.md | 32 
+- docs/en/datasets/detect/coco8.md | 24 +- docs/en/datasets/detect/globalwheat2020.md | 14 +- docs/en/datasets/detect/index.md | 19 +- docs/en/datasets/detect/lvis.md | 16 +- docs/en/datasets/detect/medical-pills.md | 147 +++ docs/en/datasets/detect/objects365.md | 16 +- docs/en/datasets/detect/open-images-v7.md | 57 +- docs/en/datasets/detect/roboflow-100.md | 4 +- docs/en/datasets/detect/signature.md | 16 +- docs/en/datasets/detect/sku-110k.md | 18 +- docs/en/datasets/detect/visdrone.md | 14 +- docs/en/datasets/detect/voc.md | 14 +- docs/en/datasets/detect/xview.md | 8 +- docs/en/datasets/explorer/api.md | 26 +- docs/en/datasets/explorer/dashboard.md | 4 + docs/en/datasets/explorer/explorer.ipynb | 604 ---------- docs/en/datasets/explorer/explorer.md | 278 +++++ docs/en/datasets/explorer/index.md | 4 + docs/en/datasets/index.md | 22 +- docs/en/datasets/obb/dota-v2.md | 18 +- docs/en/datasets/obb/dota8.md | 26 +- docs/en/datasets/obb/index.md | 26 +- docs/en/datasets/pose/coco.md | 39 +- docs/en/datasets/pose/coco8-pose.md | 28 +- docs/en/datasets/pose/dog-pose.md | 141 +++ docs/en/datasets/pose/hand-keypoints.md | 31 +- docs/en/datasets/pose/index.md | 19 +- docs/en/datasets/pose/tiger-pose.md | 28 +- docs/en/datasets/segment/carparts-seg.md | 20 +- docs/en/datasets/segment/coco.md | 38 +- docs/en/datasets/segment/coco8-seg.md | 22 +- docs/en/datasets/segment/crack-seg.md | 14 +- docs/en/datasets/segment/index.md | 22 +- docs/en/datasets/segment/package-seg.md | 20 +- docs/en/datasets/track/index.md | 10 +- docs/en/guides/analytics.md | 439 +++---- docs/en/guides/azureml-quickstart.md | 81 +- docs/en/guides/conda-quickstart.md | 6 +- .../guides/coral-edge-tpu-on-raspberry-pi.md | 111 +- .../guides/data-collection-and-annotation.md | 6 +- docs/en/guides/deepstream-nvidia-jetson.md | 169 ++- docs/en/guides/defining-project-goals.md | 16 +- docs/en/guides/distance-calculation.md | 70 +- docs/en/guides/docker-quickstart.md | 6 +- docs/en/guides/heatmaps.md | 309 
+---- docs/en/guides/hyperparameter-tuning.md | 76 +- docs/en/guides/index.md | 16 +- .../instance-segmentation-and-tracking.md | 46 +- .../guides/isolating-segmentation-objects.md | 30 +- docs/en/guides/kfold-cross-validation.md | 13 +- docs/en/guides/model-deployment-options.md | 97 +- docs/en/guides/model-deployment-practices.md | 26 +- docs/en/guides/model-evaluation-insights.md | 63 +- .../model-monitoring-and-maintenance.md | 17 +- docs/en/guides/model-testing.md | 37 +- docs/en/guides/model-training-tips.md | 46 +- docs/en/guides/nvidia-jetson.md | 445 +++++--- docs/en/guides/object-blurring.md | 42 +- docs/en/guides/object-counting.md | 332 ++---- docs/en/guides/object-cropping.md | 40 +- ...ng-openvino-latency-vs-throughput-modes.md | 2 +- docs/en/guides/parking-management.md | 56 +- .../en/guides/preprocessing_annotated_data.md | 32 +- docs/en/guides/queue-management.md | 158 ++- docs/en/guides/raspberry-pi.md | 206 ++-- docs/en/guides/region-counting.md | 116 +- docs/en/guides/sahi-tiled-inference.md | 86 +- docs/en/guides/security-alarm-system.md | 227 ++-- docs/en/guides/speed-estimation.md | 123 +- docs/en/guides/steps-of-a-cv-project.md | 16 +- docs/en/guides/streamlit-live-inference.md | 83 +- docs/en/guides/trackzone.md | 173 +++ docs/en/guides/triton-inference-server.md | 107 +- docs/en/guides/view-results-in-terminal.md | 6 +- docs/en/guides/vision-eye.md | 48 +- docs/en/guides/workouts-monitoring.md | 149 +-- docs/en/guides/yolo-common-issues.md | 62 +- docs/en/guides/yolo-performance-metrics.md | 48 +- docs/en/guides/yolo-thread-safe-inference.md | 12 +- docs/en/help/CI.md | 29 +- docs/en/help/CLA.md | 120 +- docs/en/help/FAQ.md | 52 +- ...{code_of_conduct.md => code-of-conduct.md} | 0 docs/en/help/contributing.md | 139 ++- docs/en/help/index.md | 12 +- ...ple.md => minimum-reproducible-example.md} | 0 docs/en/help/privacy.md | 3 +- docs/en/help/security.md | 2 +- docs/en/hub/app/android.md | 4 +- docs/en/hub/app/index.md | 4 +- 
docs/en/hub/app/ios.md | 4 +- docs/en/hub/cloud-training.md | 2 +- docs/en/hub/datasets.md | 4 +- docs/en/hub/index.md | 14 +- docs/en/hub/inference-api.md | 8 +- docs/en/hub/models.md | 16 +- docs/en/hub/quickstart.md | 6 +- docs/en/index.md | 183 ++- docs/en/integrations/albumentations.md | 199 ++++ docs/en/integrations/amazon-sagemaker.md | 62 +- docs/en/integrations/clearml.md | 64 +- docs/en/integrations/comet.md | 66 +- docs/en/integrations/coreml.md | 68 +- docs/en/integrations/dvc.md | 66 +- docs/en/integrations/edge-tpu.md | 66 +- docs/en/integrations/google-colab.md | 42 +- docs/en/integrations/gradio.md | 46 +- docs/en/integrations/ibm-watsonx.md | 64 +- docs/en/integrations/index.md | 58 +- docs/en/integrations/jupyterlab.md | 58 +- docs/en/integrations/kaggle.md | 57 +- docs/en/integrations/mnn.md | 344 ++++++ docs/en/integrations/ncnn.md | 64 +- docs/en/integrations/neural-magic.md | 96 +- docs/en/integrations/onnx.md | 78 +- docs/en/integrations/openvino.md | 27 +- docs/en/integrations/paddlepaddle.md | 78 +- docs/en/integrations/paperspace.md | 52 +- docs/en/integrations/ray-tune.md | 64 +- docs/en/integrations/roboflow.md | 76 +- docs/en/integrations/rockchip-rknn.md | 206 ++++ docs/en/integrations/seeedstudio-recamera.md | 110 ++ docs/en/integrations/sony-imx500.md | 330 ++++++ docs/en/integrations/tensorboard.md | 78 +- docs/en/integrations/tensorrt.md | 86 +- docs/en/integrations/tf-graphdef.md | 90 +- docs/en/integrations/tf-savedmodel.md | 62 +- docs/en/integrations/tfjs.md | 72 +- docs/en/integrations/tflite.md | 76 +- docs/en/integrations/torchscript.md | 82 +- docs/en/integrations/vscode.md | 12 +- docs/en/integrations/weights-biases.md | 208 ++-- docs/en/macros/augmentation-args.md | 2 +- docs/en/macros/export-args.md | 30 +- docs/en/macros/export-table.md | 33 +- docs/en/macros/predict-args.md | 37 +- docs/en/macros/sam-auto-annotate.md | 12 + docs/en/macros/solutions-args.md | 12 + docs/en/macros/train-args.md | 100 +- 
docs/en/macros/validation-args.md | 4 +- docs/en/macros/yolo-cls-perf.md | 7 + docs/en/macros/yolo-det-perf.md | 7 + docs/en/macros/yolo-obb-perf.md | 7 + docs/en/macros/yolo-pose-perf.md | 7 + docs/en/macros/yolo-seg-perf.md | 7 + docs/en/models/index.md | 6 +- docs/en/models/mobile-sam.md | 55 +- docs/en/models/rtdetr.md | 9 +- docs/en/models/sam-2.md | 107 +- docs/en/models/sam.md | 40 +- docs/en/models/yolo-nas.md | 3 +- docs/en/models/yolo-world.md | 4 +- docs/en/models/yolo11.md | 75 +- docs/en/models/yolov10.md | 5 + docs/en/models/yolov3.md | 56 +- docs/en/models/yolov5.md | 11 +- docs/en/models/yolov6.md | 23 +- docs/en/models/yolov7.md | 16 +- docs/en/models/yolov8.md | 11 +- docs/en/models/yolov9.md | 15 +- docs/en/modes/benchmark.md | 78 +- docs/en/modes/export.md | 40 +- docs/en/modes/index.md | 85 +- docs/en/modes/predict.md | 169 +-- docs/en/modes/track.md | 64 +- docs/en/modes/train.md | 78 +- docs/en/modes/val.md | 50 +- docs/en/quickstart.md | 2 +- docs/en/reference/cfg/__init__.md | 6 +- docs/en/reference/data/converter.md | 4 + docs/en/reference/data/explorer/explorer.md | 21 - docs/en/reference/data/explorer/gui/dash.md | 57 - docs/en/reference/data/explorer/utils.md | 33 - docs/en/reference/data/utils.md | 4 + docs/en/reference/engine/exporter.md | 8 + docs/en/reference/models/sam/predict.md | 4 + docs/en/reference/nn/modules/block.md | 4 + docs/en/reference/nn/modules/conv.md | 4 + docs/en/reference/solutions/region_counter.md | 16 + docs/en/reference/solutions/security_alarm.md | 16 + docs/en/reference/solutions/solutions.md | 16 + .../solutions/streamlit_inference.md | 2 +- docs/en/reference/solutions/trackzone.md | 16 + docs/en/reference/utils/__init__.md | 4 + docs/en/reference/utils/checks.md | 8 + docs/en/reference/utils/metrics.md | 2 +- docs/en/reference/utils/ops.md | 4 + docs/en/reference/utils/torch_utils.md | 12 + docs/en/solutions/index.md | 86 +- docs/en/tasks/classify.md | 84 +- docs/en/tasks/detect.md | 98 +- 
docs/en/tasks/index.md | 63 +- docs/en/tasks/obb.md | 94 +- docs/en/tasks/pose.md | 119 +- docs/en/tasks/segment.md | 92 +- docs/en/usage/callbacks.md | 71 +- docs/en/usage/cfg.md | 35 +- docs/en/usage/cli.md | 84 +- docs/en/usage/engine.md | 30 +- docs/en/usage/python.md | 160 +-- docs/en/usage/simple-utilities.md | 140 ++- .../environments/aws_quickstart_tutorial.md | 2 +- .../docker_image_quickstart_tutorial.md | 4 +- docs/en/yolov5/index.md | 14 +- .../tutorials/clearml_logging_integration.md | 4 +- .../tutorials/comet_logging_integration.md | 2 +- .../tutorials/hyperparameter_evolution.md | 2 +- docs/en/yolov5/tutorials/model_ensembling.md | 2 +- docs/en/yolov5/tutorials/model_export.md | 30 +- .../tutorials/model_pruning_and_sparsity.md | 2 +- .../en/yolov5/tutorials/multi_gpu_training.md | 2 +- .../tutorials/pytorch_hub_model_loading.md | 2 +- .../roboflow_datasets_integration.md | 6 +- .../tutorials/test_time_augmentation.md | 2 +- .../tips_for_best_training_results.md | 2 +- docs/en/yolov5/tutorials/train_custom_data.md | 8 +- .../transfer_learning_with_frozen_layers.md | 2 +- docs/mkdocs_github_authors.yaml | 55 +- docs/model_data.py | 93 ++ docs/overrides/assets/favicon.ico | Bin 9662 -> 0 bytes docs/overrides/javascript/benchmark.js | 229 ++++ docs/overrides/javascript/extra.js | 196 +++- docs/overrides/javascript/giscus.js | 85 ++ docs/overrides/main.html | 2 +- docs/overrides/partials/comments.html | 48 +- docs/overrides/partials/source-file.html | 26 - docs/overrides/stylesheets/style.css | 27 +- examples/README.md | 11 +- examples/RTDETR-ONNXRuntime-Python/README.md | 43 + examples/RTDETR-ONNXRuntime-Python/main.py | 222 ++++ .../YOLO-Series-ONNXRuntime-Rust/Cargo.toml | 14 + .../YOLO-Series-ONNXRuntime-Rust/README.md | 94 ++ .../YOLO-Series-ONNXRuntime-Rust/src/main.rs | 236 ++++ .../action_recognition.py | 8 +- examples/YOLOv8-CPP-Inference/README.md | 8 +- .../YOLOv8-LibTorch-CPP-Inference/README.md | 2 +- 
.../YOLOv8-LibTorch-CPP-Inference/main.cc | 1 + examples/YOLOv8-ONNXRuntime-CPP/inference.cpp | 2 +- examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml | 13 +- examples/YOLOv8-ONNXRuntime-Rust/README.md | 33 +- examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs | 2 +- examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs | 41 + examples/YOLOv8-ONNXRuntime-Rust/src/main.rs | 2 +- examples/YOLOv8-ONNXRuntime-Rust/src/model.rs | 29 +- .../src/ort_backend.rs | 183 +-- examples/YOLOv8-ONNXRuntime/main.py | 2 +- examples/YOLOv8-OpenCV-ONNX-Python/main.py | 2 +- .../README.md | 65 -- .../YOLOv8-OpenCV-int8-tflite-Python/main.py | 298 ----- .../YOLOv8-OpenVINO-CPP-Inference/README.md | 2 +- examples/YOLOv8-Region-Counter/readme.md | 13 +- .../yolov8_region_counter.py | 24 +- .../YOLOv8-SAHI-Inference-Video/readme.md | 20 +- .../yolov8_sahi.py | 36 +- .../main.py | 2 +- examples/YOLOv8-TFLite-Python/README.md | 55 + examples/YOLOv8-TFLite-Python/main.py | 221 ++++ examples/heatmaps.ipynb | 47 +- examples/hub.ipynb | 6 +- examples/object_counting.ipynb | 54 +- examples/object_tracking.ipynb | 30 +- examples/tutorial.ipynb | 310 ++--- mkdocs.yml | 111 +- pyproject.toml | 36 +- tests/__init__.py | 3 +- tests/conftest.py | 6 +- tests/test_cli.py | 10 +- tests/test_cuda.py | 26 +- tests/test_engine.py | 2 +- tests/test_explorer.py | 66 -- tests/test_exports.py | 101 +- tests/test_integrations.py | 2 +- tests/test_python.py | 15 +- tests/test_solutions.py | 74 +- ultralytics/__init__.py | 11 +- ultralytics/cfg/__init__.py | 490 +++++--- ultralytics/cfg/datasets/Argoverse.yaml | 3 +- ultralytics/cfg/datasets/DOTAv1.5.yaml | 3 +- ultralytics/cfg/datasets/DOTAv1.yaml | 3 +- ultralytics/cfg/datasets/GlobalWheat2020.yaml | 3 +- ultralytics/cfg/datasets/ImageNet.yaml | 3 +- ultralytics/cfg/datasets/Objects365.yaml | 3 +- ultralytics/cfg/datasets/SKU-110K.yaml | 3 +- ultralytics/cfg/datasets/VOC.yaml | 3 +- ultralytics/cfg/datasets/VisDrone.yaml | 3 +- .../cfg/datasets/african-wildlife.yaml | 3 +- 
ultralytics/cfg/datasets/brain-tumor.yaml | 3 +- ultralytics/cfg/datasets/carparts-seg.yaml | 3 +- ultralytics/cfg/datasets/coco-pose.yaml | 11 +- ultralytics/cfg/datasets/coco.yaml | 3 +- ultralytics/cfg/datasets/coco128-seg.yaml | 5 +- ultralytics/cfg/datasets/coco128.yaml | 5 +- ultralytics/cfg/datasets/coco8-pose.yaml | 3 +- ultralytics/cfg/datasets/coco8-seg.yaml | 3 +- ultralytics/cfg/datasets/coco8.yaml | 3 +- ultralytics/cfg/datasets/crack-seg.yaml | 3 +- ultralytics/cfg/datasets/dog-pose.yaml | 24 + ultralytics/cfg/datasets/dota8.yaml | 3 +- ultralytics/cfg/datasets/hand-keypoints.yaml | 3 +- ultralytics/cfg/datasets/lvis.yaml | 5 +- ultralytics/cfg/datasets/medical-pills.yaml | 22 + ultralytics/cfg/datasets/open-images-v7.yaml | 3 +- ultralytics/cfg/datasets/package-seg.yaml | 7 +- ultralytics/cfg/datasets/signature.yaml | 3 +- ultralytics/cfg/datasets/tiger-pose.yaml | 3 +- ultralytics/cfg/datasets/xView.yaml | 3 +- ultralytics/cfg/default.yaml | 13 +- .../cfg/models/11/yolo11-cls-resnet18.yaml | 17 + ultralytics/cfg/models/11/yolo11-cls.yaml | 7 +- ultralytics/cfg/models/11/yolo11-obb.yaml | 7 +- ultralytics/cfg/models/11/yolo11-pose.yaml | 7 +- ultralytics/cfg/models/11/yolo11-seg.yaml | 7 +- ultralytics/cfg/models/11/yolo11.yaml | 7 +- ultralytics/cfg/models/README.md | 6 +- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml | 7 +- .../cfg/models/rt-detr/rtdetr-resnet101.yaml | 7 +- .../cfg/models/rt-detr/rtdetr-resnet50.yaml | 7 +- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml | 7 +- ultralytics/cfg/models/v10/yolov10b.yaml | 7 +- ultralytics/cfg/models/v10/yolov10l.yaml | 7 +- ultralytics/cfg/models/v10/yolov10m.yaml | 7 +- ultralytics/cfg/models/v10/yolov10n.yaml | 7 +- ultralytics/cfg/models/v10/yolov10s.yaml | 7 +- ultralytics/cfg/models/v10/yolov10x.yaml | 7 +- ultralytics/cfg/models/v3/yolov3-spp.yaml | 7 +- ultralytics/cfg/models/v3/yolov3-tiny.yaml | 7 +- ultralytics/cfg/models/v3/yolov3.yaml | 7 +- ultralytics/cfg/models/v5/yolov5-p6.yaml | 7 +- 
ultralytics/cfg/models/v5/yolov5-relu6.yaml | 2 +- ultralytics/cfg/models/v5/yolov5.yaml | 7 +- ultralytics/cfg/models/v6/yolov6.yaml | 9 +- .../cfg/models/v8/relu6-yolov8-cls.yaml | 2 +- .../cfg/models/v8/relu6-yolov8-regress.yaml | 2 +- .../cfg/models/v8/relu6-yolov8-regress6.yaml | 2 +- ultralytics/cfg/models/v8/relu6-yolov8.yaml | 2 +- .../cfg/models/v8/yolov8-cls-resnet101.yaml | 7 +- .../cfg/models/v8/yolov8-cls-resnet50.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-cls.yaml | 7 +- .../cfg/models/v8/yolov8-ghost-p2.yaml | 8 +- .../cfg/models/v8/yolov8-ghost-p6.yaml | 8 +- ultralytics/cfg/models/v8/yolov8-ghost.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-obb.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-p2.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-p6.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml | 7 +- .../cfg/models/v8/yolov8-pose-relu6.yaml | 2 +- ultralytics/cfg/models/v8/yolov8-pose.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-relu6.yaml | 2 +- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-seg.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-world.yaml | 7 +- ultralytics/cfg/models/v8/yolov8-worldv2.yaml | 7 +- ultralytics/cfg/models/v8/yolov8.yaml | 7 +- ultralytics/cfg/models/v9/yolov9c-seg.yaml | 7 +- ultralytics/cfg/models/v9/yolov9c.yaml | 7 +- ultralytics/cfg/models/v9/yolov9e-seg.yaml | 7 +- ultralytics/cfg/models/v9/yolov9e.yaml | 7 +- ultralytics/cfg/models/v9/yolov9m.yaml | 7 +- ultralytics/cfg/models/v9/yolov9s.yaml | 7 +- ultralytics/cfg/models/v9/yolov9t.yaml | 7 +- ultralytics/cfg/solutions/default.yaml | 24 + ultralytics/cfg/trackers/botsort.yaml | 11 +- ultralytics/cfg/trackers/bytetrack.yaml | 11 +- ultralytics/data/__init__.py | 2 +- ultralytics/data/annotator.py | 26 +- ultralytics/data/augment.py | 50 +- ultralytics/data/base.py | 44 +- ultralytics/data/build.py | 14 +- ultralytics/data/converter.py | 99 +- ultralytics/data/dataset.py | 
64 +- ultralytics/data/explorer/__init__.py | 5 - ultralytics/data/explorer/explorer.py | 460 -------- ultralytics/data/explorer/gui/__init__.py | 1 - ultralytics/data/explorer/gui/dash.py | 282 ----- ultralytics/data/explorer/utils.py | 167 --- ultralytics/data/loaders.py | 247 ++-- ultralytics/data/scripts/download_weights.sh | 4 +- ultralytics/data/split_dota.py | 16 +- ultralytics/data/utils.py | 80 +- ultralytics/engine/__init__.py | 2 +- ultralytics/engine/exporter.py | 663 +++++++++-- ultralytics/engine/model.py | 198 ++-- ultralytics/engine/predictor.py | 41 +- ultralytics/engine/results.py | 135 ++- ultralytics/engine/trainer.py | 63 +- ultralytics/engine/tuner.py | 31 +- ultralytics/engine/validator.py | 35 +- ultralytics/hub/__init__.py | 8 +- ultralytics/hub/auth.py | 4 +- ultralytics/hub/google/__init__.py | 2 +- ultralytics/hub/session.py | 2 +- ultralytics/hub/utils.py | 4 +- ultralytics/models/__init__.py | 2 +- ultralytics/models/fastsam/__init__.py | 2 +- ultralytics/models/fastsam/model.py | 2 +- ultralytics/models/fastsam/predict.py | 12 +- ultralytics/models/fastsam/utils.py | 2 +- ultralytics/models/fastsam/val.py | 2 +- ultralytics/models/nas/__init__.py | 2 +- ultralytics/models/nas/model.py | 2 +- ultralytics/models/nas/predict.py | 2 +- ultralytics/models/nas/val.py | 10 +- ultralytics/models/rtdetr/__init__.py | 2 +- ultralytics/models/rtdetr/model.py | 2 +- ultralytics/models/rtdetr/predict.py | 2 +- ultralytics/models/rtdetr/train.py | 5 +- ultralytics/models/rtdetr/val.py | 2 +- ultralytics/models/sam/__init__.py | 6 +- ultralytics/models/sam/amg.py | 4 +- ultralytics/models/sam/build.py | 12 +- ultralytics/models/sam/model.py | 4 +- ultralytics/models/sam/modules/__init__.py | 2 +- ultralytics/models/sam/modules/blocks.py | 24 +- ultralytics/models/sam/modules/decoders.py | 2 +- ultralytics/models/sam/modules/encoders.py | 8 +- .../models/sam/modules/memory_attention.py | 2 +- ultralytics/models/sam/modules/sam.py | 196 ++-- 
.../models/sam/modules/tiny_encoder.py | 5 +- ultralytics/models/sam/modules/transformer.py | 2 +- ultralytics/models/sam/modules/utils.py | 2 +- ultralytics/models/sam/predict.py | 1004 +++++++++++++++-- ultralytics/models/utils/__init__.py | 2 +- ultralytics/models/utils/loss.py | 13 +- ultralytics/models/utils/ops.py | 4 +- ultralytics/models/yolo/__init__.py | 2 +- ultralytics/models/yolo/classify/__init__.py | 2 +- ultralytics/models/yolo/classify/predict.py | 5 +- ultralytics/models/yolo/classify/train.py | 7 +- ultralytics/models/yolo/classify/val.py | 8 +- ultralytics/models/yolo/detect/__init__.py | 2 +- ultralytics/models/yolo/detect/predict.py | 52 +- ultralytics/models/yolo/detect/train.py | 11 +- ultralytics/models/yolo/detect/val.py | 21 +- ultralytics/models/yolo/model.py | 4 +- ultralytics/models/yolo/obb/__init__.py | 2 +- ultralytics/models/yolo/obb/predict.py | 45 +- ultralytics/models/yolo/obb/train.py | 8 +- ultralytics/models/yolo/obb/val.py | 26 +- ultralytics/models/yolo/pose/__init__.py | 2 +- ultralytics/models/yolo/pose/predict.py | 27 +- ultralytics/models/yolo/pose/train.py | 4 +- ultralytics/models/yolo/pose/val.py | 12 +- ultralytics/models/yolo/segment/__init__.py | 2 +- ultralytics/models/yolo/segment/predict.py | 48 +- ultralytics/models/yolo/segment/train.py | 4 +- ultralytics/models/yolo/segment/val.py | 10 +- ultralytics/models/yolo/world/__init__.py | 2 +- ultralytics/models/yolo/world/train.py | 2 +- ultralytics/models/yolo/world/train_world.py | 2 +- ultralytics/nn/__init__.py | 2 +- ultralytics/nn/autobackend.py | 240 +++- ultralytics/nn/modules/__init__.py | 6 +- ultralytics/nn/modules/activation.py | 2 +- ultralytics/nn/modules/block.py | 56 +- ultralytics/nn/modules/conv.py | 22 +- ultralytics/nn/modules/head.py | 73 +- ultralytics/nn/modules/transformer.py | 2 +- ultralytics/nn/modules/utils.py | 2 +- ultralytics/nn/tasks.py | 224 ++-- ultralytics/solutions/__init__.py | 12 +- ultralytics/solutions/ai_gym.py | 210 ++-- 
ultralytics/solutions/analytics.py | 478 ++++---- ultralytics/solutions/distance_calculation.py | 143 ++- ultralytics/solutions/heatmap.py | 324 ++---- ultralytics/solutions/object_counter.py | 404 +++---- ultralytics/solutions/parking_management.py | 349 +++--- ultralytics/solutions/queue_management.py | 207 ++-- ultralytics/solutions/region_counter.py | 116 ++ ultralytics/solutions/security_alarm.py | 144 +++ ultralytics/solutions/solutions.py | 178 +++ ultralytics/solutions/speed_estimation.py | 148 ++- ultralytics/solutions/streamlit_inference.py | 315 +++--- ultralytics/solutions/trackzone.py | 68 ++ ultralytics/trackers/README.md | 52 +- ultralytics/trackers/__init__.py | 2 +- ultralytics/trackers/basetrack.py | 4 +- ultralytics/trackers/bot_sort.py | 2 +- ultralytics/trackers/byte_tracker.py | 2 +- ultralytics/trackers/track.py | 5 +- ultralytics/trackers/utils/__init__.py | 2 +- ultralytics/trackers/utils/gmc.py | 26 +- ultralytics/trackers/utils/kalman_filter.py | 2 +- ultralytics/trackers/utils/matching.py | 15 +- ultralytics/utils/__init__.py | 237 ++-- ultralytics/utils/autobatch.py | 39 +- ultralytics/utils/benchmarks.py | 106 +- ultralytics/utils/callbacks/__init__.py | 2 +- ultralytics/utils/callbacks/base.py | 2 +- ultralytics/utils/callbacks/clearml.py | 6 +- ultralytics/utils/callbacks/comet.py | 53 +- ultralytics/utils/callbacks/dvc.py | 2 +- ultralytics/utils/callbacks/hub.py | 16 +- ultralytics/utils/callbacks/mlflow.py | 6 +- ultralytics/utils/callbacks/neptune.py | 8 +- ultralytics/utils/callbacks/raytune.py | 5 +- ultralytics/utils/callbacks/tensorboard.py | 35 +- ultralytics/utils/callbacks/wb.py | 33 +- ultralytics/utils/checks.py | 175 ++- ultralytics/utils/dist.py | 4 +- ultralytics/utils/downloads.py | 17 +- ultralytics/utils/errors.py | 2 +- ultralytics/utils/files.py | 8 +- ultralytics/utils/instance.py | 23 +- ultralytics/utils/loss.py | 18 +- ultralytics/utils/metrics.py | 51 +- ultralytics/utils/ops.py | 88 +- 
ultralytics/utils/patches.py | 14 +- ultralytics/utils/plotting.py | 280 +++-- ultralytics/utils/tal.py | 46 +- ultralytics/utils/torch_utils.py | 164 ++- ultralytics/utils/triton.py | 3 +- ultralytics/utils/tuner.py | 25 +- 560 files changed, 16714 insertions(+), 11459 deletions(-) create mode 100644 .dockerignore rename .github/workflows/{ci.yaml => ci.yml} (82%) delete mode 100644 .github/workflows/codeql.yaml rename .github/workflows/{docker.yaml => docker.yml} (66%) create mode 100644 docker/Dockerfile-jupyter create mode 100644 docs/en/datasets/detect/medical-pills.md delete mode 100644 docs/en/datasets/explorer/explorer.ipynb create mode 100644 docs/en/datasets/explorer/explorer.md create mode 100644 docs/en/datasets/pose/dog-pose.md create mode 100644 docs/en/guides/trackzone.md rename docs/en/help/{code_of_conduct.md => code-of-conduct.md} (100%) rename docs/en/help/{minimum_reproducible_example.md => minimum-reproducible-example.md} (100%) create mode 100644 docs/en/integrations/albumentations.md create mode 100644 docs/en/integrations/mnn.md create mode 100644 docs/en/integrations/rockchip-rknn.md create mode 100644 docs/en/integrations/seeedstudio-recamera.md create mode 100644 docs/en/integrations/sony-imx500.md create mode 100644 docs/en/macros/sam-auto-annotate.md create mode 100644 docs/en/macros/solutions-args.md create mode 100644 docs/en/macros/yolo-cls-perf.md create mode 100644 docs/en/macros/yolo-det-perf.md create mode 100644 docs/en/macros/yolo-obb-perf.md create mode 100644 docs/en/macros/yolo-pose-perf.md create mode 100644 docs/en/macros/yolo-seg-perf.md delete mode 100644 docs/en/reference/data/explorer/explorer.md delete mode 100644 docs/en/reference/data/explorer/gui/dash.md delete mode 100644 docs/en/reference/data/explorer/utils.md create mode 100644 docs/en/reference/solutions/region_counter.md create mode 100644 docs/en/reference/solutions/security_alarm.md create mode 100644 docs/en/reference/solutions/solutions.md create mode 
100644 docs/en/reference/solutions/trackzone.md create mode 100644 docs/model_data.py delete mode 100644 docs/overrides/assets/favicon.ico create mode 100644 docs/overrides/javascript/benchmark.js create mode 100644 docs/overrides/javascript/giscus.js delete mode 100644 docs/overrides/partials/source-file.html create mode 100644 examples/RTDETR-ONNXRuntime-Python/README.md create mode 100644 examples/RTDETR-ONNXRuntime-Python/main.py create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/README.md create mode 100644 examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs delete mode 100644 examples/YOLOv8-OpenCV-int8-tflite-Python/README.md delete mode 100644 examples/YOLOv8-OpenCV-int8-tflite-Python/main.py create mode 100644 examples/YOLOv8-TFLite-Python/README.md create mode 100644 examples/YOLOv8-TFLite-Python/main.py delete mode 100644 tests/test_explorer.py create mode 100644 ultralytics/cfg/datasets/dog-pose.yaml create mode 100644 ultralytics/cfg/datasets/medical-pills.yaml create mode 100644 ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml create mode 100644 ultralytics/cfg/solutions/default.yaml delete mode 100644 ultralytics/data/explorer/__init__.py delete mode 100644 ultralytics/data/explorer/explorer.py delete mode 100644 ultralytics/data/explorer/gui/__init__.py delete mode 100644 ultralytics/data/explorer/gui/dash.py delete mode 100644 ultralytics/data/explorer/utils.py create mode 100644 ultralytics/solutions/region_counter.py create mode 100644 ultralytics/solutions/security_alarm.py create mode 100644 ultralytics/solutions/solutions.py create mode 100644 ultralytics/solutions/trackzone.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..4903d51fa80 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,35 @@ +# Python +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +*.py[cod] +*$py.class +.pytest_cache +.coverage +coverage.xml +.ruff_cache +*.egg-info +dist 
+build + +# Development +.env +.venv +env/ +venv/ +ENV/ +.idea +.vscode +*.swp +*.swo +.DS_Store + +# Project specific +*.log +benchmarks.log +runs/ + +# Dependencies +node_modules/ diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 430b05957ab..f5f9022ddeb 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -1,4 +1,4 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license name: 🐛 Bug Report # title: " " @@ -14,7 +14,7 @@ body: attributes: label: Search before asking description: > - Please search the Ultralytics [Docs](https://docs.ultralytics.com) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar bug report already exists. + Please search the Ultralytics [Docs](https://docs.ultralytics.com/) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar bug report already exists. options: - label: > I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar bug report. @@ -43,7 +43,7 @@ body: - type: textarea attributes: label: Bug - description: Please provide as much information as possible. Copy and paste console output and error messages. Use [Markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to format text, code and logs. If necessary, include screenshots for visual elements only. Providing detailed information will help us resolve the issue more efficiently. + description: Please provide as much information as possible. Copy and paste console output and error messages including the _full_ traceback. 
Use [Markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to format text, code and logs. If necessary, include screenshots for visual elements only. Providing detailed information will help us resolve the issue more efficiently. placeholder: | 💡 ProTip! Include as much information as possible (logs, tracebacks, screenshots, etc.) to receive the most helpful response. validations: @@ -52,11 +52,11 @@ body: - type: textarea attributes: label: Environment - description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` or `ultralytics.checks()` command to help us diagnose the problem. + description: Try the latest version (`pip install -U ultralytics`) before reporting a bug. If it's still present, please provide the output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command to help us diagnose the problem. placeholder: | - Paste output of `yolo checks` or `ultralytics.checks()` command, i.e.: + Paste output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command, i.e.: ``` - Ultralytics YOLOv8.0.181 🚀 Python-3.11.2 torch-2.0.1 CPU (Apple M2) + Ultralytics 8.3.2 🚀 Python-3.11.2 torch-2.4.1 CPU (Apple M3) Setup complete ✅ (8 CPUs, 16.0 GB RAM, 266.5/460.4 GB disk) OS macOS-13.5.2 @@ -64,7 +64,7 @@ body: Python 3.11.2 Install git RAM 16.00 GB - CPU Apple M2 + CPU Apple M3 CUDA None ``` validations: @@ -74,7 +74,7 @@ body: attributes: label: Minimal Reproducible Example description: > - When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem. This is referred to by community members as creating a [minimal reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/). 
+ When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem. This is referred to by community members as creating a [minimal reproducible example](https://docs.ultralytics.com/help/minimum-reproducible-example/). placeholder: | ``` # Code to reproduce your issue here @@ -92,6 +92,6 @@ body: label: Are you willing to submit a PR? description: > (Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve Ultralytics YOLO for everyone, especially if you have a good understanding of how to implement a fix or feature. - See the Ultralytics YOLO [Contributing Guide](https://docs.ultralytics.com/help/contributing) to get started. + See the Ultralytics YOLO [Contributing Guide](https://docs.ultralytics.com/help/contributing/) to get started. options: - label: Yes I'd like to help by submitting a PR! diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 73745a3a562..0da481e9fb9 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,10 +1,10 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license blank_issues_enabled: true contact_links: - name: 📄 Docs url: https://docs.ultralytics.com/ - about: Full Ultralytics YOLOv8 Documentation + about: Full Ultralytics YOLO Documentation - name: 💬 Forum url: https://community.ultralytics.com/ about: Ask on Ultralytics Community Forum diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml index c065446c1f4..6b72a38433c 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -1,23 +1,23 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license name: 🚀 Feature Request -description: 
Suggest a YOLOv8 idea +description: Suggest an Ultralytics YOLO idea # title: " " labels: [enhancement] body: - type: markdown attributes: value: | - Thank you for submitting a YOLOv8 ๐Ÿš€ Feature Request! + Thank you for submitting an Ultralytics ๐Ÿš€ Feature Request! - type: checkboxes attributes: label: Search before asking description: > - Please search the Ultralytics [Docs](https://docs.ultralytics.com) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar feature request already exists. + Please search the Ultralytics [Docs](https://docs.ultralytics.com/) and [issues](https://github.com/ultralytics/ultralytics/issues) to see if a similar feature request already exists. options: - label: > - I have searched the YOLOv8 [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar feature requests. + I have searched the Ultralytics [issues](https://github.com/ultralytics/ultralytics/issues) and found no similar feature requests. required: true - type: textarea @@ -25,7 +25,7 @@ body: label: Description description: A short description of your feature. placeholder: | - What new feature would you like to see in YOLOv8? + What new feature would you like to see in YOLO? validations: required: true @@ -46,7 +46,7 @@ body: attributes: label: Are you willing to submit a PR? description: > - (Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve YOLOv8 for everyone, especially if you have a good understanding of how to implement a fix or feature. - See the YOLOv8 [Contributing Guide](https://docs.ultralytics.com/help/contributing) to get started. + (Optional) We encourage you to submit a [Pull Request](https://github.com/ultralytics/ultralytics/pulls) (PR) to help improve YOLO for everyone, especially if you have a good understanding of how to implement a fix or feature. 
+ See the Ultralytics [Contributing Guide](https://docs.ultralytics.com/help/contributing/) to get started. options: - label: Yes I'd like to help by submitting a PR! diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index f957b43d6d0..5c6c2f39a21 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license name: โ“ Question description: Ask an Ultralytics YOLO question @@ -14,10 +14,10 @@ body: attributes: label: Search before asking description: > - Please search the Ultralytics [Docs](https://docs.ultralytics.com), [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/ultralytics/ultralytics/discussions) to see if a similar question already exists. + Please search the Ultralytics [Docs](https://docs.ultralytics.com/), [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/orgs/ultralytics/discussions) to see if a similar question already exists. options: - label: > - I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/ultralytics/ultralytics/discussions) and found no similar questions. + I have searched the Ultralytics YOLO [issues](https://github.com/ultralytics/ultralytics/issues) and [discussions](https://github.com/orgs/ultralytics/discussions) and found no similar questions. 
required: true - type: textarea diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2d4ae31873b..233db72b026 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Dependabot for package version updates # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yml similarity index 82% rename from .github/workflows/ci.yaml rename to .github/workflows/ci.yml index ef1fa85161b..7bf01805628 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # YOLO Continuous Integration (CI) GitHub Actions tests name: Ultralytics CI @@ -9,7 +10,7 @@ on: pull_request: branches: [main] schedule: - - cron: "0 0 * * *" # runs at 00:00 UTC every day + - cron: "0 8 * * *" # runs at 08:00 UTC every day workflow_dispatch: inputs: hub: @@ -51,16 +52,15 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v5 - name: Install requirements shell: bash # for Windows compatibility run: | - python -m pip install --upgrade pip wheel - pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu + uv pip install --system . 
--extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment run: | yolo checks - pip list + uv pip list - name: Test HUB training shell: python env: @@ -98,7 +98,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-14] + # Temporarily disable windows-latest due to https://github.com/ultralytics/ultralytics/actions/runs/13020330819/job/36319338854?pr=18921 + os: [ubuntu-latest, macos-15, ubuntu-24.04-arm] python-version: ["3.11"] model: [yolo11n] steps: @@ -106,12 +107,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v5 - name: Install requirements shell: bash # for Windows compatibility run: | - python -m pip install --upgrade pip wheel - pip install -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu + uv pip install --system -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match - name: Check environment run: | yolo checks @@ -130,6 +130,7 @@ jobs: shell: bash run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 export_hw_optimized=True verbose=0.185 # Benchmarks for default configuration + uv pip list - name: Benchmark DetectionModel shell: bash run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}.pt' imgsz=160 verbose=0.318 @@ -171,15 +172,19 @@ jobs: coverage xml -o coverage-benchmarks.xml - name: Upload Coverage Reports to CodeCov if: github.repository == 'ultralytics/ultralytics' - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: Benchmarks env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci - name: Benchmark Summary run: | cat benchmarks.log - echo 
"$(cat benchmarks.log)" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat benchmarks.log >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY Tests: if: github.event_name != 'workflow_dispatch' || github.event.inputs.tests == 'true' @@ -188,37 +193,36 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-14, windows-latest] + os: [ubuntu-latest, macos-15, windows-latest, ubuntu-24.04-arm] python-version: ["3.11"] torch: [latest] include: - os: ubuntu-latest - python-version: "3.8" # torch 1.8.0 requires python >=3.6, <=3.8 + python-version: "3.8" # torch 1.8.0 requires python >=3.6, <=3.9 torch: "1.8.0" # min torch version CI https://pypi.org/project/torchvision/ steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v5 - name: Install requirements shell: bash # for Windows compatibility run: | # CoreML must be installed before export due to protobuf error from AutoInstall - python -m pip install --upgrade pip wheel slow="" torch="" if [ "${{ matrix.torch }}" == "1.8.0" ]; then torch="torch==1.8.0 torchvision==0.9.0" fi if [[ "${{ github.event_name }}" =~ ^(schedule|workflow_dispatch)$ ]]; then - slow="pycocotools mlflow ray[tune]" + slow="pycocotools mlflow" fi - pip install -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu + uv pip install --system -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment run: | yolo checks - pip list + uv pip list - name: Pytest tests shell: bash # for Windows compatibility run: | @@ -229,11 +233,13 @@ jobs: pytest $slow --cov=ultralytics/ --cov-report xml tests/ - name: Upload Coverage Reports to CodeCov if: github.repository == 'ultralytics/ultralytics' # && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' - uses: 
codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: Tests env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci GPU: if: github.repository == 'ultralytics/ultralytics' && (github.event_name != 'workflow_dispatch' || github.event.inputs.gpu == 'true') @@ -241,12 +247,14 @@ jobs: runs-on: gpu-latest steps: - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 - name: Install requirements - run: pip install -e . pytest-cov + shell: bash # for Windows compatibility + run: uv pip install --system -e . pytest-cov - name: Check environment run: | yolo checks - pip list + uv pip list - name: Pytest tests run: | slow="" @@ -255,14 +263,15 @@ jobs: fi pytest $slow --cov=ultralytics/ --cov-report xml tests/test_cuda.py - name: Upload Coverage Reports to CodeCov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: GPU env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} RaspberryPi: - if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true') + # if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event.inputs.raspberrypi == 'true') + if: false # temporarily disable RPi CI for maintenance timeout-minutes: 120 runs-on: raspberry-pi steps: @@ -275,7 +284,7 @@ jobs: - name: Install requirements run: | python -m pip install --upgrade pip wheel - pip install -e ".[export]" pytest mlflow pycocotools "ray[tune]" + pip install -e ".[export]" pytest mlflow pycocotools - name: Check environment run: | yolo checks @@ -285,7 +294,7 @@ jobs: - name: Benchmark ClassificationModel run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-cls.pt' imgsz=160 verbose=0.249 - name: Benchmark YOLOWorld DetectionModel - run: python -m ultralytics.cfg.__init__ benchmark model='yolo11s-worldv2.pt' imgsz=160 verbose=0.337 + run: python -m ultralytics.cfg.__init__ benchmark
model='yolov8s-worldv2.pt' imgsz=160 verbose=0.337 - name: Benchmark SegmentationModel run: python -m ultralytics.cfg.__init__ benchmark model='yolo11n-seg.pt' imgsz=160 verbose=0.195 - name: Benchmark PoseModel @@ -322,13 +331,8 @@ jobs: channels: conda-forge,defaults channel-priority: true activate-environment: anaconda-client-env - - name: Cleanup toolcache - run: | - echo "Free space before deletion:" - df -h / - rm -rf /opt/hostedtoolcache - echo "Free space after deletion:" - df -h / + - name: Cleanup disk space + uses: ultralytics/actions/cleanup-disk@main - name: Install Linux packages run: | # Fix cv2 ImportError: 'libEGL.so.1: cannot open shared object file: No such file or directory' @@ -349,16 +353,18 @@ jobs: conda list - name: Test CLI run: | - yolo predict model=yolov8n.pt imgsz=320 - yolo train model=yolov8n.pt data=coco8.yaml epochs=1 imgsz=32 - yolo val model=yolov8n.pt data=coco8.yaml imgsz=32 - yolo export model=yolov8n.pt format=torchscript imgsz=160 + yolo predict model=yolo11n.pt imgsz=320 + yolo train model=yolo11n.pt data=coco8.yaml epochs=1 imgsz=32 + yolo val model=yolo11n.pt data=coco8.yaml imgsz=32 + yolo export model=yolo11n.pt format=torchscript imgsz=160 + yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 format=onnx + yolo solutions - name: Test Python # Note this step must use the updated default bash environment, not a python environment run: | python -c " from ultralytics import YOLO - model = YOLO('yolov8n.pt') + model = YOLO('yolo11n.pt') results = model.train(data='coco8.yaml', epochs=3, imgsz=160) results = model.val(imgsz=160) results = model.predict(imgsz=160) @@ -375,14 +381,14 @@ jobs: Summary: runs-on: ubuntu-latest - needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda] # Add job names that you want to check for failure - if: always() # This ensures the job runs even if previous jobs fail + needs: [HUB, Benchmarks, Tests, GPU, Conda] + if: always() steps: - name: Check for failure and notify - if: 
(needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.RaspberryPi.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') - uses: slackapi/slack-github-action@v1.27.0 + if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') && github.run_attempt == '1' + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} โŒ\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} โŒ\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n" diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 5ca3abefba3..f3a6c5a350b 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Ultralytics Contributor License Agreement (CLA) action https://docs.ultralytics.com/help/CLA # This workflow automatically requests Pull Requests (PR) authors to sign the Ultralytics CLA before PRs can be 
merged @@ -30,7 +31,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Must be repository secret PAT - PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + PERSONAL_ACCESS_TOKEN: ${{ secrets._GITHUB_TOKEN }} with: path-to-signatures: "signatures/version1/cla.json" path-to-document: "https://docs.ultralytics.com/help/CLA" # CLA document diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml deleted file mode 100644 index e6e3e85d3ce..00000000000 --- a/.github/workflows/codeql.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -name: "CodeQL" - -on: - schedule: - - cron: "0 0 1 * *" - workflow_dispatch: - -jobs: - analyze: - name: Analyze - runs-on: ${{ 'ubuntu-latest' }} - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: ["python"] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. 
- # queries: security-extended,security-and-quality - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 - with: - category: "/language:${{matrix.language}}" diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yml similarity index 66% rename from .github/workflows/docker.yaml rename to .github/workflows/docker.yml index 8a3d41a91a9..b5bdabb22a7 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Builds ultralytics/ultralytics:latest images on DockerHub https://hub.docker.com/r/ultralytics name: Publish Docker Images @@ -81,12 +82,11 @@ jobs: # - dockerfile: "Dockerfile-conda" # tags: "latest-conda" # platforms: "linux/amd64" + outputs: + new_release: ${{ steps.check_tag.outputs.new_release }} steps: - - name: Cleanup disk - # Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848 - uses: jlumbroso/free-disk-space@v1.3.1 - with: - tool-cache: true + - name: Cleanup disk space + uses: ultralytics/actions/cleanup-disk@main - name: Checkout repo uses: actions/checkout@v4 @@ -111,7 +111,6 @@ jobs: VERSION=$(grep "^__version__ =" ultralytics/__init__.py | awk -F'"' '{print $2}') echo "Retrieved Ultralytics version: $VERSION" echo "version=$VERSION" >> $GITHUB_OUTPUT - VERSION_TAG=$(echo "${{ matrix.tags }}" | sed "s/latest/${VERSION}/") echo "Intended version tag: $VERSION_TAG" echo "version_tag=$VERSION_TAG" >> $GITHUB_OUTPUT @@ -123,25 +122,25 @@ jobs: MESSAGE=$(echo $RESPONSE | jq -r '.message') if [[ "$MESSAGE" == "null" ]]; then echo "Tag $VERSION_TAG already exists on DockerHub." - echo "exists=true" >> $GITHUB_OUTPUT + echo "new_release=false" >> $GITHUB_OUTPUT elif [[ "$MESSAGE" == *"404"* ]]; then echo "Tag $VERSION_TAG does not exist on DockerHub." 
- echo "exists=false" >> $GITHUB_OUTPUT + echo "new_release=true" >> $GITHUB_OUTPUT else echo "Unexpected response from DockerHub. Please check manually." - echo "exists=false" >> $GITHUB_OUTPUT + echo "new_release=false" >> $GITHUB_OUTPUT fi env: VERSION_TAG: ${{ steps.get_version.outputs.version_tag }} - name: Build Image if: github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true' - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: timeout_minutes: 120 - retry_wait_seconds: 60 - max_attempts: 3 # retry twice - command: | + retry_delay_seconds: 60 + retries: 2 + run: | docker build \ --platform ${{ matrix.platforms }} \ -f docker/${{ matrix.dockerfile }} \ @@ -159,25 +158,64 @@ jobs: run: docker run ultralytics/ultralytics:${{ matrix.tags }} yolo benchmark model=yolo11n.pt imgsz=160 verbose=0.309 - name: Push Docker Image with Ultralytics version tag - if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.exists == 'false' && matrix.dockerfile != 'Dockerfile-conda' - run: | - docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }} + if: (github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true')) && steps.check_tag.outputs.new_release == 'true' && matrix.dockerfile != 'Dockerfile-conda' + uses: ultralytics/actions/retry@main + with: + timeout_minutes: 15 + retry_delay_seconds: 300 + retries: 2 + run: | + docker push ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }} - name: Push Docker Image with latest tag if: github.event_name == 'push' || (github.event.inputs[matrix.dockerfile] == 'true' && github.event.inputs.push == 'true') + uses: ultralytics/actions/retry@main + with: + timeout_minutes: 15 + retry_delay_seconds: 300 + retries: 2 + run: | + docker push ultralytics/ultralytics:${{ matrix.tags }} + if [[ "${{ 
matrix.tags }}" == "latest" ]]; then + t=ultralytics/ultralytics:latest-runner + docker build -f docker/Dockerfile-runner -t $t . + docker push $t + fi + if [[ "${{ matrix.tags }}" == "latest-python" ]]; then + t=ultralytics/ultralytics:latest-jupyter + v=ultralytics/ultralytics:${{ steps.get_version.outputs.version }}-jupyter + docker build -f docker/Dockerfile-jupyter -t $t -t $v . + docker push $t + if [[ "${{ steps.check_tag.outputs.new_release }}" == "true" ]]; then + docker push $v + fi + fi + + trigger-actions: + runs-on: ubuntu-latest + needs: docker + # Only trigger actions on new Ultralytics releases + if: success() && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && needs.docker.outputs.new_release == 'true' + steps: + - name: Trigger Additional GitHub Actions + env: + GH_TOKEN: ${{ secrets._GITHUB_TOKEN }} run: | - docker push ultralytics/ultralytics:${{ matrix.tags }} - if [[ "${{ matrix.tags }}" == "latest" ]]; then - t=ultralytics/ultralytics:latest-runner - docker build -f docker/Dockerfile-runner -t $t . 
- docker push $t - fi + sleep 60 + gh workflow run deploy_cloud_run.yml \ + --repo ultralytics/assistant \ + --ref main - - name: Notify on failure - if: github.event_name == 'push' && failure() # do not notify on cancelled() as cancelling is performed by hand - uses: slackapi/slack-github-action@v1.27.0 + notify: + runs-on: ubuntu-latest + needs: [docker, trigger-actions] + if: always() + steps: + - name: Check for failure and notify + if: needs.docker.result == 'failure' && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && github.run_attempt == '1' + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} โŒ\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} โŒ\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 02bc506a144..5d34e3f88d1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Test and publish docs to https://docs.ultralytics.com # Ignores the following Docs rules to match Google-style docstrings: # D100: Missing docstring in public module @@ -20,38 +21,45 @@ on: pull_request: branches: [main] workflow_dispatch: + inputs: + publish_docs: + description: "Publish live to 
https://docs.ultralytics.com" default: true + type: boolean jobs: Docs: if: github.repository == 'ultralytics/ultralytics' - runs-on: macos-14 + runs-on: ubuntu-latest + env: + GITHUB_REF: ${{ github.head_ref || github.ref }} steps: - - name: Git config - run: | - git config --global user.name "UltralyticsAssistant" - git config --global user.email "web@ultralytics.com" - name: Checkout Repository uses: actions/checkout@v4 with: + # Fetch depth 0 required to capture full docs author history repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} - token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - ref: ${{ github.head_ref || github.ref }} + token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + ref: ${{ env.GITHUB_REF }} fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v5 - name: Install Dependencies - run: pip install ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin + # Note "beautifulsoup4<=4.12.3" required due to errors with >=4.13 in https://github.com/ultralytics/ultralytics/pull/19067 + run: uv pip install --system "beautifulsoup4<=4.12.3" ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin - name: Ruff fixes continue-on-error: true run: ruff check --fix --unsafe-fixes --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 . - name: Update Docs Reference Section and Push Changes continue-on-error: true run: | + git config --global user.name "UltralyticsAssistant" + git config --global user.email "web@ultralytics.com" python docs/build_reference.py - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add .
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! git diff --staged --quiet; then @@ -70,7 +78,7 @@ jobs: continue-on-error: true if: always() run: | - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add --update # only add updated files git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! git diff --staged --quiet; then @@ -80,11 +88,10 @@ jobs: echo "No changes to commit" fi - name: Publish Docs to https://docs.ultralytics.com - if: github.event_name == 'push' + if: github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_docs == 'true') run: | - git clone https://github.com/ultralytics/docs.git docs-repo + git clone --depth 1 --branch gh-pages https://github.com/ultralytics/docs.git docs-repo cd docs-repo - git checkout gh-pages || git checkout -b gh-pages rm -rf * cp -R ../site/* . echo "${{ secrets.INDEXNOW_KEY_DOCS }}" > "${{ secrets.INDEXNOW_KEY_DOCS }}.txt" @@ -92,7 +99,8 @@ jobs: if git diff --staged --quiet; then echo "No changes to commit" else + git pull origin gh-pages LATEST_HASH=$(git rev-parse --short=7 HEAD) git commit -m "Update Docs for 'ultralytics ${{ steps.check_pypi.outputs.version }} - $LATEST_HASH'" - git push https://${{ secrets.PERSONAL_ACCESS_TOKEN }}@github.com/ultralytics/docs.git gh-pages + git push https://${{ secrets._GITHUB_TOKEN }}@github.com/ultralytics/docs.git gh-pages fi diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 9befe5c9689..212faae2217 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,4 +1,5 @@ -# Ultralytics ๐Ÿš€ - AGPL-3.0 License https://ultralytics.com/license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Ultralytics Actions https://github.com/ultralytics/actions # This workflow automatically formats code and documentation in PRs to official 
Ultralytics standards @@ -9,34 +10,33 @@ on: types: [opened, edited] discussion: types: [created] - pull_request_target: + pull_request: branches: [main] types: [opened, closed, synchronize, review_requested] jobs: format: - runs-on: macos-14 + runs-on: ubuntu-latest steps: - name: Run Ultralytics Formatting uses: ultralytics/actions@main with: - token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # note GITHUB_TOKEN automatically generated + token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }} labels: true # autolabel issues and PRs python: true # format Python code and docstrings prettier: true # format YAML, JSON, Markdown and CSS spelling: true # check spelling links: false # check broken links summary: true # print PR summary with GPT4o (requires 'openai_api_key') - openai_azure_api_key: ${{ secrets.OPENAI_AZURE_API_KEY }} - openai_azure_endpoint: ${{ secrets.OPENAI_AZURE_ENDPOINT }} + openai_api_key: ${{ secrets.OPENAI_API_KEY }} first_issue_response: | - ๐Ÿ‘‹ Hello @${{ github.actor }}, thank you for your interest in Ultralytics ๐Ÿš€! We recommend a visit to the [Docs](https://docs.ultralytics.com) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered. + ๐Ÿ‘‹ Hello @${{ github.actor }}, thank you for your interest in Ultralytics ๐Ÿš€! We recommend a visit to the [Docs](https://docs.ultralytics.com/) for new users where you can find many [Python](https://docs.ultralytics.com/usage/python/) and [CLI](https://docs.ultralytics.com/usage/cli/) usage examples and where many of the most common questions may already be answered. - If this is a ๐Ÿ› Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) to help us debug it. 
+ If this is a ๐Ÿ› Bug Report, please provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum-reproducible-example/) to help us debug it. If this is a custom training โ“ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/). - Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://ultralytics.com/discord) ๐ŸŽง. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/ultralytics) to share knowledge with the community. + Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://discord.com/invite/ultralytics) ๐ŸŽง. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com/). Or dive into threads on our [Subreddit](https://reddit.com/r/Ultralytics) to share knowledge with the community. ## Upgrade @@ -48,15 +48,15 @@ jobs: ## Environments - YOLOv8 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): + YOLO may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda-zone)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): - - **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle + - **Notebooks** with free GPU: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud** Deep Learning VM. 
See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/) - **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/) - **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) Docker Pulls ## Status - Ultralytics CI + Ultralytics CI - If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml?query=event%3Aschedule) tests are currently passing. CI tests verify correct operation of all YOLOv8 [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit. + If this badge is green, all [Ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml?query=event%3Aschedule) tests are currently passing. CI tests verify correct operation of all YOLO [Modes](https://docs.ultralytics.com/modes/) and [Tasks](https://docs.ultralytics.com/tasks/) on macOS, Windows, and Ubuntu every 24 hours and on every commit. 
diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 4dd8aa38b05..932c6a9807a 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Continuous Integration (CI) GitHub Actions tests broken link checker using https://github.com/lycheeverse/lychee # Ignores the following status codes to reduce false positives: # - 401(Vimeo, 'unauthorized') @@ -24,17 +25,15 @@ jobs: - name: Download and install lychee run: | LYCHEE_URL=$(curl -s https://api.github.com/repos/lycheeverse/lychee/releases/latest | grep "browser_download_url" | grep "x86_64-unknown-linux-gnu.tar.gz" | cut -d '"' -f 4) - curl -L $LYCHEE_URL -o lychee.tar.gz - tar xzf lychee.tar.gz - sudo mv lychee /usr/local/bin + curl -L $LYCHEE_URL | tar xz -C /usr/local/bin - name: Test Markdown and HTML links with retry - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: - timeout_minutes: 5 - retry_wait_seconds: 60 - max_attempts: 3 - command: | + timeout_minutes: 60 + retry_delay_seconds: 900 + retries: 2 + run: | lychee \ --scheme https \ --timeout 60 \ @@ -55,16 +54,16 @@ jobs: --github-token ${{ secrets.GITHUB_TOKEN }} \ --header "User-Agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.183 Safari/537.36" \ './**/*.md' \ - './**/*.html' + './**/*.html' | tee -a $GITHUB_STEP_SUMMARY - name: Test Markdown, HTML, YAML, Python and Notebook links with retry if: github.event_name == 'workflow_dispatch' - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: - timeout_minutes: 5 - retry_wait_seconds: 60 - max_attempts: 3 - command: | + timeout_minutes: 60 + retry_delay_seconds: 900 + retries: 2 + run: | lychee \ --scheme https \ --timeout 60 \ @@ -72,7 +71,7 @@ jobs: --accept 401,403,429,500,502,999 \ --exclude-all-private \ --exclude 
'https?://(www\.)?(linkedin\.com|twitter\.com|instagram\.com|kaggle\.com|fonts\.gstatic\.com|url\.com)' \ - --exclude-path '**/ci.yaml' \ + --exclude-path '**/ci.yml' \ --exclude-path docs/zh \ --exclude-path docs/es \ --exclude-path docs/ru \ @@ -90,4 +89,4 @@ jobs: './**/*.yml' \ './**/*.yaml' \ './**/*.py' \ - './**/*.ipynb' + './**/*.ipynb' | tee -a $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/merge-main-into-prs.yml b/.github/workflows/merge-main-into-prs.yml index 347ec1b99c8..fb36aa77314 100644 --- a/.github/workflows/merge-main-into-prs.yml +++ b/.github/workflows/merge-main-into-prs.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Automatically merges repository 'main' branch into all open PRs to keep them up-to-date # Action runs on updates to main branch so when one PR merges to main all others update @@ -33,7 +34,7 @@ jobs: import os import time - g = Github("${{ secrets.PERSONAL_ACCESS_TOKEN }}") + g = Github("${{ secrets._GITHUB_TOKEN }}") repo = g.get_repo("${{ github.repository }}") # Fetch the default branch name diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8276a7696a4..1a83a1bfe42 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Publish pip package to PyPI https://pypi.org/project/ultralytics/ name: Publish to PyPI @@ -13,129 +14,112 @@ on: description: Publish to PyPI jobs: - publish: + check: if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher' - name: Publish runs-on: ubuntu-latest + permissions: + contents: write + outputs: + increment: ${{ steps.check_pypi.outputs.increment }} + current_tag: ${{ steps.check_pypi.outputs.current_tag }} + previous_tag: ${{ steps.check_pypi.outputs.previous_tag }} steps: - - name: Checkout 
code - uses: actions/checkout@v4 - with: - token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # use your PAT here - - name: Git config - run: | - git config --global user.name "UltralyticsAssistant" - git config --global user.email "web@ultralytics.com" - - name: Set up Python environment - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.x" - cache: "pip" # caching pip dependencies - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel - pip install requests build twine toml - - name: Check PyPI version + - uses: astral-sh/setup-uv@v5 + - run: uv pip install --system --no-cache ultralytics-actions + - id: check_pypi shell: python run: | import os - import requests - import toml - - # Load version and package name from pyproject.toml - pyproject = toml.load('pyproject.toml') - package_name = pyproject['project']['name'] - local_version = pyproject['project'].get('version', 'dynamic') - - # If version is dynamic, extract it from the specified file - if local_version == 'dynamic': - version_attr = pyproject['tool']['setuptools']['dynamic']['version']['attr'] - module_path, attr_name = version_attr.rsplit('.', 1) - with open(f"{module_path.replace('.', '/')}/__init__.py") as f: - local_version = next(line.split('=')[1].strip().strip("'\"") for line in f if line.startswith(attr_name)) - - print(f"Local Version: {local_version}") - - # Get online version from PyPI - response = requests.get(f"https://pypi.org/pypi/{package_name}/json") - online_version = response.json()['info']['version'] if response.status_code == 200 else None - print(f"Online Version: {online_version or 'Not Found'}") - - # Determine if a new version should be published - publish = False - if online_version: - local_ver = tuple(map(int, local_version.split('.'))) - online_ver = tuple(map(int, online_version.split('.'))) - major_diff = local_ver[0] - online_ver[0] - minor_diff = 
local_ver[1] - online_ver[1] - patch_diff = local_ver[2] - online_ver[2] - - publish = ( - (major_diff == 0 and minor_diff == 0 and 0 < patch_diff <= 2) or - (major_diff == 0 and minor_diff == 1 and local_ver[2] == 0) or - (major_diff == 1 and local_ver[1] == 0 and local_ver[2] == 0) - ) - else: - publish = True # First release - + from actions.utils import check_pypi_version + local_version, online_version, publish = check_pypi_version() os.system(f'echo "increment={publish}" >> $GITHUB_OUTPUT') os.system(f'echo "current_tag=v{local_version}" >> $GITHUB_OUTPUT') os.system(f'echo "previous_tag=v{online_version}" >> $GITHUB_OUTPUT') - if publish: print('Ready to publish new version to PyPI โœ….') - id: check_pypi - - name: Publish to PyPI - continue-on-error: true - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: | - python -m build - python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} - - name: Publish new tag - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: | - git tag -a "${{ steps.check_pypi.outputs.current_tag }}" -m "$(git log -1 --pretty=%B)" # i.e. 
"v0.1.2 commit message" - git push origin "${{ steps.check_pypi.outputs.current_tag }}" - - name: Publish new release - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + - name: Tag and Release + if: steps.check_pypi.outputs.increment == 'True' env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_TAG: ${{ steps.check_pypi.outputs.current_tag }} PREVIOUS_TAG: ${{ steps.check_pypi.outputs.previous_tag }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | - curl -s "https://raw.githubusercontent.com/ultralytics/actions/main/utils/summarize_release.py" | python - - shell: bash + git config --global user.name "UltralyticsAssistant" + git config --global user.email "web@ultralytics.com" + git tag -a "$CURRENT_TAG" -m "$(git log -1 --pretty=%B)" + git push origin "$CURRENT_TAG" + ultralytics-actions-summarize-release + uv cache prune --ci + + build: + needs: check + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - uses: astral-sh/setup-uv@v5 + - run: uv pip install --system --no-cache build + - run: python -m build + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + - run: uv cache prune --ci + + publish: + needs: [check, build] + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + environment: # for GitHub Deployments tab + name: Release - PyPI + url: https://pypi.org/p/ultralytics + permissions: + id-token: write # for PyPI trusted publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + + notify: + needs: [check, publish] + if: always() && needs.check.outputs.increment == 'True' + 
runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 - name: Extract PR Details env: - GH_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - # Check if the event is a pull request or pull_request_target - if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then - PR_NUMBER=${{ github.event.pull_request.number }} - PR_TITLE=$(gh pr view $PR_NUMBER --json title --jq '.title') - else - # Use gh to find the PR associated with the commit - COMMIT_SHA=${{ github.event.after }} - PR_JSON=$(gh pr list --search "${COMMIT_SHA}" --state merged --json number,title --jq '.[0]') - PR_NUMBER=$(echo $PR_JSON | jq -r '.number') - PR_TITLE=$(echo $PR_JSON | jq -r '.title') - fi - echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV - - name: Notify on Slack (Success) - if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True' - uses: slackapi/slack-github-action@v1.27.0 + PR_JSON=$(gh pr list --search "${GITHUB_SHA}" --state merged --json number,title --jq '.[0]') + PR_NUMBER=$(echo "${PR_JSON}" | jq -r '.number') + PR_TITLE=$(echo "${PR_JSON}" | jq -r '.title') + echo "PR_NUMBER=${PR_NUMBER}" >> "${GITHUB_ENV}" + echo "PR_TITLE=${PR_TITLE}" >> "${GITHUB_ENV}" + - name: Notify Success + if: needs.publish.result == 'success' && github.event_name == 'push' + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions success for ${{ github.workflow }} โœ…\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW '${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}' pip package published ๐Ÿ˜ƒ\n*Job Status:* ${{ job.status 
}}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} - - name: Notify on Slack (Failure) - if: failure() - uses: slackapi/slack-github-action@v1.27.0 + text: " GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ needs.check.outputs.current_tag }}` pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" + - name: Notify Failure + if: needs.publish.result != 'success' + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index dd8503541eb..1ec031f6f7b 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,10 +1,14 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license name: Close stale issues on: schedule: - cron: "0 0 * * *" # Runs at
00:00 UTC every day +permissions: + pull-requests: write + issues: write + jobs: stale: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 589906e0ba8..ceb43cc00c3 100644 --- a/.gitignore +++ b/.gitignore @@ -124,6 +124,7 @@ venv.bak/ # VSCode project settings .vscode/ +.devcontainer/ # Rope project settings .ropeproject @@ -157,12 +158,15 @@ weights/ *.torchscript *.tflite *.h5 +*.mnn *_saved_model/ *_web_model/ *_openvino_model/ *_paddle_model/ *_ncnn_model/ +*_imx_model/ pnnx* +*.rknn # Autogenerated files for tests /ultralytics/assets/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b3dbfe16952..15447bce784 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics ## Code of Conduct -To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community. +To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code-of-conduct/). Respect, kindness, and professionalism are at the heart of our community. ## Contributing via Pull Requests @@ -121,7 +121,7 @@ All pull requests must pass the GitHub Actions [Continuous Integration](https:// ## Reporting Bugs -We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/)โ€”a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. +We highly value bug reports as they help us maintain the quality of our projects. 
When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. ## License @@ -163,4 +163,4 @@ the project's quality standards. Review the CI output and fix any issues. For de ### How do I report a bug in Ultralytics YOLO repositories? -To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. +To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. diff --git a/README.md b/README.md index 8fec98deaed..0be72727a86 100644 --- a/README.md +++ b/README.md @@ -72,26 +72,29 @@ metrics = model.val(path) # evaluate model performance on exported model [中文](https://docs.ultralytics.com/zh) | [한국어](https://docs.ultralytics.com/ko) | [日本語](https://docs.ultralytics.com/ja) | [Русский](https://docs.ultralytics.com/ru) | [Deutsch](https://docs.ultralytics.com/de) | [Français](https://docs.ultralytics.com/fr) | [Español](https://docs.ultralytics.com/es) | [Português](https://docs.ultralytics.com/pt) | [Türkçe](https://docs.ultralytics.com/tr) | [Tiếng Việt](https://docs.ultralytics.com/vi) | [العربية](https://docs.ultralytics.com/ar)
- Ultralytics CI + Ultralytics CI + Ultralytics Downloads Ultralytics YOLO Citation - Ultralytics Docker Pulls - Ultralytics Discord - Ultralytics Forums + Ultralytics Discord + Ultralytics Forums Ultralytics Reddit
- Run on Gradient - Open In Colab - Open In Kaggle + Run Ultralytics on Gradient + Open Ultralytics In Colab + Open Ultralytics In Kaggle + Open Ultralytics In Binder

[Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics) is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLO11 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection and tracking, instance segmentation, image classification and pose estimation tasks. -We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums! +We hope that the resources here will help you get the most out of YOLO. Please browse the Ultralytics Docs for details, raise an issue on GitHub for support, questions, or discussions, become a member of the Ultralytics Discord, Reddit and Forums! To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license). -YOLO11 performance plots + + YOLO11 performance plots +
Ultralytics GitHub @@ -106,7 +109,7 @@ To request an Enterprise License please complete the form at [Ultralytics Licens space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
@@ -117,9 +120,9 @@ See below for a quickstart install and usage examples, and see our [Docs](https:
Install -Pip install the ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). +Pip install the Ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). -[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) +[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) ```bash pip install ultralytics @@ -127,7 +130,7 @@ pip install ultralytics For alternative installation methods including [Conda](https://anaconda.org/conda-forge/ultralytics), [Docker](https://hub.docker.com/r/ultralytics/ultralytics), and Git, please refer to the [Quickstart Guide](https://docs.ultralytics.com/quickstart/). 
-[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics?logo=condaforge)](https://anaconda.org/conda-forge/ultralytics) [![Docker Image Version](https://img.shields.io/docker/v/ultralytics/ultralytics?sort=semver&logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) +[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics?logo=condaforge)](https://anaconda.org/conda-forge/ultralytics) [![Docker Image Version](https://img.shields.io/docker/v/ultralytics/ultralytics?sort=semver&logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) [![Ultralytics Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/ultralytics?logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics)
@@ -142,7 +145,7 @@ YOLO may be used directly in the Command Line Interface (CLI) with a `yolo` comm yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' ``` -`yolo` can be used for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples. +`yolo` can be used for a variety of tasks and modes and accepts additional arguments, e.g. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples. ### Python @@ -179,11 +182,13 @@ See YOLO [Python Docs](https://docs.ultralytics.com/usage/python/) for more exam ##
Models
-YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models. +YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models. All [Models](https://docs.ultralytics.com/models/) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -Ultralytics YOLO supported tasks - -All [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. + + Ultralytics YOLO supported tasks + +
+
Detection (COCO) @@ -191,11 +196,11 @@ See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for usage examp | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.12 ยฑ 0.82 ms | 1.55 ยฑ 0.01 ms | 2.6 | 6.5 | -| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.01 ยฑ 1.17 ms | 2.46 ยฑ 0.00 ms | 9.4 | 21.5 | -| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.20 ยฑ 2.04 ms | 4.70 ยฑ 0.06 ms | 20.1 | 68.0 | -| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.64 ยฑ 1.39 ms | 6.16 ยฑ 0.08 ms | 25.3 | 86.9 | -| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.78 ยฑ 6.66 ms | 11.31 ยฑ 0.24 ms | 56.9 | 194.9 | +| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.1 ยฑ 0.8 | 1.5 ยฑ 0.0 | 2.6 | 6.5 | +| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.0 ยฑ 1.2 | 2.5 ยฑ 0.0 | 9.4 | 21.5 | +| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.2 ยฑ 2.0 | 4.7 ยฑ 0.1 | 20.1 | 68.0 | +| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.6 ยฑ 1.4 | 6.2 ยฑ 0.1 | 25.3 | 86.9 | +| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.8 ยฑ 6.7 | 11.3 ยฑ 0.2 | 56.9 | 194.9 | - **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val detect data=coco.yaml device=0` - **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val detect data=coco.yaml batch=1 device=0|cpu` @@ -208,14 +213,31 @@ See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for usage e | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.90 ยฑ 1.14 ms | 1.84 ยฑ 0.00 ms | 2.9 | 10.4 | -| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.56 ยฑ 4.89 ms | 2.94 ยฑ 0.01 ms | 10.1 | 35.5 | -| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.63 ยฑ 1.16 ms | 6.31 ยฑ 0.09 ms | 22.4 | 123.3 | -| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.16 ยฑ 3.17 ms | 7.78 ยฑ 0.16 ms | 27.6 | 142.2 | -| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.50 ยฑ 3.24 ms | 15.75 ยฑ 0.67 ms | 62.1 | 319.0 | +| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.9 ยฑ 1.1 | 1.8 ยฑ 0.0 | 2.9 | 10.4 | +| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.6 ยฑ 4.9 | 2.9 ยฑ 0.0 | 10.1 | 35.5 | +| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.6 ยฑ 1.2 | 6.3 ยฑ 0.1 | 22.4 | 123.3 | +| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ยฑ 3.2 | 7.8 ยฑ 0.2 | 27.6 | 142.2 | +| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ยฑ 3.2 | 15.8 ยฑ 0.7 | 62.1 | 319.0 | + +- **mAPval** values are for single-model single-scale on 
[COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val segment data=coco.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco.yaml batch=1 device=0|cpu` + +
+ +
Classification (ImageNet) + +See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pretrained classes. -- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val segment data=coco-seg.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` +| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) at 640 | +| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | +| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.0 ± 0.3 | 1.1 ± 0.0 | 1.6 | 3.3 | +| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.9 ± 0.2 | 1.3 ± 0.0 | 5.5 | 12.1 | +| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.2 ± 0.4 | 2.0 ± 0.0 | 10.4 | 39.3 | +| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.2 ± 0.3 | 2.8 ± 0.0 | 12.9 | 49.4 | +| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.4 ± 0.9 | 3.8 ± 0.0 | 28.4 | 110.4 | + +- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set.
Reproduce by `yolo val classify data=path/to/ImageNet device=0` +- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`
@@ -225,11 +247,11 @@ See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for usage examples wit | Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | | ---------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.40 ยฑ 0.51 ms | 1.72 ยฑ 0.01 ms | 2.9 | 7.6 | -| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.54 ยฑ 0.59 ms | 2.57 ยฑ 0.00 ms | 9.9 | 23.2 | -| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.28 ยฑ 0.77 ms | 4.94 ยฑ 0.05 ms | 20.9 | 71.7 | -| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.69 ยฑ 1.10 ms | 6.42 ยฑ 0.13 ms | 26.2 | 90.7 | -| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 487.97 ยฑ 13.91 ms | 12.06 ยฑ 0.20 ms | 58.8 | 203.3 | +| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.4 ยฑ 0.5 | 1.7 ยฑ 0.0 | 2.9 | 7.6 | +| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.5 ยฑ 0.6 | 2.6 ยฑ 0.0 | 9.9 | 23.2 | +| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.3 ยฑ 0.8 | 4.9 ยฑ 0.1 | 20.9 | 71.7 | +| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.7 ยฑ 1.1 | 6.4 ยฑ 0.1 | 26.2 | 90.7 | +| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 488.0 ยฑ 13.9 | 12.1 ยฑ 0.2 | 58.8 | 203.3 | - **mAPval** values are for single-model 
single-scale on [COCO Keypoints val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val pose data=coco-pose.yaml device=0` - **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` @@ -242,50 +264,33 @@ See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for usage examples with | Model | size
(pixels) | mAPtest
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | | -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ยฑ 0.80 ms | 4.43 ยฑ 0.01 ms | 2.7 | 17.2 | -| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ยฑ 4.00 ms | 5.13 ยฑ 0.02 ms | 9.7 | 57.5 | -| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ยฑ 2.87 ms | 10.07 ยฑ 0.38 ms | 20.9 | 183.5 | -| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ยฑ 4.98 ms | 13.46 ยฑ 0.55 ms | 26.2 | 232.0 | -| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ยฑ 7.67 ms | 28.59 ยฑ 0.96 ms | 58.8 | 520.2 | +| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.6 ยฑ 0.8 | 4.4 ยฑ 0.0 | 2.7 | 17.2 | +| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.4 ยฑ 4.0 | 5.1 ยฑ 0.0 | 9.7 | 57.5 | +| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.8 ยฑ 2.9 | 10.1 ยฑ 0.4 | 20.9 | 183.5 | +| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.5 ยฑ 5.0 | 13.5 ยฑ 0.6 | 26.2 | 232.0 | +| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.6 ยฑ 7.7 | 28.6 ยฑ 1.0 | 58.8 | 520.2 | - **mAPtest** values are for single-model multiscale on [DOTAv1](https://captain-whu.github.io/DOTA/index.html) dataset.
Reproduce by `yolo val obb data=DOTAv1.yaml device=0 split=test` and submit merged results to [DOTA evaluation](https://captain-whu.github.io/DOTA/evaluation.html). - **Speed** averaged over DOTAv1 val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu` -
Classification (ImageNet) - -See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usage examples with these models trained on [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/), which include 1000 pretrained classes. - -| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | -| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.03 ยฑ 0.32 ms | 1.10 ยฑ 0.01 ms | 1.6 | 3.3 | -| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.89 ยฑ 0.18 ms | 1.34 ยฑ 0.01 ms | 5.5 | 12.1 | -| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.17 ยฑ 0.40 ms | 1.95 ยฑ 0.00 ms | 10.4 | 39.3 | -| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.17 ยฑ 0.29 ms | 2.76 ยฑ 0.00 ms | 12.9 | 49.4 | -| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.41 ยฑ 0.94 ms | 3.82 ยฑ 0.00 ms | 28.4 | 110.4 | - -- **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set.
Reproduce by `yolo val classify data=path/to/ImageNet device=0` -- **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` - -
- ##
Integrations
-Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [Roboflow](https://roboflow.com/?ref=ultralytics), ClearML, [Comet](https://bit.ly/yolov8-readme-comet), Neural Magic and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow. +Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/), [Comet](https://bit.ly/yolov8-readme-comet), [Roboflow](https://roboflow.com/?ref=ultralytics) and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow. -
- -Ultralytics active learning integrations + + Ultralytics active learning integrations +

- - Roboflow logo + + Ultralytics HUB logo space - - ClearML logo + + W&B logo space Comet ML logo @@ -294,15 +299,15 @@ Our key integrations with leading AI platforms extend the functionality of Ultra NeuralMagic logo
-| Roboflow | ClearML โญ NEW | Comet โญ NEW | Neural Magic โญ NEW | -| :--------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | -| Label and export your custom datasets directly to YOLO11 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | Automatically track, visualize and even remotely train YOLO11 using [ClearML](https://clear.ml/) (open-source!) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) | +| Ultralytics HUB ๐Ÿš€ | W&B | Comet โญ NEW | Neural Magic | +| :--------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | +| Streamline YOLO workflows: Label, train, and deploy effortlessly with [Ultralytics HUB](https://www.ultralytics.com/hub). Try now! 
| Track experiments, hyperparameters, and results with [Weights & Biases](https://docs.wandb.ai/guides/integrations/ultralytics/) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) | ##
Ultralytics HUB
Experience seamless AI with [Ultralytics HUB](https://www.ultralytics.com/hub) ⭐, the all-in-one solution for data visualization, YOLO11 🚀 model training and deployment, without any coding. Transform images into actionable insights and bring your AI visions to life with ease using our cutting-edge platform and user-friendly [Ultralytics App](https://www.ultralytics.com/app-install). Start your journey for **Free** now! - + Ultralytics HUB preview image ##
Contribute
@@ -339,5 +344,5 @@ For Ultralytics bug reports and feature requests please visit [GitHub Issues](ht space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/README.zh-CN.md b/README.zh-CN.md index fe45efb8859..1fb18e9cb15 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -7,26 +7,29 @@ [中文](https://docs.ultralytics.com/zh) | [한국어](https://docs.ultralytics.com/ko) | [日本語](https://docs.ultralytics.com/ja) | [Русский](https://docs.ultralytics.com/ru) | [Deutsch](https://docs.ultralytics.com/de) | [Français](https://docs.ultralytics.com/fr) | [Español](https://docs.ultralytics.com/es) | [Português](https://docs.ultralytics.com/pt) | [Türkçe](https://docs.ultralytics.com/tr) | [Tiếng Việt](https://docs.ultralytics.com/vi) | [العربية](https://docs.ultralytics.com/ar)
- Ultralytics CI + Ultralytics CI + Ultralytics Downloads Ultralytics YOLO Citation - Ultralytics Docker Pulls - Ultralytics Discord - Ultralytics Forums + Ultralytics Discord + Ultralytics Forums Ultralytics Reddit
Run Ultralytics on Gradient Open Ultralytics In Colab - Open Ultralytics In Kaggle + Open Ultralytics In Kaggle + Open Ultralytics In Binder

[Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics) ๆ˜ฏไธ€ไธชๅฐ–็ซฏ็š„ใ€ๆœ€ๅ…ˆ่ฟ›๏ผˆSOTA๏ผ‰็š„ๆจกๅž‹๏ผŒๅŸบไบŽไน‹ๅ‰ YOLO ็‰ˆๆœฌ็š„ๆˆๅŠŸ๏ผŒๅนถๅผ•ๅ…ฅไบ†ๆ–ฐๅŠŸ่ƒฝๅ’Œๆ”น่ฟ›ไปฅ่ฟ›ไธ€ๆญฅๆๅ‡ๆ€ง่ƒฝๅ’Œ็ตๆดปๆ€งใ€‚YOLO11 ่ขซ่ฎพ่ฎกๅพ—ๅฟซ้€Ÿใ€ๅ‡†็กฎไธ”ๆ˜“ไบŽไฝฟ็”จ๏ผŒๆ˜ฏ่ฟ›่กŒๅนฟๆณ›ๅฏน่ฑกๆฃ€ๆต‹ๅ’Œ่ทŸ่ธชใ€ๅฎžไพ‹ๅˆ†ๅ‰ฒใ€ๅ›พๅƒๅˆ†็ฑปๅ’Œๅงฟๆ€ไผฐ่ฎกไปปๅŠก็š„็†ๆƒณ้€‰ๆ‹ฉใ€‚ -ๆˆ‘ไปฌๅธŒๆœ›่ฟ™้‡Œ็š„่ต„ๆบ่ƒฝๅธฎๅŠฉไฝ ๅ……ๅˆ†ๅˆฉ็”จ YOLOใ€‚่ฏทๆต่งˆ Ultralytics ๆ–‡ๆกฃ ไปฅ่Žทๅ–่ฏฆ็ป†ไฟกๆฏ๏ผŒๅœจ GitHub ไธŠๆๅ‡บ้—ฎ้ข˜ๆˆ–่ฎจ่ฎบ๏ผŒๆˆไธบ Ultralytics Discordใ€Reddit ๅ’Œ ่ฎบๅ› ็š„ๆˆๅ‘˜๏ผ +ๆˆ‘ไปฌๅธŒๆœ›่ฟ™้‡Œ็š„่ต„ๆบ่ƒฝๅธฎๅŠฉไฝ ๅ……ๅˆ†ๅˆฉ็”จ YOLOใ€‚่ฏทๆต่งˆ Ultralytics ๆ–‡ๆกฃ ไปฅ่Žทๅ–่ฏฆ็ป†ไฟกๆฏ๏ผŒๅœจ GitHub ไธŠๆๅ‡บ้—ฎ้ข˜ๆˆ–่ฎจ่ฎบ๏ผŒๆˆไธบ Ultralytics Discordใ€Reddit ๅ’Œ ่ฎบๅ› ็š„ๆˆๅ‘˜๏ผ ๆƒณ็”ณ่ฏทไผไธš่ฎธๅฏ่ฏ๏ผŒ่ฏทๅฎŒๆˆ [Ultralytics Licensing](https://www.ultralytics.com/license) ไธŠ็š„่กจๅ•ใ€‚ -YOLO11 performance plots + + YOLO11 performance plots +
Ultralytics GitHub @@ -41,7 +44,7 @@ space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
@@ -54,7 +57,7 @@ ๅœจ [**Python>=3.8**](https://www.python.org/) ็Žฏๅขƒไธญไฝฟ็”จ [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/) ้€š่ฟ‡ pip ๅฎ‰่ฃ…ๅŒ…ๅซๆ‰€ๆœ‰[ไพ่ต–้กน](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) ็š„ ultralytics ๅŒ…ใ€‚ -[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) +[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) ```bash pip install ultralytics @@ -62,7 +65,7 @@ pip install ultralytics ๆœ‰ๅ…ณๅ…ถไป–ๅฎ‰่ฃ…ๆ–นๆณ•๏ผŒๅŒ…ๆ‹ฌ [Conda](https://anaconda.org/conda-forge/ultralytics)ใ€[Docker](https://hub.docker.com/r/ultralytics/ultralytics) ๅ’Œ Git๏ผŒ่ฏทๅ‚้˜… [ๅฟซ้€Ÿๅผ€ๅง‹ๆŒ‡ๅ—](https://docs.ultralytics.com/quickstart/)ใ€‚ -[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics?logo=condaforge)](https://anaconda.org/conda-forge/ultralytics) [![Docker Image Version](https://img.shields.io/docker/v/ultralytics/ultralytics?sort=semver&logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) +[![Conda Version](https://img.shields.io/conda/vn/conda-forge/ultralytics?logo=condaforge)](https://anaconda.org/conda-forge/ultralytics) [![Docker Image Version](https://img.shields.io/docker/v/ultralytics/ultralytics?sort=semver&logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) [![Ultralytics Docker 
Pulls](https://img.shields.io/docker/pulls/ultralytics/ultralytics?logo=docker)](https://hub.docker.com/r/ultralytics/ultralytics) @@ -114,11 +117,13 @@ path = model.export(format="onnx") # ่ฟ”ๅ›žๅฏผๅ‡บๆจกๅž‹็š„่ทฏๅพ„ ##
模型
-YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://docs.ultralytics.com/tasks/segment/) ๅ’Œ [ๅงฟๆ€](https://docs.ultralytics.com/tasks/pose/) ๆจกๅž‹ๅœจ [COCO](https://docs.ultralytics.com/datasets/detect/coco/) ๆ•ฐๆฎ้›†ไธŠ่ฟ›่กŒ้ข„่ฎญ็ปƒ๏ผŒ่ฟ™ไบ›ๆจกๅž‹ๅฏๅœจๆญคๅค„่Žทๅพ—๏ผŒๆญคๅค–่ฟ˜ๆœ‰ๅœจ [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) ๆ•ฐๆฎ้›†ไธŠ้ข„่ฎญ็ปƒ็š„ YOLO11 [ๅˆ†็ฑป](https://docs.ultralytics.com/tasks/classify/) ๆจกๅž‹ใ€‚ๆ‰€ๆœ‰ๆฃ€ๆต‹ใ€ๅˆ†ๅ‰ฒๅ’Œๅงฟๆ€ๆจกๅž‹ๅ‡ๆ”ฏๆŒ [่ทŸ่ธช](https://docs.ultralytics.com/modes/track/) ๆจกๅผใ€‚ +YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://docs.ultralytics.com/tasks/segment/) ๅ’Œ [ๅงฟๆ€](https://docs.ultralytics.com/tasks/pose/) ๆจกๅž‹ๅœจ [COCO](https://docs.ultralytics.com/datasets/detect/coco/) ๆ•ฐๆฎ้›†ไธŠ่ฟ›่กŒ้ข„่ฎญ็ปƒ๏ผŒ่ฟ™ไบ›ๆจกๅž‹ๅฏๅœจๆญคๅค„่Žทๅพ—๏ผŒๆญคๅค–่ฟ˜ๆœ‰ๅœจ [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) ๆ•ฐๆฎ้›†ไธŠ้ข„่ฎญ็ปƒ็š„ YOLO11 [ๅˆ†็ฑป](https://docs.ultralytics.com/tasks/classify/) ๆจกๅž‹ใ€‚ๆ‰€ๆœ‰ๆฃ€ๆต‹ใ€ๅˆ†ๅ‰ฒๅ’Œๅงฟๆ€ๆจกๅž‹ๅ‡ๆ”ฏๆŒ [่ทŸ่ธช](https://docs.ultralytics.com/modes/track/) ๆจกๅผใ€‚ๆ‰€ๆœ‰[ๆจกๅž‹](https://docs.ultralytics.com/models/)ๅœจ้ฆ–ๆฌกไฝฟ็”จๆ—ถ่‡ชๅŠจไปŽๆœ€ๆ–ฐ็š„ Ultralytics [ๅ‘ๅธƒ](https://github.com/ultralytics/assets/releases)ไธ‹่ฝฝใ€‚ -Ultralytics YOLO supported tasks - -ๆ‰€ๆœ‰[ๆจกๅž‹](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models)ๅœจ้ฆ–ๆฌกไฝฟ็”จๆ—ถ่‡ชๅŠจไปŽๆœ€ๆ–ฐ็š„ Ultralytics [ๅ‘ๅธƒ](https://github.com/ultralytics/assets/releases)ไธ‹่ฝฝใ€‚ + + Ultralytics YOLO supported tasks + +
+
ๆฃ€ๆต‹ (COCO) @@ -126,11 +131,11 @@ YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://d | ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | mAPval
50-95 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) | | ------------------------------------------------------------------------------------ | ------------------- | -------------------- | ----------------------------- | ---------------------------------- | ---------------- | ----------------- | -| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.12 ยฑ 0.82 ms | 1.55 ยฑ 0.01 ms | 2.6 | 6.5 | -| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.01 ยฑ 1.17 ms | 2.46 ยฑ 0.00 ms | 9.4 | 21.5 | -| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.20 ยฑ 2.04 ms | 4.70 ยฑ 0.06 ms | 20.1 | 68.0 | -| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.64 ยฑ 1.39 ms | 6.16 ยฑ 0.08 ms | 25.3 | 86.9 | -| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.78 ยฑ 6.66 ms | 11.31 ยฑ 0.24 ms | 56.9 | 194.9 | +| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.1 ยฑ 0.8 | 1.5 ยฑ 0.0 | 2.6 | 6.5 | +| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.0 ยฑ 1.2 | 2.5 ยฑ 0.0 | 9.4 | 21.5 | +| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.2 ยฑ 2.0 | 4.7 ยฑ 0.1 | 20.1 | 68.0 | +| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.6 ยฑ 1.4 | 6.2 ยฑ 0.1 | 25.3 | 86.9 | +| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.8 ยฑ 6.7 | 11.3 ยฑ 0.2 | 56.9 | 194.9 | - **mAPval** ๅ€ผ้’ˆๅฏนๅ•ๆจกๅž‹ๅ•ๅฐบๅบฆๅœจ [COCO val2017](https://cocodataset.org/) ๆ•ฐๆฎ้›†ไธŠ่ฟ›่กŒใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val detect data=coco.yaml device=0` - **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ COCO ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val detect data=coco.yaml batch=1 device=0|cpu` @@ -143,14 +148,31 @@ YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://d | ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | mAPbox
50-95 | mAPmask
50-95 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) | | -------------------------------------------------------------------------------------------- | ------------------- | -------------------- | --------------------- | ----------------------------- | ---------------------------------- | ---------------- | ----------------- | -| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.90 ยฑ 1.14 ms | 1.84 ยฑ 0.00 ms | 2.9 | 10.4 | -| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.56 ยฑ 4.89 ms | 2.94 ยฑ 0.01 ms | 10.1 | 35.5 | -| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.63 ยฑ 1.16 ms | 6.31 ยฑ 0.09 ms | 22.4 | 123.3 | -| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.16 ยฑ 3.17 ms | 7.78 ยฑ 0.16 ms | 27.6 | 142.2 | -| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.50 ยฑ 3.24 ms | 15.75 ยฑ 0.67 ms | 62.1 | 319.0 | +| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.9 ยฑ 1.1 | 1.8 ยฑ 0.0 | 2.9 | 10.4 | +| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.6 ยฑ 4.9 | 2.9 ยฑ 0.0 | 10.1 | 35.5 | +| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.6 ยฑ 1.2 | 6.3 ยฑ 0.1 | 22.4 | 123.3 | +| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ยฑ 3.2 | 7.8 ยฑ 0.2 | 27.6 | 142.2 | +| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ยฑ 3.2 | 15.8 ยฑ 0.7 | 62.1 | 319.0 | + +- **mAPval** ๅ€ผ้’ˆๅฏนๅ•ๆจกๅž‹ๅ•ๅฐบๅบฆๅœจ [COCO 
val2017](https://cocodataset.org/) 数据集上进行。
复制命令 `yolo val segment data=coco.yaml device=0` +- **速度**在使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例的 COCO 验证图像上平均。
复制命令 `yolo val segment data=coco.yaml batch=1 device=0|cpu` + +
+ +
ๅˆ†็ฑป (ImageNet) + +่ฏทๅ‚้˜… [ๅˆ†็ฑปๆ–‡ๆกฃ](https://docs.ultralytics.com/tasks/classify/) ไปฅ่Žทๅ–ไฝฟ็”จ่ฟ™ไบ›ๅœจ [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) ๆ•ฐๆฎ้›†ไธŠ่ฎญ็ปƒ็š„ๆจกๅž‹็š„็คบไพ‹๏ผŒๅ…ถไธญๅŒ…ๅซ 1000 ไธช้ข„่ฎญ็ปƒ็ฑปๅˆซใ€‚ -- **mAPval** ๅ€ผ้’ˆๅฏนๅ•ๆจกๅž‹ๅ•ๅฐบๅบฆๅœจ [COCO val2017](https://cocodataset.org/) ๆ•ฐๆฎ้›†ไธŠ่ฟ›่กŒใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val segment data=coco-seg.yaml device=0` -- **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ COCO ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` +| ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | acc
top1 | acc
top5 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) at 640 | +| -------------------------------------------------------------------------------------------- | ------------------- | ---------------- | ---------------- | ----------------------------- | ---------------------------------- | ---------------- | ------------------------ | +| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.0 ยฑ 0.3 | 1.1 ยฑ 0.0 | 1.6 | 3.3 | +| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.9 ยฑ 0.2 | 1.3 ยฑ 0.0 | 5.5 | 12.1 | +| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.2 ยฑ 0.4 | 2.0 ยฑ 0.0 | 10.4 | 39.3 | +| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.2 ยฑ 0.3 | 2.8 ยฑ 0.0 | 12.9 | 49.4 | +| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.4 ยฑ 0.9 | 3.8 ยฑ 0.0 | 28.4 | 110.4 | + +- **acc** ๅ€ผไธบๅœจ [ImageNet](https://www.image-net.org/) ๆ•ฐๆฎ้›†้ชŒ่ฏ้›†ไธŠ็š„ๆจกๅž‹ๅ‡†็กฎ็އใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val classify data=path/to/ImageNet device=0` +- **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ ImageNet ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu`
@@ -158,13 +180,13 @@ YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://d ่ฏทๅ‚้˜… [ๅงฟๆ€ๆ–‡ๆกฃ](https://docs.ultralytics.com/tasks/pose/) ไปฅ่Žทๅ–ไฝฟ็”จ่ฟ™ไบ›ๅœจ [COCO-Pose](https://docs.ultralytics.com/datasets/pose/coco/) ๆ•ฐๆฎ้›†ไธŠ่ฎญ็ปƒ็š„ๆจกๅž‹็š„็คบไพ‹๏ผŒๅ…ถไธญๅŒ…ๅซ 1 ไธช้ข„่ฎญ็ปƒ็ฑปๅˆซ๏ผˆไบบ๏ผ‰ใ€‚ -| ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | mAPpose
50-95 | mAPpose
50 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) | -| ---------------------------------------------------------------------------------------------- | ------------------- | --------------------- | ------------------ | ----------------------------- | ---------------------------------- | ---------------- | ----------------- | -| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.40 ยฑ 0.51 ms | 1.72 ยฑ 0.01 ms | 2.9 | 7.6 | -| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.54 ยฑ 0.59 ms | 2.57 ยฑ 0.00 ms | 9.9 | 23.2 | -| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.28 ยฑ 0.77 ms | 4.94 ยฑ 0.05 ms | 20.9 | 71.7 | -| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.69 ยฑ 1.10 ms | 6.42 ยฑ 0.13 ms | 26.2 | 90.7 | -| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 487.97 ยฑ 13.91 ms | 12.06 ยฑ 0.20 ms | 58.8 | 203.3 | +| ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | mAPpose
50-95 | mAPpose
50 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) | +| ---------------------------------------------------------------------------------------------- | ------------------- | --------------------- | ------------------ | ----------------------------- | ---------------------------------- | ---------------- | ----------------- | +| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.4 ± 0.5 | 1.7 ± 0.0 | 2.9 | 7.6 | +| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.5 ± 0.6 | 2.6 ± 0.0 | 9.9 | 23.2 | +| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.3 ± 0.8 | 4.9 ± 0.1 | 20.9 | 71.7 | +| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.7 ± 1.1 | 6.4 ± 0.1 | 26.2 | 90.7 | +| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 488.0 ± 13.9 | 12.1 ± 0.2 | 58.8 | 203.3 | - **mAPval** 值针对单模型单尺度在 [COCO Keypoints val2017](https://cocodataset.org/) 数据集上进行。
ๅคๅˆถๅ‘ฝไปค `yolo val pose data=coco-pose.yaml device=0` - **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ COCO ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` @@ -177,50 +199,33 @@ YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://d | ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | mAPtest
50 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) | | -------------------------------------------------------------------------------------------- | ------------------- | ------------------ | ----------------------------- | ---------------------------------- | ---------------- | ----------------- | -| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ยฑ 0.80 ms | 4.43 ยฑ 0.01 ms | 2.7 | 17.2 | -| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ยฑ 4.00 ms | 5.13 ยฑ 0.02 ms | 9.7 | 57.5 | -| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ยฑ 2.87 ms | 10.07 ยฑ 0.38 ms | 20.9 | 183.5 | -| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ยฑ 4.98 ms | 13.46 ยฑ 0.55 ms | 26.2 | 232.0 | -| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ยฑ 7.67 ms | 28.59 ยฑ 0.96 ms | 58.8 | 520.2 | +| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ยฑ 0.80 | 4.43 ยฑ 0.01 | 2.7 | 17.2 | +| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ยฑ 4.00 | 5.13 ยฑ 0.02 | 9.7 | 57.5 | +| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ยฑ 2.87 | 10.07 ยฑ 0.38 | 20.9 | 183.5 | +| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ยฑ 4.98 | 13.46 ยฑ 0.55 | 26.2 | 232.0 | +| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ยฑ 7.67 | 28.59 ยฑ 0.96 | 58.8 | 520.2 | - **mAPtest** ๅ€ผ้’ˆๅฏนๅ•ๆจกๅž‹ๅคšๅฐบๅบฆๅœจ [DOTAv1](https://captain-whu.github.io/DOTA/index.html) ๆ•ฐๆฎ้›†ไธŠ่ฟ›่กŒใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val obb data=DOTAv1.yaml device=0 split=test` ๅนถๆไบคๅˆๅนถ็ป“ๆžœๅˆฐ [DOTA ่ฏ„ไผฐ](https://captain-whu.github.io/DOTA/evaluation.html)ใ€‚ - **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ DOTAv1 ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu` -
ๅˆ†็ฑป (ImageNet) - -่ฏทๅ‚้˜… [ๅˆ†็ฑปๆ–‡ๆกฃ](https://docs.ultralytics.com/tasks/classify/) ไปฅ่Žทๅ–ไฝฟ็”จ่ฟ™ไบ›ๅœจ [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) ๆ•ฐๆฎ้›†ไธŠ่ฎญ็ปƒ็š„ๆจกๅž‹็š„็คบไพ‹๏ผŒๅ…ถไธญๅŒ…ๅซ 1000 ไธช้ข„่ฎญ็ปƒ็ฑปๅˆซใ€‚ - -| ๆจกๅž‹ | ๅฐบๅฏธ
(ๅƒ็ด ) | acc
top1 | acc
top5 | ้€Ÿๅบฆ
CPU ONNX
(ms) | ้€Ÿๅบฆ
T4 TensorRT10
(ms) | ๅ‚ๆ•ฐ
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | ------------------- | ---------------- | ---------------- | ----------------------------- | ---------------------------------- | ---------------- | ------------------------ | -| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.03 ยฑ 0.32 ms | 1.10 ยฑ 0.01 ms | 1.6 | 3.3 | -| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.89 ยฑ 0.18 ms | 1.34 ยฑ 0.01 ms | 5.5 | 12.1 | -| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.17 ยฑ 0.40 ms | 1.95 ยฑ 0.00 ms | 10.4 | 39.3 | -| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.17 ยฑ 0.29 ms | 2.76 ยฑ 0.00 ms | 12.9 | 49.4 | -| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.41 ยฑ 0.94 ms | 3.82 ยฑ 0.00 ms | 28.4 | 110.4 | - -- **acc** ๅ€ผไธบๅœจ [ImageNet](https://www.image-net.org/) ๆ•ฐๆฎ้›†้ชŒ่ฏ้›†ไธŠ็š„ๆจกๅž‹ๅ‡†็กฎ็އใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val classify data=path/to/ImageNet device=0` -- **้€Ÿๅบฆ**ๅœจไฝฟ็”จ [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) ๅฎžไพ‹็š„ ImageNet ้ชŒ่ฏๅ›พๅƒไธŠๅนณๅ‡ใ€‚
ๅคๅˆถๅ‘ฝไปค `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` - -
- ##
集成
-ๆˆ‘ไปฌไธŽ้ข†ๅ…ˆ็š„ AI ๅนณๅฐ็š„ๅ…ณ้”ฎ้›†ๆˆๆ‰ฉๅฑ•ไบ† Ultralytics ไบงๅ“็š„ๅŠŸ่ƒฝ๏ผŒๅขžๅผบไบ†ๆ•ฐๆฎ้›†ๆ ‡่ฎฐใ€่ฎญ็ปƒใ€ๅฏ่ง†ๅŒ–ๅ’Œๆจกๅž‹็ฎก็†็ญ‰ไปปๅŠก็š„่ƒฝๅŠ›ใ€‚ไบ†่งฃ Ultralytics ๅฆ‚ไฝ•ไธŽ [Roboflow](https://roboflow.com/?ref=ultralytics)ใ€ClearMLใ€[Comet](https://bit.ly/yolov8-readme-comet)ใ€Neural Magic ๅ’Œ [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) ๅˆไฝœ๏ผŒไผ˜ๅŒ–ๆ‚จ็š„ AI ๅทฅไฝœๆต็จ‹ใ€‚ +ๆˆ‘ไปฌไธŽ้ข†ๅ…ˆ็š„ AI ๅนณๅฐ็š„ๅ…ณ้”ฎ้›†ๆˆๆ‰ฉๅฑ•ไบ† Ultralytics ไบงๅ“็š„ๅŠŸ่ƒฝ๏ผŒๆๅ‡ไบ†ๆ•ฐๆฎ้›†ๆ ‡ๆณจใ€่ฎญ็ปƒใ€ๅฏ่ง†ๅŒ–ๅ’Œๆจกๅž‹็ฎก็†็ญ‰ไปปๅŠกใ€‚ๆŽข็ดข Ultralytics ๅฆ‚ไฝ•้€š่ฟ‡ไธŽ [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/)ใ€[Comet](https://bit.ly/yolov8-readme-comet)ใ€[Roboflow](https://roboflow.com/?ref=ultralytics) ๅ’Œ [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) ็š„ๅˆไฝœ๏ผŒไผ˜ๅŒ–ๆ‚จ็š„ AI ๅทฅไฝœๆต็จ‹ใ€‚ -
- -Ultralytics active learning integrations + + Ultralytics active learning integrations +

- - Roboflow logo + + Ultralytics HUB logo space - - ClearML logo + + W&B logo space Comet ML logo @@ -229,15 +234,15 @@ YOLO11 [ๆฃ€ๆต‹](https://docs.ultralytics.com/tasks/detect/)ใ€[ๅˆ†ๅ‰ฒ](https://d NeuralMagic logo
-| Roboflow | ClearML โญ NEW | Comet โญ NEW | Neural Magic โญ NEW | -| :--------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | -| Label and export your custom datasets directly to YOLO11 for training with [Roboflow](https://roboflow.com/?ref=ultralytics) | Automatically track, visualize and even remotely train YOLO11 using [ClearML](https://clear.ml/) (open-source!) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) | +| Ultralytics HUB ๐Ÿš€ | W&B | Comet โญ ๅ…จๆ–ฐ | Neural Magic | +| :----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------: | +| ็ฎ€ๅŒ– YOLO ๅทฅไฝœๆต็จ‹๏ผš้€š่ฟ‡ [Ultralytics HUB](https://www.ultralytics.com/hub) ่ฝปๆพๆ ‡ๆณจใ€่ฎญ็ปƒๅ’Œ้ƒจ็ฝฒใ€‚็ซ‹ๅณ่ฏ•็”จ๏ผ | ไฝฟ็”จ [Weights & Biases](https://docs.wandb.ai/guides/integrations/ultralytics/) ่ทŸ่ธชๅฎž้ชŒใ€่ถ…ๅ‚ๆ•ฐๅ’Œ็ป“ๆžœ | ๆฐธไน…ๅ…่ดน๏ผŒ[Comet](https://bit.ly/yolov5-readme-comet) ๅ…่ฎธๆ‚จไฟๅญ˜ YOLO11 ๆจกๅž‹ใ€ๆขๅค่ฎญ็ปƒ๏ผŒๅนถไบคไบ’ๅผๅœฐๅฏ่ง†ๅŒ–ๅ’Œ่ฐƒ่ฏ•้ข„ๆต‹็ป“ๆžœ | 
ไฝฟ็”จ [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) ่ฟ่กŒ YOLO11 ๆŽจ็†๏ผŒ้€Ÿๅบฆๆๅ‡่‡ณ 6 ๅ€ | ##
Ultralytics HUB
ไฝ“้ชŒๆ— ็ผ AI ไฝฟ็”จ [Ultralytics HUB](https://www.ultralytics.com/hub) โญ๏ผŒไธ€ไธช้›†ๆ•ฐๆฎๅฏ่ง†ๅŒ–ใ€YOLO11 ๐Ÿš€ ๆจกๅž‹่ฎญ็ปƒๅ’Œ้ƒจ็ฝฒไบŽไธ€ไฝ“็š„่งฃๅ†ณๆ–นๆกˆ๏ผŒๆ— ้œ€็ผ–ๅ†™ไปฃ็ ใ€‚ๅˆฉ็”จๆˆ‘ไปฌๆœ€ๅ…ˆ่ฟ›็š„ๅนณๅฐๅ’Œ็”จๆˆทๅ‹ๅฅฝ็š„ [Ultralytics ๅบ”็”จ](https://www.ultralytics.com/app-install)๏ผŒๅฐ†ๅ›พๅƒ่ฝฌๆขไธบๅฏๆ“ไฝœ่ง่งฃ๏ผŒๅนถ่ฝปๆพๅฎž็Žฐๆ‚จ็š„ AI ๆ„ฟๆ™ฏใ€‚ๅ…่ดนๅผ€ๅง‹ๆ‚จ็š„ๆ—…็จ‹๏ผ - + Ultralytics HUB preview image ##
贡献
@@ -274,5 +279,5 @@ Ultralytics ๆไพ›ไธค็ง่ฎธๅฏ้€‰้กนไปฅ้€‚ๅบ”ๅ„็ง็”จไพ‹๏ผš space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/docker/Dockerfile b/docker/Dockerfile index 3283c65076f..2cfbfd352e9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,7 @@ # Image is CUDA-optimized for YOLO11 single/multi-GPU training and inference # Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3 -FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime +FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime # Set environment variables # Avoid DDP error "MKL_THREADING_LAYER=INTEL is incompatible with libgomp.so.1 library" https://github.com/pytorch/pytorch/issues/37377 @@ -11,7 +11,8 @@ ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 \ PIP_BREAK_SYSTEM_PACKAGES=1 \ - MKL_THREADING_LAYER=GNU + MKL_THREADING_LAYER=GNU \ + OMP_NUM_THREADS=1 # Downloads to user config dir ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \ @@ -39,23 +40,22 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . 
# Install pip packages -RUN python3 -m pip install --upgrade pip wheel -# Pin TensorRT-cu12==10.1.0 to avoid 10.2.0 bug https://github.com/ultralytics/ultralytics/pull/14239 (note -cu12 must be used) -RUN pip install -e ".[export]" "tensorrt-cu12==10.1.0" "albumentations>=1.4.6" comet pycocotools +RUN pip install uv +# Note -cu12 must be used with tensorrt +RUN uv pip install --system -e ".[export]" tensorrt-cu12 "albumentations>=1.4.6" comet pycocotools # Run exports to AutoInstall packages # Edge TPU export fails the first time so is run twice here RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 || yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32 # Requires <= Python 3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 -RUN pip install "paddlepaddle>=2.6.0" x2paddle +RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle # Fix error: `np.bool` was a deprecated alias for the builtin `bool` segmentation error in Tests -RUN pip install numpy==1.23.5 +RUN uv pip install --system numpy==1.23.5 # Remove extra build files RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json - # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docker/Dockerfile-arm64 b/docker/Dockerfile-arm64 index b5bdbb0fb5c..dce27320381 100644 --- a/docker/Dockerfile-arm64 +++ b/docker/Dockerfile-arm64 @@ -34,8 +34,8 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . 
# Install pip packages -RUN python3 -m pip install --upgrade pip wheel -RUN pip install -e ".[export]" +RUN pip install uv +RUN uv pip install --system -e ".[export]" --break-system-packages # Creates a symbolic link to make 'python' point to 'python3' RUN ln -sf /usr/bin/python3 /usr/bin/python diff --git a/docker/Dockerfile-cpu b/docker/Dockerfile-cpu index fe8d88521f9..79d5d50b707 100644 --- a/docker/Dockerfile-cpu +++ b/docker/Dockerfile-cpu @@ -2,8 +2,8 @@ # Builds ultralytics/ultralytics:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics # Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLO11 deployments -# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu -FROM ubuntu:23.10 +# Use official Python base image for reproducibility (3.11.10 for export and 3.12.6 for inference) +FROM python:3.11.10-slim-bookworm # Set environment variables ENV PYTHONUNBUFFERED=1 \ @@ -32,21 +32,21 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . 
# Install pip packages -RUN python3 -m pip install --upgrade pip wheel -RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu +RUN pip install uv +RUN uv pip install --system -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match # Run exports to AutoInstall packages RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32 # Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 -# RUN pip install "paddlepaddle>=2.6.0" x2paddle - -# Creates a symbolic link to make 'python' point to 'python3' -RUN ln -sf /usr/bin/python3 /usr/bin/python +RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle # Remove extra build files RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json +# Set default command to bash +CMD ["/bin/bash"] + # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docker/Dockerfile-jetson-jetpack4 b/docker/Dockerfile-jetson-jetpack4 index c140974807f..e11279dad90 100644 --- a/docker/Dockerfile-jetson-jetpack4 +++ b/docker/Dockerfile-jetson-jetpack4 @@ -43,13 +43,14 @@ ADD https://nvidia.box.com/shared/static/gjqofg7rkg97z3gc8jeyup6t8n9j8xjw.whl on ADD https://forums.developer.nvidia.com/uploads/short-url/hASzFOm9YsJx6VVFrDW1g44CMmv.whl tensorrt-8.2.0.6-cp38-none-linux_aarch64.whl # Install pip packages -RUN python3 -m pip install --upgrade pip wheel -RUN pip install \ +RUN python3 -m pip install --upgrade pip +RUN python3 -m pip install uv +RUN uv pip install --system \ onnxruntime_gpu-1.8.0-cp38-cp38-linux_aarch64.whl \ tensorrt-8.2.0.6-cp38-none-linux_aarch64.whl \ https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-1.11.0a0+gitbc2c6ed-cp38-cp38-linux_aarch64.whl \ 
https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.12.0a0+9b5a3fe-cp38-cp38-linux_aarch64.whl -RUN pip install -e ".[export]" +RUN uv pip install --system -e ".[export]" # Remove extra build files RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json diff --git a/docker/Dockerfile-jetson-jetpack5 b/docker/Dockerfile-jetson-jetpack5 index 9949d26b71e..bfedb6e0cf2 100644 --- a/docker/Dockerfile-jetson-jetpack5 +++ b/docker/Dockerfile-jetson-jetpack5 @@ -1,9 +1,9 @@ # Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license # Builds ultralytics/ultralytics:jetson-jetson-jetpack5 image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics -# Supports JetPack5.x for YOLO11 on Jetson Xavier NX, AGX Xavier, AGX Orin, Orin Nano and Orin NX +# Supports JetPack5.1.2 for YOLO11 on Jetson Xavier NX, AGX Xavier, AGX Orin, Orin Nano and Orin NX -# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-pytorch -FROM nvcr.io/nvidia/l4t-pytorch:r35.2.1-pth2.0-py3 +# Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-jetpack +FROM nvcr.io/nvidia/l4t-jetpack:r35.4.1 # Set environment variables ENV PYTHONUNBUFFERED=1 \ @@ -16,13 +16,10 @@ ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \ https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.Unicode.ttf \ /root/.config/Ultralytics/ -# Install linux packages -# g++ required to build 'tflite_support' and 'lap' packages -# libusb-1.0-0 required for 'tflite_support' package when exporting to TFLite -# pkg-config and libhdf5-dev (not included) are needed to build 'h5py==3.11.0' aarch64 wheel required by 'tensorflow' +# Install dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ - gcc git zip unzip wget curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0 \ + git python3-pip libopenmpi-dev libopenblas-base libomp-dev \ && rm -rf /var/lib/apt/lists/* # Create working directory @@ -33,16 +30,14 @@ 
COPY . . RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . -# Remove opencv-python from Ultralytics dependencies as it conflicts with opencv-python installed in base image -RUN sed -i '/opencv-python/d' pyproject.toml +# Pip install onnxruntime-gpu, torch, torchvision and ultralytics +RUN python3 -m pip install --upgrade pip uv +RUN uv pip install --system \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.18.0-cp38-cp38-linux_aarch64.whl \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl -# Download onnxruntime-gpu 1.15.1 for Jetson Linux 35.2.1 (JetPack 5.1). Other versions can be seen in https://elinux.org/Jetson_Zoo#ONNX_Runtime -ADD https://nvidia.box.com/shared/static/mvdcltm9ewdy2d5nurkiqorofz1s53ww.whl onnxruntime_gpu-1.15.1-cp38-cp38-linux_aarch64.whl - -# Install pip packages manually for TensorRT compatibility https://github.com/NVIDIA/TensorRT/issues/2567 -RUN python3 -m pip install --upgrade pip wheel -RUN pip install onnxruntime_gpu-1.15.1-cp38-cp38-linux_aarch64.whl -RUN pip install -e ".[export]" +RUN uv pip install --system -e ".[export]" # Remove extra build files RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json diff --git a/docker/Dockerfile-jetson-jetpack6 b/docker/Dockerfile-jetson-jetpack6 index e4da5461db8..fa6ec651b0a 100644 --- a/docker/Dockerfile-jetson-jetpack6 +++ b/docker/Dockerfile-jetson-jetpack6 @@ -1,9 +1,9 @@ # Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license # Builds ultralytics/ultralytics:jetson-jetpack6 image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics -# Supports JetPack6.x for YOLO11 on Jetson AGX Orin, Orin NX and Orin Nano Series +# Supports JetPack6.1 for YOLO11 on Jetson AGX 
Orin, Orin NX and Orin Nano Series # Start FROM https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-jetpack -FROM nvcr.io/nvidia/l4t-jetpack:r36.3.0 +FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 # Set environment variables ENV PYTHONUNBUFFERED=1 \ @@ -17,9 +17,11 @@ ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/Arial.ttf \ /root/.config/Ultralytics/ # Install dependencies -RUN apt-get update && \ +ADD https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb . +RUN dpkg -i cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ apt-get install -y --no-install-recommends \ - git python3-pip libopenmpi-dev libopenblas-base libomp-dev \ + git python3-pip libopenmpi-dev libopenblas-base libomp-dev libcusparselt0 libcusparselt-dev \ && rm -rf /var/lib/apt/lists/* # Create working directory @@ -30,16 +32,13 @@ COPY . . RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . 
-# Download onnxruntime-gpu 1.18.0 from https://elinux.org/Jetson_Zoo and https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048 -ADD https://nvidia.box.com/shared/static/48dtuob7meiw6ebgfsfqakc9vse62sg4.whl onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl - # Pip install onnxruntime-gpu, torch, torchvision and ultralytics -RUN python3 -m pip install --upgrade pip wheel -RUN pip install \ - onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl \ - https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.3.0-cp310-cp310-linux_aarch64.whl \ - https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl -RUN pip install -e ".[export]" +RUN python3 -m pip install --upgrade pip uv +RUN uv pip install --system \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.20.0-cp310-cp310-linux_aarch64.whl \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.5.0a0+872d972e41.nv24.08-cp310-cp310-linux_aarch64.whl \ + https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl +RUN uv pip install --system -e ".[export]" # Remove extra build files RUN rm -rf *.whl /root/.config/Ultralytics/persistent_cache.json diff --git a/docker/Dockerfile-jupyter b/docker/Dockerfile-jupyter new file mode 100644 index 00000000000..c458ff88480 --- /dev/null +++ b/docker/Dockerfile-jupyter @@ -0,0 +1,33 @@ +# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Builds ultralytics/ultralytics:latest-jupyter image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics +# Image provides JupyterLab interface for interactive YOLO development and includes tutorial notebooks + +# Start from Python-based Ultralytics image for full Python environment +FROM ultralytics/ultralytics:latest-python + +# Install JupyterLab for interactive development +RUN uv pip install --system jupyterlab + +# Create persistent data 
directory structure +RUN mkdir /data + +# Configure YOLO directories +RUN mkdir /data/{datasets,weights,runs} && \ + yolo settings datasets_dir="/data/datasets" weights_dir="/data/weights" runs_dir="/data/runs" + +# Start JupyterLab with tutorial notebook +ENTRYPOINT ["/usr/local/bin/jupyter", "lab", "--allow-root", "--ip=*", "/ultralytics/examples/tutorial.ipynb"] + +# Usage Examples ------------------------------------------------------------------------------------------------------- + +# Build and Push +# t=ultralytics/ultralytics:latest-jupyter && sudo docker build -f docker/Dockerfile-jupyter -t $t . && sudo docker push $t + +# Run +# t=ultralytics/ultralytics:latest-jupyter && sudo docker run -it --ipc=host -p 8888:8888 $t + +# Pull and Run +# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 $t + +# Pull and Run with local volume mounted +# t=ultralytics/ultralytics:latest-jupyter && sudo docker pull $t && sudo docker run -it --ipc=host -p 8888:8888 -v "$(pwd)"/datasets:/data/datasets $t diff --git a/docker/Dockerfile-python b/docker/Dockerfile-python index c275dcd9d12..796d1887924 100644 --- a/docker/Dockerfile-python +++ b/docker/Dockerfile-python @@ -32,14 +32,14 @@ RUN sed -i '/^\[http "https:\/\/github\.com\/"\]/,+1d' .git/config ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt . 
# Install pip packages -RUN python3 -m pip install --upgrade pip wheel -RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu +RUN pip install uv +RUN uv pip install --system -e ".[export]" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-first-match # Run exports to AutoInstall packages RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32 # Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 -RUN pip install "paddlepaddle>=2.6.0" x2paddle +RUN uv pip install --system "paddlepaddle>=2.6.0" x2paddle # Remove extra build files RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json diff --git a/docker/Dockerfile-runner b/docker/Dockerfile-runner index 642f1a1bae6..5de5ee06507 100644 --- a/docker/Dockerfile-runner +++ b/docker/Dockerfile-runner @@ -17,13 +17,13 @@ ENV PYTHONUNBUFFERED=1 \ WORKDIR /actions-runner # Download and unpack the latest runner from https://github.com/actions/runner -RUN FILENAME=actions-runner-linux-x64-2.317.0.tar.gz && \ - curl -o $FILENAME -L https://github.com/actions/runner/releases/download/v2.317.0/$FILENAME && \ +RUN FILENAME=actions-runner-linux-x64-2.320.0.tar.gz && \ + curl -o $FILENAME -L https://github.com/actions/runner/releases/download/v2.320.0/$FILENAME && \ tar xzf $FILENAME && \ rm $FILENAME # Install runner dependencies -RUN pip install pytest-cov +RUN uv pip install --system pytest-cov RUN ./bin/installdependencies.sh && \ apt-get -y install libicu-dev @@ -35,7 +35,6 @@ ENTRYPOINT sh -c './config.sh --url https://github.com/ultralytics/ultralytics \ --replace && \ ./run.sh' - # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docs/README.md b/docs/README.md index 03285c41b4f..b4eaffcc70c 100644 --- a/docs/README.md +++ b/docs/README.md @@ 
-1,5 +1,5 @@
-Ultralytics logo +Ultralytics logo # ๐Ÿ“š Ultralytics Docs @@ -10,15 +10,15 @@ [![Check Domains](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) [![Ultralytics Actions](https://github.com/ultralytics/docs/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/format.yml) -Discord Ultralytics Forums Ultralytics Reddit +Discord Ultralytics Forums Ultralytics Reddit ## ๐Ÿ› ๏ธ Installation [![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) -[![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) +[![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) -To install the ultralytics package in developer mode, ensure you have Git and Python 3 installed on your system. Then, follow these steps: +To install the Ultralytics package in developer mode, ensure you have Git and Python 3 installed on your system. Then, follow these steps: 1. Clone the ultralytics repository to your local machine using Git: @@ -38,7 +38,7 @@ To install the ultralytics package in developer mode, ensure you have Git and Py pip install -e '.[dev]' ``` -- This command installs the ultralytics package along with all development dependencies, allowing you to modify the package code and have the changes immediately reflected in your Python environment. +- This command installs the Ultralytics package along with all development dependencies, allowing you to modify the package code and have the changes immediately reflected in your Python environment. 
## ๐Ÿš€ Building and Serving Locally @@ -142,5 +142,5 @@ For Ultralytics bug reports and feature requests please visit [GitHub Issues](ht space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord diff --git a/docs/build_docs.py b/docs/build_docs.py index e342312bd6a..68a333b84fe 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Automates the building and post-processing of MkDocs documentation, particularly for projects with multilingual content. It streamlines the workflow for generating localized versions of the documentation and updating HTML links to ensure @@ -24,6 +24,7 @@ - This script is built to be run in an environment where Python and MkDocs are installed and properly configured. """ +import json import os import re import shutil @@ -36,27 +37,41 @@ os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs DOCS = Path(__file__).parent.resolve() SITE = DOCS.parent / "site" +LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])") + + +def create_vercel_config(): + """Create vercel.json in the site directory with customized configuration settings.""" + config = {"trailingSlash": True} + with open(SITE / "vercel.json", "w") as f: + json.dump(config, f, indent=2) def prepare_docs_markdown(clone_repos=True): """Build docs using mkdocs.""" - if SITE.exists(): - print(f"Removing existing {SITE}") - shutil.rmtree(SITE) + print("Removing existing build artifacts") + shutil.rmtree(SITE, ignore_errors=True) + shutil.rmtree(DOCS / "repos", ignore_errors=True) - # Get hub-sdk repo if clone_repos: + # Get hub-sdk repo repo = "https://github.com/ultralytics/hub-sdk" - local_dir = DOCS.parent / Path(repo).name - if not local_dir.exists(): - os.system(f"git clone {repo} {local_dir}") - os.system(f"git -C {local_dir} pull") # update repo + 
local_dir = DOCS / "repos" / Path(repo).name + os.system(f"git clone {repo} {local_dir} --depth 1 --single-branch --branch main") shutil.rmtree(DOCS / "en/hub/sdk", ignore_errors=True) # delete if exists shutil.copytree(local_dir / "docs", DOCS / "en/hub/sdk") # for docs shutil.rmtree(DOCS.parent / "hub_sdk", ignore_errors=True) # delete if exists shutil.copytree(local_dir / "hub_sdk", DOCS.parent / "hub_sdk") # for mkdocstrings print(f"Cloned/Updated {repo} in {local_dir}") + # Get docs repo + repo = "https://github.com/ultralytics/docs" + local_dir = DOCS / "repos" / Path(repo).name + os.system(f"git clone {repo} {local_dir} --depth 1 --single-branch --branch main") + shutil.rmtree(DOCS / "en/compare", ignore_errors=True) # delete if exists + shutil.copytree(local_dir / "docs/en/compare", DOCS / "en/compare") # for docs + print(f"Cloned/Updated {repo} in {local_dir}") + # Add frontmatter for file in tqdm((DOCS / "en").rglob("*.md"), desc="Adding frontmatter"): update_markdown_files(file) @@ -64,7 +79,6 @@ def prepare_docs_markdown(clone_repos=True): def update_page_title(file_path: Path, new_title: str): """Update the title of an HTML file.""" - # Read the content of the file with open(file_path, encoding="utf-8") as file: content = file.read() @@ -99,7 +113,7 @@ def update_subdir_edit_links(subdir="", docs_url=""): if str(subdir[0]) == "/": subdir = str(subdir[0])[1:] html_files = (SITE / subdir).rglob("*.html") - for html_file in tqdm(html_files, desc="Processing subdir files"): + for html_file in tqdm(html_files, desc="Processing subdir files", mininterval=1.0): with html_file.open("r", encoding="utf-8") as file: soup = BeautifulSoup(file, "html.parser") @@ -155,15 +169,16 @@ def update_docs_html(): # Update 404 titles update_page_title(SITE / "404.html", new_title="Ultralytics Docs - Not Found") - # Update edit links - update_subdir_edit_links( - subdir="hub/sdk/", # do not use leading slash - docs_url="https://github.com/ultralytics/hub-sdk/tree/main/docs/", 
- ) + # Update edit button links + for subdir, docs_url in ( + ("hub/sdk/", "https://github.com/ultralytics/hub-sdk/tree/main/docs/"), # do not use leading slash + ("compare/", "https://github.com/ultralytics/docs/tree/main/docs/en/compare/"), + ): + update_subdir_edit_links(subdir=subdir, docs_url=docs_url) # Convert plaintext links to HTML hyperlinks files_modified = 0 - for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links"): + for html_file in tqdm(SITE.rglob("*.html"), desc="Converting plaintext links", mininterval=1.0): with open(html_file, encoding="utf-8") as file: content = file.read() updated_content = convert_plaintext_links_to_html(content) @@ -198,12 +213,9 @@ def convert_plaintext_links_to_html(content): for paragraph in main_content.find_all(["p", "li"]): # Focus on paragraphs and list items for text_node in paragraph.find_all(string=True, recursive=False): if text_node.parent.name not in {"a", "code"}: # Ignore links and code blocks - new_text = re.sub( - r'(https?://[^\s()<>]+(?:\.[^\s()<>]+)+)(?\1', - str(text_node), - ) - if "\1', str(text_node)) + if "\n?", r"\n", content, flags=re.DOTALL) + pass + elif file_type == "css": + # Remove CSS comments, preserving newline after comment + # content = re.sub(r"/\*.*?\*/\n?", r"\n", content, flags=re.DOTALL) + pass + elif file_type == "js": + # Remove JS single-line comments, preserving newline and URLs + # content = re.sub(r"(?
- Watch: How to Train [Image Classification](https://www.ultralytics.com/glossary/image-classification) Model using Caltech-256 Dataset with Ultralytics HUB + Watch: How to Train
Image Classification Model using Caltech-256 Dataset with Ultralytics HUB

## Key Features @@ -47,7 +47,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="caltech256", epochs=100, imgsz=416) @@ -57,7 +57,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the ```bash # Start training from a pretrained *.pt model - yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech256 model=yolo11n-cls.pt epochs=100 imgsz=416 ``` ## Sample Images and Annotations @@ -106,7 +106,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 [epochs](https://www.ul from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model + model = YOLO("yolo11n-cls.pt") # load a pretrained model # Train the model results = model.train(data="caltech256", epochs=100, imgsz=416) @@ -116,7 +116,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 [epochs](https://www.ul ```bash # Start training from a pretrained *.pt model - yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech256 model=yolo11n-cls.pt epochs=100 imgsz=416 ``` ### What are the most common use cases for the Caltech-256 dataset? @@ -141,6 +141,6 @@ Ultralytics YOLO models offer several advantages for training on the Caltech-256 - **High Accuracy**: YOLO models are known for their state-of-the-art performance in object detection tasks. - **Speed**: They provide real-time inference capabilities, making them suitable for applications requiring quick predictions. - **Ease of Use**: With Ultralytics HUB, users can train, validate, and deploy models without extensive coding. 
-- **Pretrained Models**: Starting from pretrained models, like `yolov8n-cls.pt`, can significantly reduce training time and improve model [accuracy](https://www.ultralytics.com/glossary/accuracy). +- **Pretrained Models**: Starting from pretrained models, like `yolo11n-cls.pt`, can significantly reduce training time and improve model [accuracy](https://www.ultralytics.com/glossary/accuracy). For more details, explore our [comprehensive training guide](../../modes/train.md). diff --git a/docs/en/datasets/classify/cifar10.md b/docs/en/datasets/classify/cifar10.md index 7bae78b38a3..e081bc16813 100644 --- a/docs/en/datasets/classify/cifar10.md +++ b/docs/en/datasets/classify/cifar10.md @@ -16,7 +16,7 @@ The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute allowfullscreen>
- Watch: How to Train an [Image Classification](https://www.ultralytics.com/glossary/image-classification) Model with CIFAR-10 Dataset using Ultralytics YOLOv8 + Watch: How to Train an Image Classification Model with CIFAR-10 Dataset using Ultralytics YOLO11

## Key Features @@ -50,7 +50,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="cifar10", epochs=100, imgsz=32) @@ -60,7 +60,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size ```bash # Start training from a pretrained *.pt model - yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar10 model=yolo11n-cls.pt epochs=100 imgsz=32 ``` ## Sample Images and Annotations @@ -104,7 +104,7 @@ To train a YOLO model on the CIFAR-10 dataset using Ultralytics, you can follow from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="cifar10", epochs=100, imgsz=32) @@ -114,7 +114,7 @@ To train a YOLO model on the CIFAR-10 dataset using Ultralytics, you can follow ```bash # Start training from a pretrained *.pt model - yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar10 model=yolo11n-cls.pt epochs=100 imgsz=32 ``` For more details, refer to the model [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/classify/cifar100.md b/docs/en/datasets/classify/cifar100.md index a6735bbcc4a..1f4713c458e 100644 --- a/docs/en/datasets/classify/cifar100.md +++ b/docs/en/datasets/classify/cifar100.md @@ -8,6 +8,17 @@ keywords: CIFAR-100, dataset, machine learning, computer vision, image classific The [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a significant extension of the CIFAR-10 dataset, composed of 60,000 32x32 color images in 100 different classes. It was developed by researchers at the CIFAR institute, offering a more challenging dataset for more complex machine learning and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. +

+
+ +
+ Watch: How to Train Ultralytics YOLO11 on CIFAR-100 | Step-by-Step Image Classification Tutorial 🚀 +

+ ## Key Features - The CIFAR-100 dataset consists of 60,000 images, divided into 100 classes. @@ -39,7 +50,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 [epochs](https://www.ultr from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="cifar100", epochs=100, imgsz=32) @@ -49,7 +60,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 [epochs](https://www.ultr ```bash # Start training from a pretrained *.pt model - yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar100 model=yolo11n-cls.pt epochs=100 imgsz=32 ``` ## Sample Images and Annotations @@ -97,7 +108,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="cifar100", epochs=100, imgsz=32) @@ -107,7 +118,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c ```bash # Start training from a pretrained *.pt model - yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar100 model=yolo11n-cls.pt epochs=100 imgsz=32 ``` For a comprehensive list of available arguments, please refer to the model [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/classify/fashion-mnist.md b/docs/en/datasets/classify/fashion-mnist.md index 531cd2c1bd8..6c49ceebb5e 100644 --- a/docs/en/datasets/classify/fashion-mnist.md +++ b/docs/en/datasets/classify/fashion-mnist.md @@ -16,7 +16,7 @@ The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is allowfullscreen>
- Watch: How to do [Image Classification](https://www.ultralytics.com/glossary/image-classification) on Fashion MNIST Dataset using Ultralytics YOLOv8 + Watch: How to do Image Classification on Fashion MNIST Dataset using Ultralytics YOLO11

## Key Features @@ -37,6 +37,7 @@ The Fashion-MNIST dataset is split into two subsets: Each training and test example is assigned to one of the following labels: +``` 0. T-shirt/top 1. Trouser 2. Pullover @@ -47,6 +48,7 @@ Each training and test example is assigned to one of the following labels: 7. Sneaker 8. Bag 9. Ankle boot +``` ## Applications @@ -64,7 +66,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 [epochs](https://www.u from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="fashion-mnist", epochs=100, imgsz=28) @@ -74,7 +76,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 [epochs](https://www.u ```bash # Start training from a pretrained *.pt model - yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=fashion-mnist model=yolo11n-cls.pt epochs=100 imgsz=28 ``` ## Sample Images and Annotations @@ -107,7 +109,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n-cls.pt") + model = YOLO("yolo11n-cls.pt") # Train the model on Fashion-MNIST results = model.train(data="fashion-mnist", epochs=100, imgsz=28) @@ -117,7 +119,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot === "CLI" ```bash - yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=fashion-mnist model=yolo11n-cls.pt epochs=100 imgsz=28 ``` For more detailed training parameters, refer to the [Training page](../../modes/train.md). @@ -128,7 +130,7 @@ The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is ### Can I use Ultralytics YOLO for image classification tasks like Fashion-MNIST? 
-Yes, Ultralytics YOLO models can be used for image classification tasks, including those involving the Fashion-MNIST dataset. YOLOv8, for example, supports various vision tasks such as detection, segmentation, and classification. To get started with image classification tasks, refer to the [Classification page](https://docs.ultralytics.com/tasks/classify/). +Yes, Ultralytics YOLO models can be used for image classification tasks, including those involving the Fashion-MNIST dataset. YOLO11, for example, supports various vision tasks such as detection, segmentation, and classification. To get started with image classification tasks, refer to the [Classification page](https://docs.ultralytics.com/tasks/classify/). ### What are the key features and structure of the Fashion-MNIST dataset? diff --git a/docs/en/datasets/classify/imagenet.md b/docs/en/datasets/classify/imagenet.md index 76e59b3f183..72c2e2a3b5b 100644 --- a/docs/en/datasets/classify/imagenet.md +++ b/docs/en/datasets/classify/imagenet.md @@ -10,13 +10,7 @@ keywords: ImageNet, deep learning, visual recognition, computer vision, pretrain ## ImageNet Pretrained Models -| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | -| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 | -| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 | -| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 | -| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-cls.pt) | 224 | 76.8 | 93.5 | 163.0 | 0.87 | 37.5 | 99.7 | -| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-cls.pt) | 224 | 79.0 | 94.6 | 232.0 | 1.01 | 57.4 | 154.8 | +{% include "macros/yolo-cls-perf.md" %} ## Key Features @@ -49,7 +43,7 @@ To train a deep learning model on the ImageNet dataset for 100 [epochs](https:// from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenet", epochs=100, imgsz=224) @@ -59,7 +53,7 @@ To train a deep learning model on the ImageNet dataset for 100 [epochs](https:// ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenet model=yolo11n-cls.pt epochs=100 imgsz=224 ``` ## Sample Images and Annotations @@ -110,7 +104,7 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # 
load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenet", epochs=100, imgsz=224) @@ -120,14 +114,14 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenet model=yolo11n-cls.pt epochs=100 imgsz=224 ``` For more in-depth training instruction, refer to our [Training page](../../modes/train.md). -### Why should I use the Ultralytics YOLOv8 pretrained models for my ImageNet dataset projects? +### Why should I use the Ultralytics YOLO11 pretrained models for my ImageNet dataset projects? -Ultralytics YOLOv8 pretrained models offer state-of-the-art performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for various computer vision tasks. For example, the YOLOv8n-cls model, with a top-1 accuracy of 69.0% and a top-5 accuracy of 88.3%, is optimized for real-time applications. Pretrained models reduce the computational resources required for training from scratch and accelerate development cycles. Learn more about the performance metrics of YOLOv8 models in the [ImageNet Pretrained Models section](#imagenet-pretrained-models). +Ultralytics YOLO11 pretrained models offer state-of-the-art performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for various computer vision tasks. For example, the YOLO11n-cls model, with a top-1 accuracy of 70.0% and a top-5 accuracy of 89.4%, is optimized for real-time applications. Pretrained models reduce the computational resources required for training from scratch and accelerate development cycles. Learn more about the performance metrics of YOLO11 models in the [ImageNet Pretrained Models section](#imagenet-pretrained-models). 
### How is the ImageNet dataset structured, and why is it important? diff --git a/docs/en/datasets/classify/imagenet10.md b/docs/en/datasets/classify/imagenet10.md index 4e40e6655f8..217d56c54b9 100644 --- a/docs/en/datasets/classify/imagenet10.md +++ b/docs/en/datasets/classify/imagenet10.md @@ -35,7 +35,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22 from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenet10", epochs=5, imgsz=224) @@ -45,7 +45,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22 ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 + yolo classify train data=imagenet10 model=yolo11n-cls.pt epochs=5 imgsz=224 ``` ## Sample Images and Annotations @@ -94,7 +94,7 @@ To test your deep learning model on the ImageNet10 dataset with an image size of from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenet10", epochs=5, imgsz=224) @@ -104,7 +104,7 @@ To test your deep learning model on the ImageNet10 dataset with an image size of ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 + yolo classify train data=imagenet10 model=yolo11n-cls.pt epochs=5 imgsz=224 ``` Refer to the [Training](../../modes/train.md) page for a comprehensive list of available arguments. 
diff --git a/docs/en/datasets/classify/imagenette.md b/docs/en/datasets/classify/imagenette.md index bf371502ad5..dd2af1c3d1b 100644 --- a/docs/en/datasets/classify/imagenette.md +++ b/docs/en/datasets/classify/imagenette.md @@ -37,7 +37,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenette", epochs=100, imgsz=224) @@ -47,7 +47,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenette model=yolo11n-cls.pt epochs=100 imgsz=224 ``` ## Sample Images and Annotations @@ -72,7 +72,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model with ImageNette160 results = model.train(data="imagenette160", epochs=100, imgsz=160) @@ -82,7 +82,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag ```bash # Start training from a pretrained *.pt model with ImageNette160 - yolo classify train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 + yolo classify train data=imagenette160 model=yolo11n-cls.pt epochs=100 imgsz=160 ``` !!! 
example "Train Example with ImageNette320" @@ -93,7 +93,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model with ImageNette320 results = model.train(data="imagenette320", epochs=100, imgsz=320) @@ -103,7 +103,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag ```bash # Start training from a pretrained *.pt model with ImageNette320 - yolo classify train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320 + yolo classify train data=imagenette320 model=yolo11n-cls.pt epochs=100 imgsz=320 ``` These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks. @@ -130,7 +130,7 @@ To train a YOLO model on the ImageNette dataset for 100 [epochs](https://www.ult from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagenette", epochs=100, imgsz=224) @@ -140,7 +140,7 @@ To train a YOLO model on the ImageNette dataset for 100 [epochs](https://www.ult ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenette model=yolo11n-cls.pt epochs=100 imgsz=224 ``` For more details, see the [Training](../../modes/train.md) documentation page. 
@@ -167,7 +167,7 @@ Yes, the ImageNette dataset is also available in two resized versions: ImageNett from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") + model = YOLO("yolo11n-cls.pt") # Train the model with ImageNette160 results = model.train(data="imagenette160", epochs=100, imgsz=160) @@ -177,7 +177,7 @@ Yes, the ImageNette dataset is also available in two resized versions: ImageNett ```bash # Start training from a pretrained *.pt model with ImageNette160 - yolo detect train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 + yolo classify train data=imagenette160 model=yolo11n-cls.pt epochs=100 imgsz=160 ``` For more information, refer to [Training with ImageNette160 and ImageNette320](#imagenette160-and-imagenette320). diff --git a/docs/en/datasets/classify/imagewoof.md b/docs/en/datasets/classify/imagewoof.md index 2ed0273b605..2e33f44542d 100644 --- a/docs/en/datasets/classify/imagewoof.md +++ b/docs/en/datasets/classify/imagewoof.md @@ -34,7 +34,7 @@ To train a CNN model on the ImageWoof dataset for 100 [epochs](https://www.ultra from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="imagewoof", epochs=100, imgsz=224) @@ -44,7 +44,7 @@ To train a CNN model on the ImageWoof dataset for 100 [epochs](https://www.ultra ```bash # Start training from a pretrained *.pt model - yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagewoof model=yolo11n-cls.pt epochs=100 imgsz=224 ``` ## Dataset Variants @@ -67,7 +67,7 @@ To use these variants in your training, simply replace 'imagewoof' in the datase from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a 
pretrained model (recommended for training) # For medium-sized dataset model.train(data="imagewoof320", epochs=100, imgsz=224) @@ -80,7 +80,7 @@ To use these variants in your training, simply replace 'imagewoof' in the datase ```bash # Load a pretrained model and train on the small-sized dataset - yolo classify train model=yolov8n-cls.pt data=imagewoof320 epochs=100 imgsz=224 + yolo classify train model=yolo11n-cls.pt data=imagewoof320 epochs=100 imgsz=224 ``` It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping. @@ -116,7 +116,7 @@ To train a [Convolutional Neural Network](https://www.ultralytics.com/glossary/c ```python from ultralytics import YOLO - model = YOLO("yolov8n-cls.pt") # Load a pretrained model + model = YOLO("yolo11n-cls.pt") # Load a pretrained model results = model.train(data="imagewoof", epochs=100, imgsz=224) ``` @@ -124,7 +124,7 @@ To train a [Convolutional Neural Network](https://www.ultralytics.com/glossary/c === "CLI" ```bash - yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagewoof model=yolo11n-cls.pt epochs=100 imgsz=224 ``` For more details on available training arguments, refer to the [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/classify/index.md b/docs/en/datasets/classify/index.md index 3567d6a2952..e8876ce9ebf 100644 --- a/docs/en/datasets/classify/index.md +++ b/docs/en/datasets/classify/index.md @@ -86,7 +86,7 @@ This structured approach ensures that the model can effectively learn from well- from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="path/to/dataset", epochs=100, imgsz=640) @@ -96,7 +96,7 @@ This structured approach ensures that the model can effectively learn from well- ```bash # Start training from a pretrained *.pt model - yolo detect train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640 + yolo classify train data=path/to/data model=yolo11n-cls.pt epochs=100 imgsz=640 ``` ## Supported Datasets @@ -113,6 +113,7 @@ Ultralytics supports the following datasets with automatic download: - [Imagenette](imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing. - [Imagewoof](imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks. - [MNIST](mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks. +- [MNIST160](mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total. 
### Adding your own dataset @@ -170,7 +171,7 @@ To use your own dataset with Ultralytics YOLO, ensure it follows the specified d from ultralytics import YOLO # Load a model -model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) +model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="path/to/your/dataset", epochs=100, imgsz=640) @@ -182,7 +183,7 @@ More details can be found in the [Adding your own dataset](#adding-your-own-data Ultralytics YOLO offers several benefits for image classification, including: -- **Pretrained Models**: Load pretrained models like `yolov8n-cls.pt` to jump-start your training process. +- **Pretrained Models**: Load pretrained models like `yolo11n-cls.pt` to jump-start your training process. - **Ease of Use**: Simple API and CLI commands for training and evaluation. - **High Performance**: State-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, ideal for real-time applications. - **Support for Multiple Datasets**: Seamless integration with various popular datasets like CIFAR-10, ImageNet, and more. @@ -202,7 +203,7 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model + model = YOLO("yolo11n-cls.pt") # load a pretrained model # Train the model results = model.train(data="path/to/dataset", epochs=100, imgsz=640) @@ -213,7 +214,7 @@ Training a model using Ultralytics YOLO can be done easily in both Python and CL ```bash # Start training from a pretrained *.pt model - yolo detect train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640 + yolo classify train data=path/to/data model=yolo11n-cls.pt epochs=100 imgsz=640 ``` These examples demonstrate the straightforward process of training a YOLO model using either approach. 
For more information, visit the [Usage](#usage) section. diff --git a/docs/en/datasets/classify/mnist.md b/docs/en/datasets/classify/mnist.md index 07f0a70a1d6..356fdc4f75c 100644 --- a/docs/en/datasets/classify/mnist.md +++ b/docs/en/datasets/classify/mnist.md @@ -6,7 +6,7 @@ keywords: MNIST, dataset, handwritten digits, image classification, deep learnin # MNIST Dataset -The [MNIST](http://yann.lecun.com/exdb/mnist/) (Modified National Institute of Standards and Technology) dataset is a large database of handwritten digits that is commonly used for training various image processing systems and machine learning models. It was created by "re-mixing" the samples from NIST's original datasets and has become a benchmark for evaluating the performance of image classification algorithms. +The [MNIST](https://en.wikipedia.org/wiki/MNIST_database) (Modified National Institute of Standards and Technology) dataset is a large database of handwritten digits that is commonly used for training various image processing systems and machine learning models. It was created by "re-mixing" the samples from NIST's original datasets and has become a benchmark for evaluating the performance of image classification algorithms. 
## Key Features @@ -42,7 +42,7 @@ To train a CNN model on the MNIST dataset for 100 [epochs](https://www.ultralyti from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="mnist", epochs=100, imgsz=32) @@ -52,7 +52,7 @@ To train a CNN model on the MNIST dataset for 100 [epochs](https://www.ultralyti ```bash # Start training from a pretrained *.pt model - yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=mnist model=yolo11n-cls.pt epochs=100 imgsz=28 ``` ## Sample Images and Annotations @@ -83,13 +83,13 @@ research or development work, please cite the following paper: } ``` -We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](http://yann.lecun.com/exdb/mnist/). +We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](https://en.wikipedia.org/wiki/MNIST_database). ## FAQ ### What is the MNIST dataset, and why is it important in machine learning? 
-The [MNIST](http://yann.lecun.com/exdb/mnist/) dataset, or Modified National Institute of Standards and Technology dataset, is a widely-used collection of handwritten digits designed for training and testing image classification systems. It includes 60,000 training images and 10,000 testing images, all of which are grayscale and 28x28 pixels in size. The dataset's importance lies in its role as a standard benchmark for evaluating image classification algorithms, helping researchers and engineers to compare methods and track progress in the field. +The [MNIST](https://en.wikipedia.org/wiki/MNIST_database) dataset, or Modified National Institute of Standards and Technology dataset, is a widely-used collection of handwritten digits designed for training and testing image classification systems. It includes 60,000 training images and 10,000 testing images, all of which are grayscale and 28x28 pixels in size. The dataset's importance lies in its role as a standard benchmark for evaluating image classification algorithms, helping researchers and engineers to compare methods and track progress in the field. ### How can I use Ultralytics YOLO to train a model on the MNIST dataset? @@ -103,7 +103,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="mnist", epochs=100, imgsz=32) @@ -113,7 +113,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the ```bash # Start training from a pretrained *.pt model - yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=mnist model=yolo11n-cls.pt epochs=100 imgsz=28 ``` For a detailed list of available training arguments, refer to the [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/detect/african-wildlife.md b/docs/en/datasets/detect/african-wildlife.md index 14a066b14b7..519b8cfa672 100644 --- a/docs/en/datasets/detect/african-wildlife.md +++ b/docs/en/datasets/detect/african-wildlife.md @@ -1,13 +1,24 @@ --- comments: true description: Explore our African Wildlife Dataset featuring images of buffalo, elephant, rhino, and zebra for training computer vision models. Ideal for research and conservation. -keywords: African Wildlife Dataset, South African animals, object detection, computer vision, YOLOv8, wildlife research, conservation, dataset +keywords: African Wildlife Dataset, South African animals, object detection, computer vision, YOLO11, wildlife research, conservation, dataset --- # African Wildlife Dataset This dataset showcases four common animal classes typically found in South African nature reserves. It includes images of African wildlife such as buffalo, elephant, rhino, and zebra, providing valuable insights into their characteristics. Essential for training [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) algorithms, this dataset aids in identifying animals in various habitats, from zoos to forests, and supports wildlife research. +

+
+ +
+ Watch: African Wildlife Animals Detection using Ultralytics YOLO11 +

+ ## Dataset Structure The African wildlife objects detection dataset is split into three subsets: @@ -32,7 +43,7 @@ A YAML (Yet Another Markup Language) file defines the dataset configuration, inc ## Usage -To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page. +To train a YOLO11n model on the African wildlife dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page. !!! example "Train Example" @@ -42,7 +53,7 @@ To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https: from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="african-wildlife.yaml", epochs=100, imgsz=640) @@ -52,7 +63,7 @@ To train a YOLOv8n model on the African wildlife dataset for 100 [epochs](https: ```bash # Start training from a pretrained *.pt model - yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=african-wildlife.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` !!! example "Inference Example" @@ -96,9 +107,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git The African Wildlife Dataset includes images of four common animal species found in South African nature reserves: buffalo, elephant, rhino, and zebra. It is a valuable resource for training computer vision algorithms in object detection and animal identification. 
The dataset supports various tasks like object tracking, research, and conservation efforts. For more information on its structure and applications, refer to the [Dataset Structure](#dataset-structure) section and [Applications](#applications) of the dataset. -### How do I train a YOLOv8 model using the African Wildlife Dataset? +### How do I train a YOLO11 model using the African Wildlife Dataset? -You can train a YOLOv8 model on the African Wildlife Dataset by using the `african-wildlife.yaml` configuration file. Below is an example of how to train the YOLOv8n model for 100 epochs with an image size of 640: +You can train a YOLO11 model on the African Wildlife Dataset by using the `african-wildlife.yaml` configuration file. Below is an example of how to train the YOLO11n model for 100 epochs with an image size of 640: !!! example @@ -108,7 +119,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="african-wildlife.yaml", epochs=100, imgsz=640) @@ -118,7 +129,7 @@ You can train a YOLOv8 model on the African Wildlife Dataset by using the `afric ```bash # Start training from a pretrained *.pt model - yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=african-wildlife.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For additional training parameters and options, refer to the [Training](../../modes/train.md) documentation. 
diff --git a/docs/en/datasets/detect/argoverse.md b/docs/en/datasets/detect/argoverse.md index a834be90edd..4280b09a25f 100644 --- a/docs/en/datasets/detect/argoverse.md +++ b/docs/en/datasets/detect/argoverse.md @@ -43,7 +43,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the Argoverse dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -53,7 +53,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.u from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="Argoverse.yaml", epochs=100, imgsz=640) @@ -63,7 +63,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 [epochs](https://www.u ```bash # Start training from a pretrained *.pt model - yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=Argoverse.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -104,7 +104,7 @@ The [Argoverse](https://www.argoverse.org/) dataset, developed by Argo AI, suppo ### How can I train an Ultralytics YOLO model using the Argoverse dataset? 
-To train a YOLOv8 model with the Argoverse dataset, use the provided YAML configuration file and the following code: +To train a YOLO11 model with the Argoverse dataset, use the provided YAML configuration file and the following code: !!! example "Train Example" @@ -114,7 +114,7 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="Argoverse.yaml", epochs=100, imgsz=640) @@ -125,7 +125,7 @@ To train a YOLOv8 model with the Argoverse dataset, use the provided YAML config ```bash # Start training from a pretrained *.pt model - yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=Argoverse.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For a detailed explanation of the arguments, refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/detect/brain-tumor.md b/docs/en/datasets/detect/brain-tumor.md index 9f108e73882..cb06b9c09e7 100644 --- a/docs/en/datasets/detect/brain-tumor.md +++ b/docs/en/datasets/detect/brain-tumor.md @@ -6,6 +6,8 @@ keywords: brain tumor dataset, MRI scans, CT scans, brain tumor detection, medic # Brain Tumor Dataset +Open Brain Tumor Dataset In Colab + A brain tumor detection dataset consists of medical images from MRI or CT scans, containing information about brain tumor presence, location, and characteristics. This dataset is essential for training [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) algorithms to automate brain tumor identification, aiding in early diagnosis and treatment planning.

@@ -42,7 +44,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page. +To train a YOLO11n model on the brain tumor dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page. !!! example "Train Example" @@ -52,7 +54,7 @@ To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="brain-tumor.yaml", epochs=100, imgsz=640) @@ -62,7 +64,7 @@ To train a YOLOv8n model on the brain tumor dataset for 100 [epochs](https://www ```bash # Start training from a pretrained *.pt model - yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=brain-tumor.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` !!! example "Inference Example" @@ -106,9 +108,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git The brain tumor dataset is divided into two subsets: the **training set** consists of 893 images with corresponding annotations, while the **testing set** comprises 223 images with paired annotations. This structured division aids in developing robust and accurate computer vision models for detecting brain tumors. For more information on the dataset structure, visit the [Dataset Structure](#dataset-structure) section. 
-### How can I train a YOLOv8 model on the brain tumor dataset using Ultralytics? +### How can I train a YOLO11 model on the brain tumor dataset using Ultralytics? -You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an image size of 640px using both Python and CLI methods. Below are the examples for both: +You can train a YOLO11 model on the brain tumor dataset for 100 epochs with an image size of 640px using both Python and CLI methods. Below are the examples for both: !!! example "Train Example" @@ -118,7 +120,7 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="brain-tumor.yaml", epochs=100, imgsz=640) @@ -129,7 +131,7 @@ You can train a YOLOv8 model on the brain tumor dataset for 100 epochs with an i ```bash # Start training from a pretrained *.pt model - yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=brain-tumor.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For a detailed list of available arguments, refer to the [Training](../../modes/train.md) page. @@ -138,9 +140,9 @@ For a detailed list of available arguments, refer to the [Training](../../modes/ Using the brain tumor dataset in AI projects enables early diagnosis and treatment planning for brain tumors. It helps in automating brain tumor identification through computer vision, facilitating accurate and timely medical interventions, and supporting personalized treatment strategies. This application holds significant potential in improving patient outcomes and medical efficiencies. -### How do I perform inference using a fine-tuned YOLOv8 model on the brain tumor dataset? 
+### How do I perform inference using a fine-tuned YOLO11 model on the brain tumor dataset? -Inference using a fine-tuned YOLOv8 model can be performed with either Python or CLI approaches. Here are the examples: +Inference using a fine-tuned YOLO11 model can be performed with either Python or CLI approaches. Here are the examples: !!! example "Inference Example" diff --git a/docs/en/datasets/detect/coco.md b/docs/en/datasets/detect/coco.md index d0901428387..9af5207d618 100644 --- a/docs/en/datasets/detect/coco.md +++ b/docs/en/datasets/detect/coco.md @@ -21,13 +21,7 @@ The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is ## COCO Pretrained Models -| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | +{% include "macros/yolo-det-perf.md" %} ## Key Features @@ -60,7 +54,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the COCO dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -70,7 +64,7 @@ To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultral from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco.yaml", epochs=100, imgsz=640) @@ -80,7 +74,7 @@ To train a YOLOv8n model on the COCO dataset for 100 [epochs](https://www.ultral ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -122,7 +116,7 @@ The [COCO dataset](https://cocodataset.org/#home) (Common Objects in Context) is ### How can I train a YOLO model using the COCO dataset? -To train a YOLOv8 model using the COCO dataset, you can use the following code snippets: +To train a YOLO11 model using the COCO dataset, you can use the following code snippets: !!! example "Train Example" @@ -132,7 +126,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco.yaml", epochs=100, imgsz=640) @@ -142,7 +136,7 @@ To train a YOLOv8 model using the COCO dataset, you can use the following code s ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` Refer to the [Training page](../../modes/train.md) for more details on available arguments. 
@@ -156,13 +150,15 @@ The COCO dataset includes: - Standardized evaluation metrics for object detection (mAP) and segmentation (mean Average Recall, mAR). - **Mosaicing** technique in training batches to enhance model generalization across various object sizes and contexts. -### Where can I find pretrained YOLOv8 models trained on the COCO dataset? +### Where can I find pretrained YOLO11 models trained on the COCO dataset? -Pretrained YOLOv8 models on the COCO dataset can be downloaded from the links provided in the documentation. Examples include: +Pretrained YOLO11 models on the COCO dataset can be downloaded from the links provided in the documentation. Examples include: -- [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) -- [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) -- [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) +- [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) +- [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) +- [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) +- [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) +- [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) These models vary in size, mAP, and inference speed, providing options for different performance and resource requirements. diff --git a/docs/en/datasets/detect/coco8.md b/docs/en/datasets/detect/coco8.md index 4a8ad5a8522..b6b7a5a0f12 100644 --- a/docs/en/datasets/detect/coco8.md +++ b/docs/en/datasets/detect/coco8.md @@ -1,7 +1,7 @@ --- comments: true description: Explore the Ultralytics COCO8 dataset, a versatile and manageable set of 8 images perfect for testing object detection models and training pipelines. 
-keywords: COCO8, Ultralytics, dataset, object detection, YOLOv8, training, validation, machine learning, computer vision +keywords: COCO8, Ultralytics, dataset, object detection, YOLO11, training, validation, machine learning, computer vision --- # COCO8 Dataset @@ -21,7 +21,7 @@ keywords: COCO8, Ultralytics, dataset, object detection, YOLOv8, training, valid Watch: Ultralytics COCO Dataset Overview

-This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). ## Dataset YAML @@ -35,7 +35,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -45,7 +45,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultra from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -55,7 +55,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 [epochs](https://www.ultra ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -95,9 +95,9 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th The Ultralytics COCO8 dataset is a compact yet versatile object detection dataset consisting of the first 8 images from the COCO train 2017 set, with 4 images for 
training and 4 for validation. It is designed for testing and debugging object detection models and experimentation with new detection approaches. Despite its small size, COCO8 offers enough diversity to act as a sanity check for your training pipelines before deploying larger datasets. For more details, view the [COCO8 dataset](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8.yaml). -### How do I train a YOLOv8 model using the COCO8 dataset? +### How do I train a YOLO11 model using the COCO8 dataset? -To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or CLI commands. Here's how you can start: +To train a YOLO11 model using the COCO8 dataset, you can employ either Python or CLI commands. Here's how you can start: !!! example "Train Example" @@ -107,7 +107,7 @@ To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -117,19 +117,19 @@ To train a YOLOv8 model using the COCO8 dataset, you can employ either Python or ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. ### Why should I use Ultralytics HUB for managing my COCO8 training? -Ultralytics HUB is an all-in-one web tool designed to simplify the training and deployment of YOLO models, including the Ultralytics YOLOv8 models on the COCO8 dataset. It offers cloud training, real-time tracking, and seamless dataset management. 
HUB allows you to start training with a single click and avoids the complexities of manual setups. Discover more about [Ultralytics HUB](https://hub.ultralytics.com/) and its benefits. +Ultralytics HUB is an all-in-one web tool designed to simplify the training and deployment of YOLO models, including the Ultralytics YOLO11 models on the COCO8 dataset. It offers cloud training, real-time tracking, and seamless dataset management. HUB allows you to start training with a single click and avoids the complexities of manual setups. Discover more about [Ultralytics HUB](https://hub.ultralytics.com/) and its benefits. ### What are the benefits of using mosaic augmentation in training with the COCO8 dataset? Mosaic augmentation, demonstrated in the COCO8 dataset, combines multiple images into a single image during training. This technique increases the variety of objects and scenes in each training batch, improving the model's ability to generalize across different object sizes, aspect ratios, and contexts. This results in a more robust object detection model. For more details, refer to the [training guide](#usage). -### How can I validate my YOLOv8 model trained on the COCO8 dataset? +### How can I validate my YOLO11 model trained on the COCO8 dataset? -Validation of your YOLOv8 model trained on the COCO8 dataset can be performed using the model's validation commands. You can invoke the validation mode via CLI or Python script to evaluate the model's performance using precise metrics. For detailed instructions, visit the [Validation](../../modes/val.md) page. +Validation of your YOLO11 model trained on the COCO8 dataset can be performed using the model's validation commands. You can invoke the validation mode via CLI or Python script to evaluate the model's performance using precise metrics. For detailed instructions, visit the [Validation](../../modes/val.md) page. 
diff --git a/docs/en/datasets/detect/globalwheat2020.md b/docs/en/datasets/detect/globalwheat2020.md index ef7ff7ac310..e744b8d666a 100644 --- a/docs/en/datasets/detect/globalwheat2020.md +++ b/docs/en/datasets/detect/globalwheat2020.md @@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the Global Wheat Head Dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -48,7 +48,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="GlobalWheat2020.yaml", epochs=100, imgsz=640) @@ -58,7 +58,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 [epochs](https ```bash # Start training from a pretrained *.pt model - yolo detect train data=GlobalWheat2020.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=GlobalWheat2020.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -96,9 +96,9 @@ We would like to acknowledge the researchers and institutions that contributed t The Global Wheat Head Dataset is primarily used for developing and training deep learning models aimed at wheat head detection. 
This is crucial for applications in wheat phenotyping and crop management, allowing for more accurate estimations of wheat head density, size, and overall crop yield potential. Accurate detection methods help in assessing crop health and maturity, essential for efficient crop management. -### How do I train a YOLOv8n model on the Global Wheat Head Dataset? +### How do I train a YOLO11n model on the Global Wheat Head Dataset? -To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the following code snippets. Make sure you have the `GlobalWheat2020.yaml` configuration file specifying dataset paths and classes: +To train a YOLO11n model on the Global Wheat Head Dataset, you can use the following code snippets. Make sure you have the `GlobalWheat2020.yaml` configuration file specifying dataset paths and classes: !!! example "Train Example" @@ -108,7 +108,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the follo from ultralytics import YOLO # Load a pre-trained model (recommended for training) - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train(data="GlobalWheat2020.yaml", epochs=100, imgsz=640) @@ -118,7 +118,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset, you can use the follo ```bash # Start training from a pretrained *.pt model - yolo detect train data=GlobalWheat2020.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=GlobalWheat2020.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/detect/index.md b/docs/en/datasets/detect/index.md index 61640480f61..d5408112b26 100644 --- a/docs/en/datasets/detect/index.md +++ b/docs/en/datasets/detect/index.md @@ -16,7 +16,7 @@ The Ultralytics YOLO format is a dataset configuration format that allows you to ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco8 # dataset root dir +path: ../datasets/coco8 # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir) train: images/train # train images (relative to 'path') 4 images val: images/val # val images (relative to 'path') 4 images test: # test images (optional) @@ -56,7 +56,7 @@ Here's how you can use these formats to train your model: from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -66,7 +66,7 @@ Here's how you can use these formats to train your model: ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Supported Datasets @@ -89,6 +89,7 @@ Here is a list of the supported datasets and a brief description for each: - [Brain-tumor](brain-tumor.md): A dataset for detecting brain tumors includes MRI or CT scan images with details on tumor presence, location, and characteristics. - [African-wildlife](african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebras. - [Signature](signature.md): A dataset featuring images of various documents with annotated signatures, supporting document verification and fraud detection research. 
+- [Medical-pills](medical-pills.md): A dataset featuring images of medical-pills, annotated for applications such as pharmaceutical quality assurance, pill sorting, and regulatory compliance. ### Adding your own dataset @@ -158,11 +159,11 @@ Ultralytics YOLO supports a wide range of datasets, including: - [Objects365](objects365.md) - [OpenImagesV7](open-images-v7.md) -Each dataset page provides detailed information on the structure and usage tailored for efficient YOLOv8 training. Explore the full list in the [Supported Datasets](#supported-datasets) section. +Each dataset page provides detailed information on the structure and usage tailored for efficient YOLO11 training. Explore the full list in the [Supported Datasets](#supported-datasets) section. -### How do I start training a YOLOv8 model using my dataset? +### How do I start training a YOLO11 model using my dataset? -To start training a YOLOv8 model, ensure your dataset is formatted correctly and the paths are defined in a YAML file. Use the following script to begin training: +To start training a YOLO11 model, ensure your dataset is formatted correctly and the paths are defined in a YAML file. Use the following script to begin training: !!! example @@ -171,18 +172,18 @@ To start training a YOLOv8 model, ensure your dataset is formatted correctly and ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") # Load a pretrained model + model = YOLO("yolo11n.pt") # Load a pretrained model results = model.train(data="path/to/your_dataset.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash - yolo detect train data=path/to/your_dataset.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=path/to/your_dataset.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` Refer to the [Usage](#usage) section for more details on utilizing different modes, including CLI commands. ### Where can I find practical examples of using Ultralytics YOLO for object detection? 
-Ultralytics provides numerous examples and practical guides for using YOLOv8 in diverse applications. For a comprehensive overview, visit the [Ultralytics Blog](https://www.ultralytics.com/blog) where you can find case studies, detailed tutorials, and community stories showcasing object detection, segmentation, and more with YOLOv8. For specific examples, check the [Usage](../../modes/predict.md) section in the documentation. +Ultralytics provides numerous examples and practical guides for using YOLO11 in diverse applications. For a comprehensive overview, visit the [Ultralytics Blog](https://www.ultralytics.com/blog) where you can find case studies, detailed tutorials, and community stories showcasing object detection, segmentation, and more with YOLO11. For specific examples, check the [Usage](../../modes/predict.md) section in the documentation. diff --git a/docs/en/datasets/detect/lvis.md b/docs/en/datasets/detect/lvis.md index c4a4ff76ed3..7bcfd088eb0 100644 --- a/docs/en/datasets/detect/lvis.md +++ b/docs/en/datasets/detect/lvis.md @@ -56,7 +56,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the LVIS dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -66,7 +66,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultral from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="lvis.yaml", epochs=100, imgsz=640) @@ -76,7 +76,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 [epochs](https://www.ultral ```bash # Start training from a pretrained *.pt model - yolo detect train data=lvis.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=lvis.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -114,9 +114,9 @@ We would like to acknowledge the LVIS Consortium for creating and maintaining th The [LVIS dataset](https://www.lvisdataset.org/) is a large-scale dataset with fine-grained vocabulary-level annotations developed by Facebook AI Research (FAIR). It is primarily used for object detection and instance segmentation, featuring over 1203 object categories and 2 million instance annotations. Researchers and practitioners use it to train and benchmark models like Ultralytics YOLO for advanced computer vision tasks. The dataset's extensive size and diversity make it an essential resource for pushing the boundaries of model performance in detection and segmentation. -### How can I train a YOLOv8n model using the LVIS dataset? +### How can I train a YOLO11n model using the LVIS dataset? -To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size of 640, follow the example below. This process utilizes Ultralytics' framework, which offers comprehensive training features. +To train a YOLO11n model on the LVIS dataset for 100 epochs with an image size of 640, follow the example below. This process utilizes Ultralytics' framework, which offers comprehensive training features. !!! 
example "Train Example" @@ -126,7 +126,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="lvis.yaml", epochs=100, imgsz=640) @@ -137,7 +137,7 @@ To train a YOLOv8n model on the LVIS dataset for 100 epochs with an image size o ```bash # Start training from a pretrained *.pt model - yolo detect train data=lvis.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=lvis.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For detailed training configurations, refer to the [Training](../../modes/train.md) documentation. @@ -148,7 +148,7 @@ The images in the LVIS dataset are the same as those in the [COCO dataset](./coc ### Why should I use Ultralytics YOLO for training on the LVIS dataset? -Ultralytics YOLO models, including the latest YOLOv8, are optimized for real-time object detection with state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment. +Ultralytics YOLO models, including the latest YOLO11, are optimized for real-time object detection with state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. They support a wide range of annotations, such as the fine-grained ones provided by the LVIS dataset, making them ideal for advanced computer vision applications. 
Moreover, Ultralytics offers seamless integration with various [training](../../modes/train.md), [validation](../../modes/val.md), and [prediction](../../modes/predict.md) modes, ensuring efficient model development and deployment. ### Can I see some sample annotations from the LVIS dataset? diff --git a/docs/en/datasets/detect/medical-pills.md b/docs/en/datasets/detect/medical-pills.md new file mode 100644 index 00000000000..c32aabf2f72 --- /dev/null +++ b/docs/en/datasets/detect/medical-pills.md @@ -0,0 +1,147 @@ +--- +comments: true +description: Explore the medical-pills detection dataset with labeled images. Essential for training AI models for pharmaceutical identification and automation. +keywords: medical-pills dataset, pill detection, pharmaceutical imaging, AI in healthcare, computer vision, object detection, medical automation, dataset for training +--- + +# Medical Pills Dataset + +Open Medical Pills Dataset In Colab + +The medical-pills detection dataset is a proof-of-concept (POC) dataset, carefully curated to demonstrate the potential of AI in pharmaceutical applications. It contains labeled images specifically designed to train [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) [models](https://docs.ultralytics.com/models/) for identifying medical-pills. + +

+
+ +
+ Watch: How to train Ultralytics YOLO11 Model on Medical Pills Detection Dataset in Google Colab +

+ +This dataset serves as a foundational resource for automating essential [tasks](https://docs.ultralytics.com/tasks/) such as quality control, packaging automation, and efficient sorting in pharmaceutical workflows. By integrating this dataset into projects, researchers and developers can explore innovative [solutions](https://docs.ultralytics.com/solutions/) that enhance [accuracy](https://www.ultralytics.com/glossary/accuracy), streamline operations, and ultimately contribute to improved healthcare outcomes. + +## Dataset Structure + +The medical-pills dataset is divided into two subsets: + +- **Training set**: Consisting of 92 images, each annotated with the class `pill`. +- **Validation set**: Comprising 23 images with corresponding annotations. + +## Applications + +Using computer vision for medical-pills detection enables automation in the pharmaceutical industry, supporting tasks like: + +- **Pharmaceutical Sorting**: Automating the sorting of pills based on size, shape, or color to enhance production efficiency. +- **AI Research and Development**: Serving as a benchmark for developing and testing computer vision algorithms in pharmaceutical use cases. +- **Digital Inventory Systems**: Powering smart inventory solutions by integrating automated pill recognition for real-time stock monitoring and replenishment planning. + +## Dataset YAML + +A YAML configuration file is provided to define the dataset's structure, including paths and classes. For the medical-pills dataset, the `medical-pills.yaml` file can be accessed at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml). + +!!! 
example "ultralytics/cfg/datasets/medical-pills.yaml" + + ```yaml + --8<-- "ultralytics/cfg/datasets/medical-pills.yaml" + ``` + +## Usage + +To train a YOLO11n model on the medical-pills dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the following examples. For detailed arguments, refer to the model's [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) + + # Train the model + results = model.train(data="medical-pills.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo detect train data=medical-pills.yaml model=yolo11n.pt epochs=100 imgsz=640 + ``` + +!!! example "Inference Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("path/to/best.pt") # load a fine-tuned model + + # Inference using the model + results = model.predict("https://ultralytics.com/assets/medical-pills-sample.jpg") + ``` + + === "CLI" + + ```bash + # Start prediction with a fine-tuned *.pt model + yolo detect predict model='path/to/best.pt' imgsz=640 source="https://ultralytics.com/assets/medical-pills-sample.jpg" + ``` + +## Sample Images and Annotations + +The medical-pills dataset features labeled images showcasing the diversity of pills. Below is an example of a labeled image from the dataset: + +![Medical-pills dataset sample image](https://github.com/ultralytics/docs/releases/download/0/medical-pills-dataset-sample-image.avif) + +- **Mosaiced Image**: Displayed is a training batch comprising mosaiced dataset images. Mosaicing enhances training diversity by consolidating multiple images into one, improving model generalization. 
+ +## Citations and Acknowledgments + +The dataset is available under the [AGPL-3.0 License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). + +If you use the Medical-pills dataset in your research or development work, please cite it using the following details: + +!!! quote "" + + === "BibTeX" + + ```bibtex + @dataset{Jocher_Ultralytics_Datasets_2024, + author = {Jocher, Glenn and Rizwan, Muhammad}, + license = {AGPL-3.0}, + month = {Dec}, + title = {Ultralytics Datasets: Medical-pills Detection Dataset}, + url = {https://docs.ultralytics.com/datasets/detect/medical-pills/}, + version = {1.0.0}, + year = {2024} + } + ``` + +## FAQ + +### What is the structure of the medical-pills dataset? + +The dataset includes 92 images for training and 23 images for validation. Each image is annotated with the class `pill`, enabling effective training and evaluation of models. + +### How can I train a YOLO11 model on the medical-pills dataset? + +You can train a YOLO11 model for 100 epochs with an image size of 640px using the Python or CLI methods provided. Refer to the [Training Example](#usage) section for detailed instructions. + +### What are the benefits of using the medical-pills dataset in AI projects? + +The dataset enables automation in pill detection, contributing to counterfeit prevention, quality assurance, and pharmaceutical process optimization. + +### How do I perform inference on the medical-pills dataset? + +Inference can be done using Python or CLI methods with a fine-tuned YOLO11 model. Refer to the [Inference Example](#usage) section for code snippets. + +### Where can I find the YAML configuration file for the medical-pills dataset? + +The YAML file is available at [medical-pills.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/medical-pills.yaml), containing dataset paths, classes, and additional configuration details.
diff --git a/docs/en/datasets/detect/objects365.md b/docs/en/datasets/detect/objects365.md index 49947617afe..96e6f3140c7 100644 --- a/docs/en/datasets/detect/objects365.md +++ b/docs/en/datasets/detect/objects365.md @@ -1,7 +1,7 @@ --- comments: true description: Explore the Objects365 Dataset with 2M images and 30M bounding boxes across 365 categories. Enhance your object detection models with diverse, high-quality data. -keywords: Objects365 dataset, object detection, machine learning, deep learning, computer vision, annotated images, bounding boxes, YOLOv8, high-resolution images, dataset configuration +keywords: Objects365 dataset, object detection, machine learning, deep learning, computer vision, annotated images, bounding boxes, YOLO11, high-resolution images, dataset configuration --- # Objects365 Dataset @@ -38,7 +38,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the Objects365 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -48,7 +48,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www. 
from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="Objects365.yaml", epochs=100, imgsz=640) @@ -58,7 +58,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 [epochs](https://www. ```bash # Start training from a pretrained *.pt model - yolo detect train data=Objects365.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=Objects365.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -97,9 +97,9 @@ We would like to acknowledge the team of researchers who created and maintain th The [Objects365 dataset](https://www.objects365.org/) is designed for object detection tasks in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and computer vision. It provides a large-scale, high-quality dataset with 2 million annotated images and 30 million bounding boxes across 365 categories. Leveraging such a diverse dataset helps improve the performance and generalization of object detection models, making it invaluable for research and development in the field. -### How can I train a YOLOv8 model on the Objects365 dataset? +### How can I train a YOLO11 model on the Objects365 dataset? -To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an image size of 640, follow these instructions: +To train a YOLO11n model using the Objects365 dataset for 100 epochs with an image size of 640, follow these instructions: !!! 
example "Train Example" @@ -109,7 +109,7 @@ To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an ima from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="Objects365.yaml", epochs=100, imgsz=640) @@ -119,7 +119,7 @@ To train a YOLOv8n model using the Objects365 dataset for 100 epochs with an ima ```bash # Start training from a pretrained *.pt model - yolo detect train data=Objects365.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=Objects365.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` Refer to the [Training](../../modes/train.md) page for a comprehensive list of available arguments. diff --git a/docs/en/datasets/detect/open-images-v7.md b/docs/en/datasets/detect/open-images-v7.md index 1e6f1f7e4fb..1751a2d0a43 100644 --- a/docs/en/datasets/detect/open-images-v7.md +++ b/docs/en/datasets/detect/open-images-v7.md @@ -1,7 +1,7 @@ --- comments: true -description: Explore the comprehensive Open Images V7 dataset by Google. Learn about its annotations, applications, and use YOLOv8 pretrained models for computer vision tasks. -keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object detection, image segmentation, visual relationships, AI research, Ultralytics +description: Explore the comprehensive Open Images V7 dataset by Google. Learn about its annotations, applications, and use YOLO11 pretrained models for computer vision tasks. +keywords: Open Images V7, Google dataset, computer vision, YOLO11 models, object detection, image segmentation, visual relationships, AI research, Ultralytics --- # Open Images V7 Dataset @@ -16,7 +16,7 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object allowfullscreen>
- Watch: [Object Detection](https://www.ultralytics.com/glossary/object-detection) using OpenImagesV7 Pretrained Model + Watch: Object Detection using OpenImagesV7 Pretrained Model

## Open Images V7 Pretrained Models @@ -29,6 +29,35 @@ keywords: Open Images V7, Google dataset, computer vision, YOLOv8 models, object | [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 | | [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 | +You can use these pretrained models for inference or fine-tuning as follows. + +!!! example "Pretrained Model Usage Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load an Open Images Dataset V7 pretrained YOLOv8n model + model = YOLO("yolov8n-oiv7.pt") + + # Run prediction + results = model.predict(source="image.jpg") + + # Start training from the pretrained checkpoint + results = model.train(data="coco8.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Predict using an Open Images Dataset V7 pretrained model + yolo detect predict source=image.jpg model=yolov8n-oiv7.pt + + # Start training from an Open Images Dataset V7 pretrained checkpoint + yolo detect train data=coco8.yaml model=yolov8n-oiv7.pt epochs=100 imgsz=640 + ``` + ![Open Images V7 classes visual](https://github.com/ultralytics/docs/releases/download/0/open-images-v7-classes-visual.avif) ## Key Features @@ -69,7 +98,7 @@ Typically, datasets come with a YAML (Yet Another Markup Language) file that del ## Usage -To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the Open Images V7 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets.
For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! warning @@ -87,8 +116,8 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https:// ```python from ultralytics import YOLO - # Load a COCO-pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a COCO-pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Train the model on the Open Images V7 dataset results = model.train(data="open-images-v7.yaml", epochs=100, imgsz=640) @@ -97,8 +126,8 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 [epochs](https:// === "CLI" ```bash - # Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset - yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640 + # Train a COCO-pretrained YOLO11n model on the Open Images V7 dataset + yolo detect train data=open-images-v7.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -136,9 +165,9 @@ A heartfelt acknowledgment goes out to the Google AI team for creating and maint Open Images V7 is an extensive and versatile dataset created by Google, designed to advance research in computer vision. It includes image-level labels, object bounding boxes, object segmentation masks, visual relationships, and localized narratives, making it ideal for various computer vision tasks such as object detection, segmentation, and relationship detection. -### How do I train a YOLOv8 model on the Open Images V7 dataset? +### How do I train a YOLO11 model on the Open Images V7 dataset? -To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python and CLI commands. Here's an example of training the YOLOv8n model for 100 epochs with an image size of 640: +To train a YOLO11 model on the Open Images V7 dataset, you can use both Python and CLI commands. Here's an example of training the YOLO11n model for 100 epochs with an image size of 640: !!! 
example "Train Example" @@ -147,8 +176,8 @@ To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python a ```python from ultralytics import YOLO - # Load a COCO-pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a COCO-pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Train the model on the Open Images V7 dataset results = model.train(data="open-images-v7.yaml", epochs=100, imgsz=640) @@ -158,8 +187,8 @@ To train a YOLOv8 model on the Open Images V7 dataset, you can use both Python a === "CLI" ```bash - # Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset - yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640 + # Train a COCO-pretrained YOLO11n model on the Open Images V7 dataset + yolo detect train data=open-images-v7.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For more details on arguments and settings, refer to the [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/detect/roboflow-100.md b/docs/en/datasets/detect/roboflow-100.md index 6b3c540e030..ba228242a65 100644 --- a/docs/en/datasets/detect/roboflow-100.md +++ b/docs/en/datasets/detect/roboflow-100.md @@ -67,7 +67,7 @@ Dataset benchmarking evaluates machine learning model performance on specific da if path.exists(): # Fix YAML file and run training benchmark.fix_yaml(str(path)) - os.system(f"yolo detect train data={path} model=yolov8s.pt epochs=1 batch=16") + os.system(f"yolo detect train data={path} model=yolo11s.pt epochs=1 batch=16") # Run validation and evaluate os.system(f"yolo detect val data={path} model=runs/detect/train/weights/best.pt > {val_log_file} 2>&1") @@ -165,7 +165,7 @@ To use the Roboflow 100 dataset for benchmarking, you can implement the RF100Ben if path.exists(): # Fix YAML file and run training benchmark.fix_yaml(str(path)) - os.system(f"yolo detect train data={path} model=yolov8s.pt epochs=1 batch=16") + os.system(f"yolo detect train data={path} model=yolo11n.pt epochs=1 batch=16") 
# Run validation and evaluate os.system(f"yolo detect val data={path} model=runs/detect/train/weights/best.pt > {val_log_file} 2>&1") diff --git a/docs/en/datasets/detect/signature.md b/docs/en/datasets/detect/signature.md index 5746d57e026..834e711a45b 100644 --- a/docs/en/datasets/detect/signature.md +++ b/docs/en/datasets/detect/signature.md @@ -1,7 +1,7 @@ --- comments: true description: Discover the Signature Detection Dataset for training models to identify and verify human signatures in various documents. Perfect for document verification and fraud prevention. -keywords: Signature Detection Dataset, document verification, fraud detection, computer vision, YOLOv8, Ultralytics, annotated signatures, training dataset +keywords: Signature Detection Dataset, document verification, fraud detection, computer vision, YOLO11, Ultralytics, annotated signatures, training dataset --- # Signature Detection Dataset @@ -31,7 +31,7 @@ A YAML (Yet Another Markup Language) file defines the dataset configuration, inc ## Usage -To train a YOLOv8n model on the signature detection dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page. +To train a YOLO11n model on the signature detection dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -41,7 +41,7 @@ To train a YOLOv8n model on the signature detection dataset for 100 [epochs](htt from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="signature.yaml", epochs=100, imgsz=640) @@ -51,7 +51,7 @@ To train a YOLOv8n model on the signature detection dataset for 100 [epochs](htt ```bash # Start training from a pretrained *.pt model - yolo detect train data=signature.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=signature.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` !!! example "Inference Example" @@ -95,9 +95,9 @@ The dataset has been released available under the [AGPL-3.0 License](https://git The Signature Detection Dataset is a collection of annotated images aimed at detecting human signatures within various document types. It can be applied in computer vision tasks such as [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking, primarily for document verification, fraud detection, and archival research. This dataset helps train models to recognize signatures in different contexts, making it valuable for both research and practical applications. -### How do I train a YOLOv8n model on the Signature Detection Dataset? +### How do I train a YOLO11n model on the Signature Detection Dataset? -To train a YOLOv8n model on the Signature Detection Dataset, follow these steps: +To train a YOLO11n model on the Signature Detection Dataset, follow these steps: 1. Download the `signature.yaml` dataset configuration file from [signature.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/signature.yaml). 2. 
Use the following Python script or CLI command to start training: @@ -110,7 +110,7 @@ To train a YOLOv8n model on the Signature Detection Dataset, follow these steps: from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train(data="signature.yaml", epochs=100, imgsz=640) @@ -119,7 +119,7 @@ To train a YOLOv8n model on the Signature Detection Dataset, follow these steps: === "CLI" ```bash - yolo detect train data=signature.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=signature.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For more details, refer to the [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/detect/sku-110k.md b/docs/en/datasets/detect/sku-110k.md index c6cddc483fa..90557bb6b22 100644 --- a/docs/en/datasets/detect/sku-110k.md +++ b/docs/en/datasets/detect/sku-110k.md @@ -6,7 +6,7 @@ keywords: SKU-110k, dataset, object detection, retail shelf images, deep learnin # SKU-110k Dataset -The [SKU-110k](https://github.com/eg4000/SKU110K_CVPR19) dataset is a collection of densely packed retail shelf images, designed to support research in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks. Developed by Eran Goldman et al., the dataset contains over 110,000 unique store keeping unit (SKU) categories with densely packed objects, often looking similar or even identical, positioned in close proximity. +The [SKU-110k](https://github.com/eg4000/SKU110K_CVPR19) dataset is a collection of densely packed retail shelf images, designed to support research in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks. Developed by Eran Goldman et al., the dataset contains over 110,000 unique store keeping unit (SKU) categories with densely packed objects, often looking similar or even identical, positioned in proximity.


@@ -51,7 +51,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the SKU-110K dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -61,7 +61,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ul from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="SKU-110K.yaml", epochs=100, imgsz=640) @@ -71,7 +71,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 [epochs](https://www.ul ```bash # Start training from a pretrained *.pt model - yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=SKU-110K.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -107,11 +107,11 @@ We would like to acknowledge Eran Goldman et al. for creating and maintaining th ### What is the SKU-110k dataset and why is it important for object detection? -The SKU-110k dataset consists of densely packed retail shelf images designed to aid research in object detection tasks. Developed by Eran Goldman et al., it includes over 110,000 unique SKU categories. 
Its importance lies in its ability to challenge state-of-the-art object detectors with diverse object appearances and close proximity, making it an invaluable resource for researchers and practitioners in computer vision. Learn more about the dataset's structure and applications in our [SKU-110k Dataset](#sku-110k-dataset) section. +The SKU-110k dataset consists of densely packed retail shelf images designed to aid research in object detection tasks. Developed by Eran Goldman et al., it includes over 110,000 unique SKU categories. Its importance lies in its ability to challenge state-of-the-art object detectors with diverse object appearances and proximity, making it an invaluable resource for researchers and practitioners in computer vision. Learn more about the dataset's structure and applications in our [SKU-110k Dataset](#sku-110k-dataset) section. -### How do I train a YOLOv8 model using the SKU-110k dataset? +### How do I train a YOLO11 model using the SKU-110k dataset? -Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an example to train a YOLOv8n model for 100 epochs with an image size of 640: +Training a YOLO11 model on the SKU-110k dataset is straightforward. Here's an example to train a YOLO11n model for 100 epochs with an image size of 640: !!! example "Train Example" @@ -121,7 +121,7 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. Here's an ex from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="SKU-110K.yaml", epochs=100, imgsz=640) @@ -132,7 +132,7 @@ Training a YOLOv8 model on the SKU-110k dataset is straightforward. 
Here's an ex ```bash # Start training from a pretrained *.pt model - yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=SKU-110K.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/detect/visdrone.md b/docs/en/datasets/detect/visdrone.md index 99b182cb4e0..cbc89d835c8 100644 --- a/docs/en/datasets/detect/visdrone.md +++ b/docs/en/datasets/detect/visdrone.md @@ -47,7 +47,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the VisDrone dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -57,7 +57,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ul from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="VisDrone.yaml", epochs=100, imgsz=640) @@ -67,7 +67,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 [epochs](https://www.ul ```bash # Start training from a pretrained *.pt model - yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=VisDrone.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -113,9 +113,9 @@ The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large- - **Diversity**: Collected across 14 cities, in urban and rural settings, under different weather and lighting conditions. - **Tasks**: Split into five main tasks—object detection in images and videos, single-object and multi-object tracking, and crowd counting. -### How can I use the VisDrone Dataset to train a YOLOv8 model with Ultralytics? +### How can I use the VisDrone Dataset to train a YOLO11 model with Ultralytics? -To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image size of 640, you can follow these steps: +To train a YOLO11 model on the VisDrone dataset for 100 epochs with an image size of 640, you can follow these steps: !!!
example "Train Example" @@ -125,7 +125,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train(data="VisDrone.yaml", epochs=100, imgsz=640) @@ -135,7 +135,7 @@ To train a YOLOv8 model on the VisDrone dataset for 100 epochs with an image siz ```bash # Start training from a pretrained *.pt model - yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=VisDrone.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For additional configuration options, please refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/detect/voc.md b/docs/en/datasets/detect/voc.md index 7dc67fb5a44..449810e6980 100644 --- a/docs/en/datasets/detect/voc.md +++ b/docs/en/datasets/detect/voc.md @@ -39,7 +39,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n model on the VOC dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -49,7 +49,7 @@ To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultraly from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="VOC.yaml", epochs=100, imgsz=640) @@ -59,7 +59,7 @@ To train a YOLOv8n model on the VOC dataset for 100 [epochs](https://www.ultraly ```bash # Start training from a pretrained *.pt model - yolo detect train data=VOC.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=VOC.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -99,9 +99,9 @@ We would like to acknowledge the PASCAL VOC Consortium for creating and maintain The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes) dataset is a renowned benchmark for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification in computer vision. It includes comprehensive annotations like bounding boxes, class labels, and segmentation masks across 20 different object categories. Researchers use it widely to evaluate the performance of models like Faster R-CNN, YOLO, and Mask R-CNN due to its standardized evaluation metrics such as mean Average Precision (mAP). -### How do I train a YOLOv8 model using the VOC dataset? +### How do I train a YOLO11 model using the VOC dataset? -To train a YOLOv8 model with the VOC dataset, you need the dataset configuration in a YAML file. Here's an example to start training a YOLOv8n model for 100 epochs with an image size of 640: +To train a YOLO11 model with the VOC dataset, you need the dataset configuration in a YAML file. Here's an example to start training a YOLO11n model for 100 epochs with an image size of 640: !!! 
example "Train Example" @@ -111,7 +111,7 @@ To train a YOLOv8 model with the VOC dataset, you need the dataset configuration from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="VOC.yaml", epochs=100, imgsz=640) @@ -121,7 +121,7 @@ To train a YOLOv8 model with the VOC dataset, you need the dataset configuration ```bash # Start training from a pretrained *.pt model - yolo detect train data=VOC.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=VOC.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ### What are the primary challenges included in the VOC dataset? diff --git a/docs/en/datasets/detect/xview.md b/docs/en/datasets/detect/xview.md index df8e493357f..41b6c20ad8e 100644 --- a/docs/en/datasets/detect/xview.md +++ b/docs/en/datasets/detect/xview.md @@ -52,7 +52,7 @@ To train a model on the xView dataset for 100 [epochs](https://www.ultralytics.c from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="xView.yaml", epochs=100, imgsz=640) @@ -62,7 +62,7 @@ To train a model on the xView dataset for 100 [epochs](https://www.ultralytics.c ```bash # Start training from a pretrained *.pt model - yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=xView.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -114,7 +114,7 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended 
for training) # Train the model results = model.train(data="xView.yaml", epochs=100, imgsz=640) @@ -125,7 +125,7 @@ To train a model on the xView dataset using Ultralytics YOLO, follow these steps ```bash # Start training from a pretrained *.pt model - yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=xView.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For detailed arguments and settings, refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/explorer/api.md b/docs/en/datasets/explorer/api.md index 4c550991367..f20489d444b 100644 --- a/docs/en/datasets/explorer/api.md +++ b/docs/en/datasets/explorer/api.md @@ -6,6 +6,10 @@ keywords: Ultralytics, Explorer API, dataset exploration, SQL queries, similarit # Ultralytics Explorer API +!!! warning "Community Note โš ๏ธ" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. 
Make sure to check it out and take advantage of its powerful features!๐Ÿš€ + ## Introduction Open In Colab @@ -36,7 +40,7 @@ pip install ultralytics[explorer] from ultralytics import Explorer # Create an Explorer object -explorer = Explorer(data="coco128.yaml", model="yolov8n.pt") +explorer = Explorer(data="coco128.yaml", model="yolo11n.pt") # Create embeddings for your dataset explorer.create_embeddings_table() @@ -75,7 +79,7 @@ You get a pandas dataframe with the `limit` number of most similar data points t from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() similar = exp.get_similar(img="https://ultralytics.com/images/bus.jpg", limit=10) @@ -95,7 +99,7 @@ You get a pandas dataframe with the `limit` number of most similar data points t from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() similar = exp.get_similar(idx=1, limit=10) @@ -118,7 +122,7 @@ You can also plot the similar images using the `plot_similar` method. This metho from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() plt = exp.plot_similar(img="https://ultralytics.com/images/bus.jpg", limit=10) @@ -131,7 +135,7 @@ You can also plot the similar images using the `plot_similar` method. 
This metho from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() plt = exp.plot_similar(idx=1, limit=10) @@ -150,7 +154,7 @@ Note: This works using LLMs under the hood so the results are probabilistic and from ultralytics.data.explorer import plot_query_result # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() df = exp.ask_ai("show me 100 images with exactly one person and 2 dogs. There can be other objects too") @@ -171,7 +175,7 @@ You can run SQL queries on your dataset using the `sql_query` method. This metho from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() df = exp.sql_query("WHERE labels LIKE '%person%' AND labels LIKE '%dog%'") @@ -188,7 +192,7 @@ You can also plot the results of a SQL query using the `plot_sql_query` method. 
from ultralytics import Explorer # create an Explorer object - exp = Explorer(data="coco128.yaml", model="yolov8n.pt") + exp = Explorer(data="coco128.yaml", model="yolo11n.pt") exp.create_embeddings_table() # plot the SQL Query @@ -235,7 +239,7 @@ Here are some examples of what you can do with the table: ```python from ultralytics import Explorer - exp = Explorer(model="yolov8n.pt") + exp = Explorer(model="yolo11n.pt") exp.create_embeddings_table() table = exp.table @@ -361,7 +365,7 @@ You can use the Ultralytics Explorer API to perform similarity searches by creat from ultralytics import Explorer # Create an Explorer object -explorer = Explorer(data="coco128.yaml", model="yolov8n.pt") +explorer = Explorer(data="coco128.yaml", model="yolo11n.pt") explorer.create_embeddings_table() # Search for similar images to a given image @@ -383,7 +387,7 @@ The Ask AI feature allows users to filter datasets using natural language querie from ultralytics import Explorer # Create an Explorer object -explorer = Explorer(data="coco128.yaml", model="yolov8n.pt") +explorer = Explorer(data="coco128.yaml", model="yolo11n.pt") explorer.create_embeddings_table() # Query with natural language diff --git a/docs/en/datasets/explorer/dashboard.md b/docs/en/datasets/explorer/dashboard.md index 3bc3a21e469..92c1ba78b35 100644 --- a/docs/en/datasets/explorer/dashboard.md +++ b/docs/en/datasets/explorer/dashboard.md @@ -6,6 +6,10 @@ keywords: Ultralytics Explorer GUI, semantic search, vector similarity, SQL quer # Explorer GUI +!!! warning "Community Note โš ๏ธ" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. 
Make sure to check it out and take advantage of its powerful features!🚀 + Explorer GUI is like a playground build using [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity search, SQL queries and even search using natural language using our ask AI feature powered by LLMs.

diff --git a/docs/en/datasets/explorer/explorer.ipynb b/docs/en/datasets/explorer/explorer.ipynb deleted file mode 100644 index fbca01737c3..00000000000 --- a/docs/en/datasets/explorer/explorer.ipynb +++ /dev/null @@ -1,604 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "aa923c26-81c8-4565-9277-1cb686e3702e", - "metadata": { - "id": "aa923c26-81c8-4565-9277-1cb686e3702e" - }, - "source": [ - "# VOC Exploration Example\n", - "

\n", - "\n", - " \n", - " \n", - "\n", - " [ไธญๆ–‡](https://docs.ultralytics.com/zh/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/)\n", - "\n", - " \"Run\n", - " \"Open\n", - " \"Open\n", - "\n", - "Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets using with the power of semantic search. You can utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.\n", - "\n", - "We hope that the resources in this notebook will help you get the most out of Ultralytics. Please browse the Explorer Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", - "\n", - "Try `yolo explorer` powered by Exlorer API\n", - "\n", - "Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f", - "metadata": { - "id": "2454d9ba-9db4-4b37-98e8-201ba285c92f" - }, - "source": [ - "## Setup\n", - "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41", - "metadata": { - "id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41" - }, - "outputs": [], - "source": [ - "%pip install ultralytics[explorer] openai\n", - "import ultralytics\n", - "\n", - "ultralytics.checks()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae602549-3419-4909-9f82-35cba515483f", - "metadata": { - "id": "ae602549-3419-4909-9f82-35cba515483f" - }, - "outputs": [], - "source": [ - "from ultralytics import Explorer" - ] - }, - { - "cell_type": "markdown", - "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c", - "metadata": { - "id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c" - }, - "source": [ - "## Similarity search\n", - "Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. 
It is only needed once and it is reused automatically.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "334619da-6deb-4b32-9fe0-74e0a79cee20", - "metadata": { - "id": "334619da-6deb-4b32-9fe0-74e0a79cee20" - }, - "outputs": [], - "source": [ - "exp = Explorer(\"VOC.yaml\", model=\"yolov8n.pt\")\n", - "exp.create_embeddings_table()" - ] - }, - { - "cell_type": "markdown", - "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d", - "metadata": { - "id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d" - }, - "source": [ - "One the embeddings table is built, you can get run semantic search in any of the following ways:\n", - "- On a given index / list of indices in the dataset like - `exp.get_similar(idx=[1,10], limit=10)`\n", - "- On any image/ list of images not in the dataset - `exp.get_similar(img=[\"path/to/img1\", \"path/to/img2\"], limit=10)`\n", - "In case of multiple inputs, the aggregade of their embeddings is used.\n", - "\n", - "You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b485f05b-d92d-42bc-8da7-5e361667b341", - "metadata": { - "id": "b485f05b-d92d-42bc-8da7-5e361667b341" - }, - "outputs": [], - "source": [ - "similar = exp.get_similar(idx=1, limit=10)\n", - "similar.head()" - ] - }, - { - "cell_type": "markdown", - "id": "acf4b489-2161-4176-a1fe-d1d067d8083d", - "metadata": { - "id": "acf4b489-2161-4176-a1fe-d1d067d8083d" - }, - "source": [ - "You can use the also plot the similar samples directly using the `plot_similar` util\n", - "

\n", - "\n", - " \n", - "

\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7", - "metadata": { - "id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7" - }, - "outputs": [], - "source": [ - "exp.plot_similar(idx=6500, limit=20)\n", - "# exp.plot_similar(idx=[100,101], limit=10) # Can also pass list of idxs or imgs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d", - "metadata": { - "id": "260e09bf-4960-4089-a676-cb0e76ff3c0d" - }, - "outputs": [], - "source": [ - "exp.plot_similar(\n", - " img=\"https://ultralytics.com/images/bus.jpg\", limit=10, labels=False\n", - ") # Can also pass any external images" - ] - }, - { - "cell_type": "markdown", - "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a", - "metadata": { - "id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a" - }, - "source": [ - "

\n", - "\n", - "\n", - "

" - ] - }, - { - "cell_type": "markdown", - "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553", - "metadata": { - "id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553" - }, - "source": [ - "## 2. Ask AI: Search or filter with Natural Language\n", - "You can prompt the Explorer object with the kind of data points you want to see and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.\n", - "

\n", - "\"Screenshot\n", - "\n", - "

\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c", - "metadata": { - "id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c" - }, - "outputs": [], - "source": [ - "df = exp.ask_ai(\"show me images containing more than 10 objects with at least 2 persons\")\n", - "df.head(5)" - ] - }, - { - "cell_type": "markdown", - "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f", - "metadata": { - "id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f" - }, - "source": [ - "for plotting these results you can use `plot_query_result` util\n", - "Example:\n", - "```\n", - "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", - "Image.fromarray(plt)\n", - "```\n", - "

\n", - " \n", - "\n", - "

" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511", - "metadata": { - "id": "b1cfab84-9835-4da0-8e9a-42b30cf84511" - }, - "outputs": [], - "source": [ - "# plot\n", - "from PIL import Image\n", - "\n", - "from ultralytics.data.explorer import plot_query_result\n", - "\n", - "plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n", - "Image.fromarray(plt)" - ] - }, - { - "cell_type": "markdown", - "id": "35315ae6-d827-40e4-8813-279f97a83b34", - "metadata": { - "id": "35315ae6-d827-40e4-8813-279f97a83b34" - }, - "source": [ - "## 3. Run SQL queries on your Dataset!\n", - "Sometimes you might want to investigate a certain type of entries in your dataset. For this Explorer allows you to execute SQL queries.\n", - "It accepts either of the formats:\n", - "- Queries beginning with \"WHERE\" will automatically select all columns. This can be thought of as a short-hand query\n", - "- You can also write full queries where you can specify which columns to select\n", - "\n", - "This can be used to investigate model performance and specific data points. For example:\n", - "- let's say your model struggles on images that have humans and dogs. 
You can write a query like this to select the points that have at least 2 humans AND at least one dog.\n", - "\n", - "You can combine SQL query and semantic search to filter down to specific type of results\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf", - "metadata": { - "id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf" - }, - "outputs": [], - "source": [ - "table = exp.sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\")\n", - "table" - ] - }, - { - "cell_type": "markdown", - "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab", - "metadata": { - "id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab" - }, - "source": [ - "Just like similarity search, you also get a util to directly plot the sql queries using `exp.plot_sql_query`\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18b977e7-d048-4b22-b8c4-084a03b04f23", - "metadata": { - "id": "18b977e7-d048-4b22-b8c4-084a03b04f23" - }, - "outputs": [], - "source": [ - "exp.plot_sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\", labels=True)" - ] - }, - { - "cell_type": "markdown", - "id": "f26804c5-840b-4fd1-987f-e362f29e3e06", - "metadata": { - "id": "f26804c5-840b-4fd1-987f-e362f29e3e06" - }, - "source": [ - "## 3. Working with embeddings Table (Advanced)\n", - "Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre and post filters, etc." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a", - "metadata": { - "id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a" - }, - "outputs": [], - "source": [ - "table = exp.table\n", - "table.schema" - ] - }, - { - "cell_type": "markdown", - "id": "238db292-8610-40b3-9af7-dfd6be174892", - "metadata": { - "id": "238db292-8610-40b3-9af7-dfd6be174892" - }, - "source": [ - "### Run raw queries\n", - "Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. In LLM and other AI applications, each data point can be presented by the embeddings generated from some models, it returns the most relevant features.\n", - "\n", - "A search in high-dimensional vector space, is to find K-Nearest-Neighbors (KNN) of the query vector.\n", - "\n", - "Metric\n", - "In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:\n", - "- L2\n", - "- Cosine\n", - "- Dot\n", - "Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. 
More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/)\n", - "\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d74430fe-5aee-45a1-8863-3f2c31338792", - "metadata": { - "id": "d74430fe-5aee-45a1-8863-3f2c31338792" - }, - "outputs": [], - "source": [ - "dummy_img_embedding = [i for i in range(256)]\n", - "table.search(dummy_img_embedding).limit(5).to_pandas()" - ] - }, - { - "cell_type": "markdown", - "id": "587486b4-0d19-4214-b994-f032fb2e8eb5", - "metadata": { - "id": "587486b4-0d19-4214-b994-f032fb2e8eb5" - }, - "source": [ - "### Inter-conversion to popular data formats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c", - "metadata": { - "id": "bb2876ea-999b-4eba-96bc-c196ba02c41c" - }, - "outputs": [], - "source": [ - "df = table.to_pandas()\n", - "pa_table = table.to_arrow()" - ] - }, - { - "cell_type": "markdown", - "id": "42659d63-ad76-49d6-8dfc-78d77278db72", - "metadata": { - "id": "42659d63-ad76-49d6-8dfc-78d77278db72" - }, - "source": [ - "### Work with Embeddings\n", - "You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca", - "metadata": { - "id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "embeddings = table.to_pandas()[\"vector\"].tolist()\n", - "embeddings = np.array(embeddings)" - ] - }, - { - "cell_type": "markdown", - "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602", - "metadata": { - "id": "e8df0a49-9596-4399-954b-b8ae1fd7a602" - }, - "source": [ - "### Scatterplot\n", - "One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. 
Let's try an example\n", - "\n", - "\"Screenshot\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8", - "metadata": { - "id": "d9a150e8-8092-41b3-82f8-2247f8187fc8" - }, - "outputs": [], - "source": [ - "!pip install scikit-learn --q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "196079c3-45a9-4325-81ab-af79a881e37a", - "metadata": { - "id": "196079c3-45a9-4325-81ab-af79a881e37a" - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from sklearn.decomposition import PCA\n", - "\n", - "# Reduce dimensions using PCA to 3 components for visualization in 3D\n", - "pca = PCA(n_components=3)\n", - "reduced_data = pca.fit_transform(embeddings)\n", - "\n", - "# Create a 3D scatter plot using Matplotlib's Axes3D\n", - "fig = plt.figure(figsize=(8, 6))\n", - "ax = fig.add_subplot(111, projection=\"3d\")\n", - "\n", - "# Scatter plot\n", - "ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)\n", - "ax.set_title(\"3D Scatter Plot of Reduced 256-Dimensional Data (PCA)\")\n", - "ax.set_xlabel(\"Component 1\")\n", - "ax.set_ylabel(\"Component 2\")\n", - "ax.set_zlabel(\"Component 3\")\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "1c843c23-e3f2-490e-8d6c-212fa038a149", - "metadata": { - "id": "1c843c23-e3f2-490e-8d6c-212fa038a149" - }, - "source": [ - "## 4. Similarity Index\n", - "Here's a simple example of an operation powered by the embeddings table. 
Explorer comes with a `similarity_index` operation-\n", - "* It tries to estimate how similar each data point is with the rest of the dataset.\n", - "* It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.\n", - "\n", - "For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case, your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.\n", - "Similar to vector and SQL search, this also comes with a util to directly plot it. Let's look at the plot first\n", - "\"Screenshot\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc", - "metadata": { - "id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc" - }, - "outputs": [], - "source": [ - "exp.plot_similarity_index(max_dist=0.2, top_k=0.01)" - ] - }, - { - "cell_type": "markdown", - "id": "28228a9a-b727-45b5-8ca7-8db662c0b937", - "metadata": { - "id": "28228a9a-b727-45b5-8ca7-8db662c0b937" - }, - "source": [ - "Now let's look at the output of the operation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a", - "metadata": { - "id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d", - "metadata": { - "id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d" - }, - "outputs": [], - "source": [ - "sim_idx" - ] - }, - { - "cell_type": "markdown", - "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4", - "metadata": { - "id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4" - }, - "source": [ - "Let's create a query to see what data points have similarity count of more than 30 and 
plot images similar to them." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58d2557b-d401-43cf-937d-4f554c7bc808", - "metadata": { - "id": "58d2557b-d401-43cf-937d-4f554c7bc808" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "sim_count = np.array(sim_idx[\"count\"])\n", - "sim_idx[\"im_file\"][sim_count > 30]" - ] - }, - { - "cell_type": "markdown", - "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e", - "metadata": { - "id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e" - }, - "source": [ - "You should see something like this\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2", - "metadata": { - "id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2" - }, - "outputs": [], - "source": [ - "exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/en/datasets/explorer/explorer.md b/docs/en/datasets/explorer/explorer.md new file mode 100644 index 00000000000..cc4a1e7e849 --- /dev/null +++ b/docs/en/datasets/explorer/explorer.md @@ -0,0 +1,278 @@ +--- +comments: true +description: Dive into advanced data exploration with Ultralytics Explorer. Perform semantic searches, execute SQL queries, and leverage AI-powered natural language insights for seamless data analysis. 
+keywords: Ultralytics Explorer, data exploration, semantic search, vector similarity, SQL queries, AI, natural language queries, machine learning, OpenAI, LLMs, Ultralytics HUB +--- + +# VOC Exploration Example + + + +Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets using the power of semantic search. You can use utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries. + +Try `yolo explorer` powered by Explorer API + +Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser! + +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 + +## Setup + +Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware. + +```bash +%pip install ultralytics[explorer] openai +yolo checks +``` + +## Similarity Search + +Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once, and it is reused automatically. 
+ +```python +exp = Explorer("VOC.yaml", model="yolo11n.pt") +exp.create_embeddings_table() +``` + +Once the embeddings table is built, you can run semantic search in any of the following ways: + +- On a given index / list of indices in the dataset like - exp.get_similar(idx=[1,10], limit=10) +- On any image/ list of images not in the dataset - exp.get_similar(img=["path/to/img1", "path/to/img2"], limit=10) In case of multiple inputs, the aggregate of their embeddings is used. + +You get a pandas dataframe with the limit number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering. + +![Similarity search table](https://github.com/ultralytics/docs/releases/download/0/similarity-search-table.avif) + +```python +# Search dataset by index +similar = exp.get_similar(idx=1, limit=10) +similar.head() +``` + +You can also plot the similar samples directly using the `plot_similar` util. + +![Similarity search image 1](https://github.com/ultralytics/docs/releases/download/0/similarity-search-image-1.avif) + +```python +exp.plot_similar(idx=6500, limit=20) +exp.plot_similar(idx=[100, 101], limit=10) # Can also pass list of idxs or imgs + +exp.plot_similar(img="https://ultralytics.com/images/bus.jpg", limit=10, labels=False) # Can also pass external images +``` + +![Similarity search image 2](https://github.com/ultralytics/docs/releases/download/0/similarity-search-image-2.avif) + +## Ask AI: Search or filter with Natural Language + +You can prompt the Explorer object with the kind of data points you want to see, and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None. 
+ +![Ask ai table](https://github.com/ultralytics/docs/releases/download/0/ask-ai-nlp-table.avif) + +```python +df = exp.ask_ai("show me images containing more than 10 objects with at least 2 persons") +df.head(5) +``` + +For plotting these results, you can use the `plot_query_result` util. Example: + +```python +plt = plot_query_result(exp.ask_ai("show me 10 images containing exactly 2 persons")) +Image.fromarray(plt) +``` + +![Ask ai image 1](https://github.com/ultralytics/docs/releases/download/0/ask-ai-nlp-image-1.avif) + +```python +# plot +from PIL import Image + +from ultralytics.data.explorer import plot_query_result + +plt = plot_query_result(exp.ask_ai("show me 10 images containing exactly 2 persons")) +Image.fromarray(plt) +``` + +## Run SQL queries on your Dataset + +Sometimes you might want to investigate a certain type of entries in your dataset. For this, Explorer allows you to execute SQL queries. It accepts either of the following formats: + +- Queries beginning with "WHERE" will automatically select all columns. This can be thought of as a shorthand query +- You can also write full queries where you can specify which columns to select + +This can be used to investigate model performance and specific data points. For example: + +- Let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog. 
+ +You can combine SQL query and semantic search to filter down to a specific type of results + +```python +table = exp.sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10") +exp.plot_sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10", labels=True) +``` + +![SQL queries table](https://github.com/ultralytics/docs/releases/download/0/sql-queries-table.avif) + +```python +table = exp.sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10") +print(table) +``` + +Just like similarity search, you also get a util to directly plot the SQL queries using `exp.plot_sql_query` + +![SQL queries image 1](https://github.com/ultralytics/docs/releases/download/0/sql-query-image-1.avif) + +```python +exp.plot_sql_query("WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10", labels=True) +``` + +## Working with embeddings Table (Advanced) + +Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using the `Explorer.table` object, and run raw queries, push down pre- and post-filters, etc. + +```python +table = exp.table +print(table.schema) +``` + +### Run raw queries + +Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. In LLM and other AI applications, each data point can be represented by the embeddings generated from some models, and the search returns the most relevant features. + +A search in high-dimensional vector space finds the K-Nearest-Neighbors (KNN) of the query vector. + +Metric In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics: + +- L2 +- Cosine +- Dot Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. 
More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/) + +![Raw-queries-table](https://github.com/ultralytics/docs/releases/download/0/raw-queries-table.avif) + +```python +dummy_img_embedding = [i for i in range(256)] +table.search(dummy_img_embedding).limit(5).to_pandas() +``` + +### Interconversion to popular data formats + +```python +df = table.to_pandas() +pa_table = table.to_arrow() +``` + +### Work with Embeddings + +You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector` + +```python +import numpy as np + +embeddings = table.to_pandas()["vector"].tolist() +embeddings = np.array(embeddings) +``` + +### Scatterplot + +One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. Let's try an example + +![Scatterplot Example](https://github.com/ultralytics/docs/releases/download/0/scatterplot-sql-queries.avif) + +```python +import matplotlib.pyplot as plt +from sklearn.decomposition import PCA # pip install scikit-learn + +# Reduce dimensions using PCA to 3 components for visualization in 3D +pca = PCA(n_components=3) +reduced_data = pca.fit_transform(embeddings) + +# Create a 3D scatter plot using Matplotlib's Axes3D +fig = plt.figure(figsize=(8, 6)) +ax = fig.add_subplot(111, projection="3d") + +# Scatter plot +ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5) +ax.set_title("3D Scatter Plot of Reduced 256-Dimensional Data (PCA)") +ax.set_xlabel("Component 1") +ax.set_ylabel("Component 2") +ax.set_zlabel("Component 3") + +plt.show() +``` + +### Similarity Index + +Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation- + +- It tries to estimate how similar each data point is with the rest of the dataset. 
+- It does that by counting how many image embeddings lie closer than max_dist to the current image in the generated embedding space, considering top_k similar images at a time. + +For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case, your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`. Similar to vector and SQL search, this also comes with a util to directly plot it. Let's look + +```python +sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01) +exp.plot_similarity_index(max_dist=0.2, top_k=0.01) +``` + +![Similarity Index](https://github.com/ultralytics/docs/releases/download/0/similarity-index.avif) + +at the plot first + +```python +exp.plot_similarity_index(max_dist=0.2, top_k=0.01) +``` + +Now let's look at the output of the operation + +```python +sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False) + +sim_idx +``` + +Let's create a query to see what data points have similarity count of more than 30 and plot images similar to them. + +```python +import numpy as np + +sim_count = np.array(sim_idx["count"]) +sim_idx["im_file"][sim_count > 30] +``` + +You should see something like this + +![similarity-index-image](https://github.com/ultralytics/docs/releases/download/0/similarity-index-image.avif) + +```python +exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images +``` diff --git a/docs/en/datasets/explorer/index.md b/docs/en/datasets/explorer/index.md index d7e7ab66d4e..6db5fa16730 100644 --- a/docs/en/datasets/explorer/index.md +++ b/docs/en/datasets/explorer/index.md @@ -6,6 +6,10 @@ keywords: Ultralytics Explorer, CV datasets, semantic search, SQL queries, vecto # Ultralytics Explorer +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! 
You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 +

Ultralytics Explorer Screenshot 1

diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md index a53d2040d17..7d7a30a7139 100644 --- a/docs/en/datasets/index.md +++ b/docs/en/datasets/index.md @@ -19,7 +19,11 @@ Ultralytics provides support for various datasets to facilitate computer vision Watch: Ultralytics Datasets Overview

-## NEW 🚀 Ultralytics Explorer +## Ultralytics Explorer + +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md). @@ -46,10 +50,11 @@ Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your da - [VisDrone](detect/visdrone.md): A dataset containing object detection and multi-object tracking data from drone-captured imagery with over 10K images and video sequences. - [VOC](detect/voc.md): The Pascal Visual Object Classes (VOC) dataset for object detection and segmentation with 20 object classes and over 11K images. - [xView](detect/xview.md): A dataset for object detection in overhead imagery with 60 object categories and over 1 million annotated objects. -- [Roboflow 100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation. +- [RF100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation. - [Brain-tumor](detect/brain-tumor.md): A dataset for detecting brain tumors includes MRI or CT scan images with details on tumor presence, location, and characteristics. 
- [African-wildlife](detect/african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebras. - [Signature](detect/signature.md): A dataset featuring images of various documents with annotated signatures, supporting document verification and fraud detection research. +- [Medical-pills](detect/medical-pills.md): A dataset containing labeled images of medical-pills, designed to aid in tasks like pharmaceutical quality control, sorting, and ensuring compliance with industry standards. ## [Instance Segmentation](segment/index.md) @@ -70,6 +75,7 @@ Pose estimation is a technique used to determine the pose of the object relative - [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations. - [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks. - [Hand-Keypoints](pose/hand-keypoints.md): A concise dataset featuring over 26,000 images centered on human hands, annotated with 21 keypoints per hand, designed for pose estimation tasks. +- [Dog-pose](pose/dog-pose.md): A comprehensive dataset featuring approximately 6,000 images focused on dogs, annotated with 24 keypoints per dog, tailored for pose estimation tasks. ## [Classification](classify/index.md) @@ -85,6 +91,7 @@ Pose estimation is a technique used to determine the pose of the object relative - [Imagenette](classify/imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing. - [Imagewoof](classify/imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks. - [MNIST](classify/mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks. 
+- [MNIST160](classify/mnist.md): First 8 images of each MNIST category from the MNIST dataset. Dataset contains 160 images total. ## [Oriented Bounding Boxes (OBB)](obb/index.md) @@ -104,6 +111,17 @@ Multi-object tracking is a computer vision technique that involves detecting and Contributing a new dataset involves several steps to ensure that it aligns well with the existing infrastructure. Below are the necessary steps: +

+
+ +
+ Watch: How to Contribute to Ultralytics Datasets 🚀

+ ### Steps to Contribute a New Dataset 1. **Collect Images**: Gather the images that belong to the dataset. These could be collected from various sources, such as public databases or your own collection. diff --git a/docs/en/datasets/obb/dota-v2.md b/docs/en/datasets/obb/dota-v2.md index 76024cac105..a2c739479fd 100644 --- a/docs/en/datasets/obb/dota-v2.md +++ b/docs/en/datasets/obb/dota-v2.md @@ -31,7 +31,7 @@ keywords: DOTA dataset, object detection, aerial images, oriented bounding boxes - Very small instances (less than 10 pixels) are also annotated. - Addition of a new category: "container crane". - A total of 403,318 instances. -- Released for the DOAI Challenge 2019 on Object Detection in Aerial Images. +- Released for the [DOAI Challenge 2019 on Object Detection in Aerial Images](https://captain-whu.github.io/DOAI2019/challenge.html). ### DOTA-v2.0 @@ -108,8 +108,8 @@ To train a model on the DOTA v1 dataset, you can utilize the following code snip ```python from ultralytics import YOLO - # Create a new YOLOv8n-OBB model from scratch - model = YOLO("yolov8n-obb.yaml") + # Create a new YOLO11n-OBB model from scratch + model = YOLO("yolo11n-obb.yaml") # Train the model on the DOTAv1 dataset results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024) @@ -118,8 +118,8 @@ To train a model on the DOTA v1 dataset, you can utilize the following code snip === "CLI" ```bash - # Train a new YOLOv8n-OBB model on the DOTAv1 dataset - yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024 + # Train a new YOLO11n-OBB model on the DOTAv1 dataset + yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024 ``` ## Sample Data and Annotations @@ -176,8 +176,8 @@ To train a model on the DOTA dataset, you can use the following example with Ult ```python from ultralytics import YOLO - # Create a new YOLOv8n-OBB model from scratch - model = YOLO("yolov8n-obb.yaml") + # Create a new YOLO11n-OBB model from scratch + model = 
YOLO("yolo11n-obb.yaml") # Train the model on the DOTAv1 dataset results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024) @@ -186,8 +186,8 @@ To train a model on the DOTA dataset, you can use the following example with Ult === "CLI" ```bash - # Train a new YOLOv8n-OBB model on the DOTAv1 dataset - yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024 + # Train a new YOLO11n-OBB model on the DOTAv1 dataset + yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024 ``` For more details on how to split and preprocess the DOTA images, refer to the [split DOTA images section](#split-dota-images). diff --git a/docs/en/datasets/obb/dota8.md b/docs/en/datasets/obb/dota8.md index f24ea5bce2e..199c91bf06e 100644 --- a/docs/en/datasets/obb/dota8.md +++ b/docs/en/datasets/obb/dota8.md @@ -1,7 +1,7 @@ --- comments: true -description: Explore the DOTA8 dataset - a small, versatile oriented object detection dataset ideal for testing and debugging object detection models using Ultralytics YOLOv8. -keywords: DOTA8 dataset, Ultralytics, YOLOv8, object detection, debugging, training models, oriented object detection, dataset YAML +description: Explore the DOTA8 dataset - a small, versatile oriented object detection dataset ideal for testing and debugging object detection models using Ultralytics YOLO11. +keywords: DOTA8 dataset, Ultralytics, YOLO11, object detection, debugging, training models, oriented object detection, dataset YAML --- # DOTA8 Dataset @@ -10,7 +10,7 @@ keywords: DOTA8 dataset, Ultralytics, YOLOv8, object detection, debugging, train [Ultralytics](https://www.ultralytics.com/) DOTA8 is a small, but versatile oriented [object detection](https://www.ultralytics.com/glossary/object-detection) dataset composed of the first 8 images of 8 images of the split DOTAv1 set, 4 for training and 4 for validation. 
This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets. -This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). ## Dataset YAML @@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-obb model on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -34,7 +34,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.u from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-obb.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="dota8.yaml", epochs=100, imgsz=640) @@ -44,7 +44,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 [epochs](https://www.u ```bash # Start training from a pretrained *.pt model - yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 + yolo obb train data=dota8.yaml model=yolo11n-obb.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -84,11 +84,11 @@ A special note of gratitude to the team behind the DOTA datasets for their comme ### What is the DOTA8 dataset and how can it be used? -The DOTA8 dataset is a small, versatile oriented object detection dataset made up of the first 8 images from the DOTAv1 split set, with 4 images designated for training and 4 for validation. It's ideal for testing and debugging object detection models like Ultralytics YOLOv8. Due to its manageable size and diversity, it helps in identifying pipeline errors and running sanity checks before deploying larger datasets. Learn more about object detection with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics). +The DOTA8 dataset is a small, versatile oriented object detection dataset made up of the first 8 images from the DOTAv1 split set, with 4 images designated for training and 4 for validation. It's ideal for testing and debugging object detection models like Ultralytics YOLO11. Due to its manageable size and diversity, it helps in identifying pipeline errors and running sanity checks before deploying larger datasets. Learn more about object detection with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics). 
-### How do I train a YOLOv8 model using the DOTA8 dataset? +### How do I train a YOLO11 model using the DOTA8 dataset? -To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For comprehensive argument options, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For comprehensive argument options, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -98,7 +98,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-obb.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="dota8.yaml", epochs=100, imgsz=640) @@ -108,7 +108,7 @@ To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image s ```bash # Start training from a pretrained *.pt model - yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 + yolo obb train data=dota8.yaml model=yolo11n-obb.pt epochs=100 imgsz=640 ``` ### What are the key features of the DOTA dataset and where can I access the YAML file? @@ -119,6 +119,6 @@ The DOTA dataset is known for its large-scale benchmark and the challenges it pr Mosaicing combines multiple images into one during training, increasing the variety of objects and contexts within each batch. This improves a model's ability to generalize to different object sizes, aspect ratios, and scenes. This technique can be visually demonstrated through a training batch composed of mosaiced DOTA8 dataset images, helping in robust model development. Explore more about mosaicing and training techniques on our [Training](../../modes/train.md) page. 
-### Why should I use Ultralytics YOLOv8 for object detection tasks? +### Why should I use Ultralytics YOLO11 for object detection tasks? -Ultralytics YOLOv8 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLOv8 documentation](https://github.com/ultralytics/ultralytics). +Ultralytics YOLO11 provides state-of-the-art real-time object detection capabilities, including features like oriented bounding boxes (OBB), [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), and a highly versatile training pipeline. It's suitable for various applications and offers pretrained models for efficient fine-tuning. Explore further about the advantages and usage in the [Ultralytics YOLO11 documentation](https://github.com/ultralytics/ultralytics). 
diff --git a/docs/en/datasets/obb/index.md b/docs/en/datasets/obb/index.md index edeffb83aff..7ecaf3450e4 100644 --- a/docs/en/datasets/obb/index.md +++ b/docs/en/datasets/obb/index.md @@ -39,8 +39,8 @@ To train a model using these OBB formats: ```python from ultralytics import YOLO - # Create a new YOLOv8n-OBB model from scratch - model = YOLO("yolov8n-obb.yaml") + # Create a new YOLO11n-OBB model from scratch + model = YOLO("yolo11n-obb.yaml") # Train the model on the DOTAv1 dataset results = model.train(data="DOTAv1.yaml", epochs=100, imgsz=1024) @@ -49,8 +49,8 @@ To train a model using these OBB formats: === "CLI" ```bash - # Train a new YOLOv8n-OBB model on the DOTAv1 dataset - yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=1024 + # Train a new YOLO11n-OBB model on the DOTAv1 dataset + yolo obb train data=DOTAv1.yaml model=yolo11n-obb.pt epochs=100 imgsz=1024 ``` ## Supported Datasets @@ -92,7 +92,7 @@ It's imperative to validate the compatibility of the dataset with your model and Oriented Bounding Boxes (OBB) are a type of bounding box annotation where the box can be rotated to align more closely with the object being detected, rather than just being axis-aligned. This is particularly useful in aerial or satellite imagery where objects might not be aligned with the image axes. In Ultralytics YOLO models, OBBs are represented by their four corner points in the YOLO OBB format. This allows for more accurate object detection since the bounding boxes can rotate to fit the objects better. -### How do I convert my existing DOTA dataset labels to YOLO OBB format for use with Ultralytics YOLOv8? +### How do I convert my existing DOTA dataset labels to YOLO OBB format for use with Ultralytics YOLO11? You can convert DOTA dataset labels to YOLO OBB format using the `convert_dota_to_yolo_obb` function from Ultralytics. 
This conversion ensures compatibility with the Ultralytics YOLO models, enabling you to leverage the OBB capabilities for enhanced object detection. Here's a quick example: @@ -104,9 +104,9 @@ convert_dota_to_yolo_obb("path/to/DOTA") This script will reformat your DOTA annotations into a YOLO-compatible format. -### How do I train a YOLOv8 model with oriented bounding boxes (OBB) on my dataset? +### How do I train a YOLO11 model with oriented bounding boxes (OBB) on my dataset? -Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO OBB format and then using the Ultralytics API to train the model. Here's an example in both Python and CLI: +Training a YOLO11 model with OBBs involves ensuring your dataset is in the YOLO OBB format and then using the Ultralytics API to train the model. Here's an example in both Python and CLI: !!! example @@ -115,8 +115,8 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO ```python from ultralytics import YOLO - # Create a new YOLOv8n-OBB model from scratch - model = YOLO("yolov8n-obb.yaml") + # Create a new YOLO11n-OBB model from scratch + model = YOLO("yolo11n-obb.yaml") # Train the model on the custom dataset results = model.train(data="your_dataset.yaml", epochs=100, imgsz=640) @@ -125,8 +125,8 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO === "CLI" ```bash - # Train a new YOLOv8n-OBB model on the custom dataset - yolo obb train data=your_dataset.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640 + # Train a new YOLO11n-OBB model on the custom dataset + yolo obb train data=your_dataset.yaml model=yolo11n-obb.yaml epochs=100 imgsz=640 ``` This ensures your model leverages the detailed OBB annotations for improved detection [accuracy](https://www.ultralytics.com/glossary/accuracy). 
@@ -142,6 +142,6 @@ Currently, Ultralytics supports the following datasets for OBB training: These datasets are tailored for scenarios where OBBs offer a significant advantage, such as aerial and satellite image analysis. -### Can I use my own dataset with oriented bounding boxes for YOLOv8 training, and if so, how? +### Can I use my own dataset with oriented bounding boxes for YOLO11 training, and if so, how? -Yes, you can use your own dataset with oriented bounding boxes for YOLOv8 training. Ensure your dataset annotations are converted to the YOLO OBB format, which involves defining bounding boxes by their four corner points. You can then create a YAML configuration file specifying the dataset paths, classes, and other necessary details. For more information on creating and configuring your datasets, refer to the [Supported Datasets](#supported-datasets) section. +Yes, you can use your own dataset with oriented bounding boxes for YOLO11 training. Ensure your dataset annotations are converted to the YOLO OBB format, which involves defining bounding boxes by their four corner points. You can then create a YAML configuration file specifying the dataset paths, classes, and other necessary details. For more information on creating and configuring your datasets, refer to the [Supported Datasets](#supported-datasets) section. diff --git a/docs/en/datasets/pose/coco.md b/docs/en/datasets/pose/coco.md index 20042b40e25..81226c77ae7 100644 --- a/docs/en/datasets/pose/coco.md +++ b/docs/en/datasets/pose/coco.md @@ -12,14 +12,7 @@ The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialize ## COCO-Pose Pretrained Models -| Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | -| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | -| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | -| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | -| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | -| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | +{% include "macros/yolo-pose-perf.md" %} ## Key Features @@ -31,8 +24,8 @@ The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialize The COCO-Pose dataset is split into three subsets: -1. **Train2017**: This subset contains a portion of the 118K images from the COCO dataset, annotated for training pose estimation models. -2. **Val2017**: This subset has a selection of images used for validation purposes during model training. +1. **Train2017**: This subset contains 56599 images from the COCO dataset, annotated for training pose estimation models. +2. **Val2017**: This subset has 2346 images used for validation purposes during model training. 3. **Test2017**: This subset consists of images used for testing and benchmarking the trained models. 
Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation. ## Applications @@ -51,7 +44,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-pose model on the COCO-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -61,7 +54,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https:// from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640) @@ -71,7 +64,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 [epochs](https:// ```bash # Start training from a pretrained *.pt model - yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -109,11 +102,11 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th ### What is the COCO-Pose dataset and how is it used with Ultralytics YOLO for pose estimation? 
-The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset designed for pose estimation tasks. It builds upon the COCO Keypoints 2017 images and annotations, allowing for the training of models like Ultralytics YOLO for detailed pose estimation. For instance, you can use the COCO-Pose dataset to train a YOLOv8n-pose model by loading a pretrained model and training it with a YAML configuration. For training examples, refer to the [Training](../../modes/train.md) documentation. +The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset designed for pose estimation tasks. It builds upon the COCO Keypoints 2017 images and annotations, allowing for the training of models like Ultralytics YOLO for detailed pose estimation. For instance, you can use the COCO-Pose dataset to train a YOLO11n-pose model by loading a pretrained model and training it with a YAML configuration. For training examples, refer to the [Training](../../modes/train.md) documentation. -### How can I train a YOLOv8 model on the COCO-Pose dataset? +### How can I train a YOLO11 model on the COCO-Pose dataset? -Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using either Python or CLI commands. For example, to train a YOLOv8n-pose model for 100 epochs with an image size of 640, you can follow the steps below: +Training a YOLO11 model on the COCO-Pose dataset can be accomplished using either Python or CLI commands. For example, to train a YOLO11n-pose model for 100 epochs with an image size of 640, you can follow the steps below: !!! 
example "Train Example" @@ -123,7 +116,7 @@ Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using eithe from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640) @@ -133,27 +126,27 @@ Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using eithe ```bash # Start training from a pretrained *.pt model - yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` For more details on the training process and available arguments, check the [training page](../../modes/train.md). ### What are the different metrics provided by the COCO-Pose dataset for evaluating model performance? -The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the [accuracy](https://www.ultralytics.com/glossary/accuracy) of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. For instance, the COCO-Pose pretrained models such as YOLOv8n-pose, YOLOv8s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50. +The COCO-Pose dataset provides several standardized evaluation metrics for pose estimation tasks, similar to the original COCO dataset. Key metrics include the Object Keypoint Similarity (OKS), which evaluates the [accuracy](https://www.ultralytics.com/glossary/accuracy) of predicted keypoints against ground truth annotations. These metrics allow for thorough performance comparisons between different models. 
For instance, the COCO-Pose pretrained models such as YOLO11n-pose, YOLO11s-pose, and others have specific performance metrics listed in the documentation, like mAPpose50-95 and mAPpose50. ### How is the dataset structured and split for the COCO-Pose dataset? The COCO-Pose dataset is split into three subsets: -1. **Train2017**: Contains a portion of the 118K COCO images, annotated for training pose estimation models. -2. **Val2017**: Selected images for validation purposes during model training. -3. **Test2017**: Images used for testing and benchmarking trained models. Ground truth annotations for this subset are not publicly available; results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation. +1. **Train2017**: Contains 56599 COCO images, annotated for training pose estimation models. +2. **Val2017**: 2346 images for validation purposes during model training. +3. **Test2017**: Images used for testing and benchmarking trained models. Ground truth annotations for this subset are not publicly available; results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7403) for performance evaluation. These subsets help organize the training, validation, and testing phases effectively. For configuration details, explore the `coco-pose.yaml` file available on [GitHub](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml). ### What are the key features and applications of the COCO-Pose dataset? -The COCO-Pose dataset extends the COCO Keypoints 2017 annotations to include 17 keypoints for human figures, enabling detailed pose estimation. Standardized evaluation metrics (e.g., OKS) facilitate comparisons across different models. Applications of the COCO-Pose dataset span various domains, such as sports analytics, healthcare, and human-computer interaction, wherever detailed pose estimation of human figures is required. 
For practical use, leveraging pretrained models like those provided in the documentation (e.g., YOLOv8n-pose) can significantly streamline the process ([Key Features](#key-features)). +The COCO-Pose dataset extends the COCO Keypoints 2017 annotations to include 17 keypoints for human figures, enabling detailed pose estimation. Standardized evaluation metrics (e.g., OKS) facilitate comparisons across different models. Applications of the COCO-Pose dataset span various domains, such as sports analytics, healthcare, and human-computer interaction, wherever detailed pose estimation of human figures is required. For practical use, leveraging pretrained models like those provided in the documentation (e.g., YOLO11n-pose) can significantly streamline the process ([Key Features](#key-features)). If you use the COCO-Pose dataset in your research or development work, please cite the paper with the following [BibTeX entry](#citations-and-acknowledgments). diff --git a/docs/en/datasets/pose/coco8-pose.md b/docs/en/datasets/pose/coco8-pose.md index 95157b794e6..ab24f862072 100644 --- a/docs/en/datasets/pose/coco8-pose.md +++ b/docs/en/datasets/pose/coco8-pose.md @@ -1,7 +1,7 @@ --- comments: true -description: Explore the compact, versatile COCO8-Pose dataset for testing and debugging object detection models. Ideal for quick experiments with YOLOv8. -keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOLOv8, machine learning, computer vision, training data +description: Explore the compact, versatile COCO8-Pose dataset for testing and debugging object detection models. Ideal for quick experiments with YOLO11. 
+keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOLO11, machine learning, computer vision, training data --- # COCO8-Pose Dataset @@ -10,7 +10,7 @@ keywords: COCO8-Pose, Ultralytics, pose detection dataset, object detection, YOL [Ultralytics](https://www.ultralytics.com/) COCO8-Pose is a small, but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging [object detection](https://www.ultralytics.com/glossary/object-detection) models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets. -This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). ## Dataset YAML @@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-pose model on the COCO8-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -34,7 +34,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https:/ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640) @@ -44,7 +44,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 [epochs](https:/ ```bash # Start training from a pretrained *.pt model - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -80,13 +80,13 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th ## FAQ -### What is the COCO8-Pose dataset, and how is it used with Ultralytics YOLOv8? +### What is the COCO8-Pose dataset, and how is it used with Ultralytics YOLO11? -The COCO8-Pose dataset is a small, versatile pose detection dataset that includes the first 8 images from the COCO train 2017 set, with 4 images for training and 4 for validation. It's designed for testing and debugging object detection models and experimenting with new detection approaches. This dataset is ideal for quick experiments with [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/). For more details on dataset configuration, check out the dataset YAML file [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). +The COCO8-Pose dataset is a small, versatile pose detection dataset that includes the first 8 images from the COCO train 2017 set, with 4 images for training and 4 for validation. It's designed for testing and debugging object detection models and experimenting with new detection approaches. 
This dataset is ideal for quick experiments with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/). For more details on dataset configuration, check out the dataset YAML file [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). -### How do I train a YOLOv8 model using the COCO8-Pose dataset in Ultralytics? +### How do I train a YOLO11 model using the COCO8-Pose dataset in Ultralytics? -To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, follow these examples: +To train a YOLO11n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, follow these examples: !!! example "Train Example" @@ -96,7 +96,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") + model = YOLO("yolo11n-pose.pt") # Train the model results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640) @@ -105,7 +105,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i === "CLI" ```bash - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page. @@ -120,12 +120,12 @@ The COCO8-Pose dataset offers several benefits: For more about its features and usage, see the [Dataset Introduction](#introduction) section. -### How does mosaicing benefit the YOLOv8 training process using the COCO8-Pose dataset? +### How does mosaicing benefit the YOLO11 training process using the COCO8-Pose dataset? Mosaicing, demonstrated in the sample images of the COCO8-Pose dataset, combines multiple images into one, increasing the variety of objects and scenes within each training batch. 
This technique helps improve the model's ability to generalize across various object sizes, aspect ratios, and contexts, ultimately enhancing model performance. See the [Sample Images and Annotations](#sample-images-and-annotations) section for example images. ### Where can I find the COCO8-Pose dataset YAML file and how do I use it? -The COCO8-Pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLOv8 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolov8-model-using-the-coco8-pose-dataset-in-ultralytics) section. +The COCO8-Pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-coco8-pose-dataset-in-ultralytics) section. For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/). diff --git a/docs/en/datasets/pose/dog-pose.md b/docs/en/datasets/pose/dog-pose.md new file mode 100644 index 00000000000..fa6acb0755f --- /dev/null +++ b/docs/en/datasets/pose/dog-pose.md @@ -0,0 +1,141 @@ +--- +comments: true +description: Discover the Dog-Pose dataset for pose detection. Featuring 6,773 training and 1,703 test images, it's a robust dataset for training YOLO11 models. +keywords: Dog-Pose, Ultralytics, pose detection dataset, YOLO11, machine learning, computer vision, training data +--- + +# Dog-Pose Dataset + +## Introduction + +The [Ultralytics](https://www.ultralytics.com/) Dog-pose dataset is a high-quality and extensive dataset specifically curated for dog keypoint estimation. 
With 6,773 training images and 1,703 test images, this dataset provides a solid foundation for training robust pose estimation models. Each annotated image includes 24 keypoints with 3 dimensions per keypoint (x, y, visibility), making it a valuable resource for advanced research and development in computer vision. + +Ultralytics Dog-pose display image + +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). + +## Dataset YAML + +A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It includes paths, keypoint details, and other relevant information. In the case of the Dog-pose dataset, the `dog-pose.yaml` is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). + +!!! 
example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) + + # Train the model + results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 + ``` + +## Sample Images and Annotations + +Here are some examples of images from the Dog-pose dataset, along with their corresponding annotations: + +Dataset sample image + +- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts. + +The example showcases the variety and complexity of the images in the Dog-pose dataset and the benefits of using mosaicing during the training process. + +## Citations and Acknowledgments + +If you use the Dog-pose dataset in your research or development work, please cite the following paper: + +!!! 
quote "" + + === "BibTeX" + + ```bibtex + @inproceedings{khosla2011fgvc, + title={Novel dataset for Fine-Grained Image Categorization}, + author={Aditya Khosla and Nityananda Jayadevaprakash and Bangpeng Yao and Li Fei-Fei}, + booktitle={First Workshop on Fine-Grained Visual Categorization (FGVC), IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2011} + } + @inproceedings{deng2009imagenet, + title={ImageNet: A Large-Scale Hierarchical Image Database}, + author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei}, + booktitle={IEEE Computer Vision and Pattern Recognition (CVPR)}, + year={2009} + } + ``` + +We would like to acknowledge the Stanford team for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the Dog-pose dataset and its creators, visit the [Stanford Dogs Dataset website](http://vision.stanford.edu/aditya86/ImageNetDogs/). + +## FAQ + +### What is the Dog-pose dataset, and how is it used with Ultralytics YOLO11? + +The Dog-Pose dataset features 6,773 training and 1,703 test images annotated with 24 keypoints for dog pose estimation. Ideal for training and validating models with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/), it supports applications like animal behavior analysis and veterinary studies. + +### How do I train a YOLO11 model using the Dog-pose dataset in Ultralytics? + +To train a YOLO11n-pose model on the Dog-pose dataset for 100 epochs with an image size of 640, follow these examples: + +!!! 
example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolo11n-pose.pt") + + # Train the model + results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 + ``` + +For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page. + +### What are the benefits of using the Dog-pose dataset? + +The Dog-pose dataset offers several benefits: + +**Large and Diverse Dataset**: With 6,773 training and 1,703 test images, it provides a substantial amount of data covering a wide range of dog poses, breeds, and contexts, enabling robust model training and evaluation. + +**Pose-specific Annotations**: Offers detailed annotations for pose estimation, ensuring high-quality data for training pose detection models. + +**Real-World Scenarios**: Includes images from varied environments, enhancing the model's ability to generalize to real-world applications. + +**Model Performance Improvement**: The diversity and scale of the dataset help improve model accuracy and robustness, particularly for tasks involving fine-grained pose estimation. + +For more about its features and usage, see the [Dataset Introduction](#introduction) section. + +### How does mosaicing benefit the YOLO11 training process using the Dog-pose dataset? + +Mosaicing, as illustrated in the sample images from the Dog-pose dataset, merges multiple images into a single composite, enriching the diversity of objects and scenes in each training batch. This approach enhances the model's capacity to generalize across different object sizes, aspect ratios, and contexts, leading to improved performance. For example images, refer to the [Sample Images and Annotations](#sample-images-and-annotations) section. + +### Where can I find the Dog-pose dataset YAML file and how do I use it? 
+ +The Dog-pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-dog-pose-dataset-in-ultralytics) section. + +For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/). diff --git a/docs/en/datasets/pose/hand-keypoints.md b/docs/en/datasets/pose/hand-keypoints.md index c14bd5c5302..559cdcec657 100644 --- a/docs/en/datasets/pose/hand-keypoints.md +++ b/docs/en/datasets/pose/hand-keypoints.md @@ -8,7 +8,18 @@ keywords: Hand KeyPoints, pose estimation, dataset, keypoints, MediaPipe, YOLO, ## Introduction -The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high accuracy and consistency, and the dataset is compatible [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) formats. +The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high [accuracy](https://www.ultralytics.com/glossary/accuracy) and consistency, and the dataset is compatible with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) formats. + +

+
+ +
+ Watch: Hand Keypoints Estimation with Ultralytics YOLO11 | Human Hand Pose Estimation Tutorial +

## Hand Landmarks @@ -30,7 +41,7 @@ Each hand has a total of 21 keypoints. ## Key Features - **Large Dataset**: 26,768 images with hand keypoint annotations. -- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models. +- **YOLO11 Compatibility**: Ready for use with YOLO11 models. - **21 Keypoints**: Detailed hand pose representation. ## Dataset Structure @@ -56,7 +67,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-pose model on the Hand Keypoints dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -66,7 +77,7 @@ To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](htt from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640) @@ -76,7 +87,7 @@ To train a YOLOv8n-pose model on the Hand Keypoints dataset for 100 [epochs](htt ```bash # Start training from a pretrained *.pt model - yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=hand-keypoints.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -109,9 +120,9 @@ We would also like to acknowledge the creator of this dataset, [Rion Dsilva](htt ## FAQ -### How do I train a YOLOv8 model on the Hand Keypoints dataset? +### How do I train a YOLO11 model on the Hand Keypoints dataset? -To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python or the command line interface (CLI). Here's an example for training a YOLOv8n-pose model for 100 epochs with an image size of 640: +To train a YOLO11 model on the Hand Keypoints dataset, you can use either Python or the command line interface (CLI). Here's an example for training a YOLO11n-pose model for 100 epochs with an image size of 640: !!! 
Example @@ -121,7 +132,7 @@ To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="hand-keypoints.yaml", epochs=100, imgsz=640) @@ -131,7 +142,7 @@ To train a YOLOv8 model on the Hand Keypoints dataset, you can use either Python ```bash # Start training from a pretrained *.pt model - yolo pose train data=hand-keypoints.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=hand-keypoints.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. @@ -141,7 +152,7 @@ For a comprehensive list of available arguments, refer to the model [Training](. The Hand Keypoints dataset is designed for advanced pose estimation tasks and includes several key features: - **Large Dataset**: Contains 26,768 images with hand keypoint annotations. -- **YOLOv8 Compatibility**: Ready for use with YOLOv8 models. +- **YOLO11 Compatibility**: Ready for use with YOLO11 models. - **21 Keypoints**: Detailed hand pose representation, including wrist and finger joints. For more details, you can explore the [Hand Keypoints Dataset](#introduction) section. diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md index a8bfd222963..0473b310553 100644 --- a/docs/en/datasets/pose/index.md +++ b/docs/en/datasets/pose/index.md @@ -42,7 +42,7 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
-path: ../datasets/coco8-pose # dataset root dir +path: ../datasets/coco8-pose # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir) train: images/train # train images (relative to 'path') 4 images val: images/val # val images (relative to 'path') 4 images test: # test images (optional) @@ -72,7 +72,7 @@ The `train` and `val` fields specify the paths to the directories containing the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640) @@ -82,7 +82,7 @@ The `train` and `val` fields specify the paths to the directories containing the ```bash # Start training from a pretrained *.pt model - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ## Supported Datasets @@ -127,6 +127,15 @@ This section outlines the datasets that are compatible with Ultralytics YOLO for - **Usage**: Great for human hand pose estimation. - [Read more about Hand Keypoints](hand-keypoints.md) +### Dog-Pose + +- **Description**: The Dog Pose dataset contains 6,773 training and 1,703 test images, providing a diverse and extensive resource for training and validation of dog pose estimation models. +- **Label Format**: Follows the Ultralytics YOLO format, with annotations for multiple keypoints specific to dog anatomy. +- **Number of Classes**: 1 (Dog). +- **Keypoints**: Includes 24 keypoints tailored to dog poses, such as limbs, joints, and head positions. +- **Usage**: Ideal for training models to estimate dog poses in various scenarios, from research to real-world applications. 
+- [Read more about Dog-Pose](dog-pose.md) + ### Adding your own dataset If you have your own dataset and would like to use it for training pose estimation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file. @@ -171,7 +180,7 @@ To use the COCO-Pose dataset with Ultralytics YOLO: ```python from ultralytics import YOLO - model = YOLO("yolov8n-pose.pt") # load pretrained model + model = YOLO("yolo11n-pose.pt") # load pretrained model results = model.train(data="coco-pose.yaml", epochs=100, imgsz=640) ``` @@ -188,7 +197,7 @@ To add your dataset: ```python from ultralytics import YOLO - model = YOLO("yolov8n-pose.pt") + model = YOLO("yolo11n-pose.pt") results = model.train(data="your-dataset.yaml", epochs=100, imgsz=640) ``` diff --git a/docs/en/datasets/pose/tiger-pose.md b/docs/en/datasets/pose/tiger-pose.md index 06333b345b5..d97d9996640 100644 --- a/docs/en/datasets/pose/tiger-pose.md +++ b/docs/en/datasets/pose/tiger-pose.md @@ -1,7 +1,7 @@ --- comments: true description: Explore Ultralytics Tiger-Pose dataset with 263 diverse images. Ideal for testing, training, and refining pose estimation algorithms. -keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLOv8, training data, machine learning, neural networks +keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLO11, training data, machine learning, neural networks --- # Tiger-Pose Dataset @@ -12,7 +12,7 @@ keywords: Ultralytics, Tiger-Pose, dataset, pose estimation, YOLOv8, training da Despite its manageable size of 210 images, tiger-pose dataset offers diversity, making it suitable for assessing training pipelines, identifying potential errors, and serving as a valuable preliminary step before working with larger datasets for pose estimation. 
-This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).


@@ -22,7 +22,7 @@ This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics. allowfullscreen>
- Watch: Train YOLOv8 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB + Watch: Train YOLO11 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB

## Dataset YAML @@ -37,7 +37,7 @@ A YAML (Yet Another Markup Language) file serves as the means to specify the con ## Usage -To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-pose model on the Tiger-Pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -47,7 +47,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https:/ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="tiger-pose.yaml", epochs=100, imgsz=640) @@ -57,7 +57,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 [epochs](https:/ ```bash # Start training from a pretrained *.pt model - yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo task=pose mode=train data=tiger-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -101,11 +101,11 @@ The dataset has been released available under the [AGPL-3.0 License](https://git ### What is the Ultralytics Tiger-Pose dataset used for? -The Ultralytics Tiger-Pose dataset is designed for pose estimation tasks, consisting of 263 images sourced from a [YouTube video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0). The dataset is divided into 210 training images and 53 validation images. 
It is particularly useful for testing, training, and refining pose estimation algorithms using [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +The Ultralytics Tiger-Pose dataset is designed for pose estimation tasks, consisting of 263 images sourced from a [YouTube video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0). The dataset is divided into 210 training images and 53 validation images. It is particularly useful for testing, training, and refining pose estimation algorithms using [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). -### How do I train a YOLOv8 model on the Tiger-Pose dataset? +### How do I train a YOLO11 model on the Tiger-Pose dataset? -To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an image size of 640, use the following code snippets. For more details, visit the [Training](../../modes/train.md) page: +To train a YOLO11n-pose model on the Tiger-Pose dataset for 100 epochs with an image size of 640, use the following code snippets. For more details, visit the [Training](../../modes/train.md) page: !!! 
example "Train Example" @@ -115,7 +115,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="tiger-pose.yaml", epochs=100, imgsz=640) @@ -126,16 +126,16 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i ```bash # Start training from a pretrained *.pt model - yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo task=pose mode=train data=tiger-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 ``` ### What configurations does the `tiger-pose.yaml` file include? The `tiger-pose.yaml` file is used to specify the configuration details of the Tiger-Pose dataset. It includes crucial data such as file paths and class definitions. To see the exact configuration, you can check out the [Ultralytics Tiger-Pose Dataset Configuration File](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/tiger-pose.yaml). -### How can I run inference using a YOLOv8 model trained on the Tiger-Pose dataset? +### How can I run inference using a YOLO11 model trained on the Tiger-Pose dataset? -To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you can use the following code snippets. For a detailed guide, visit the [Prediction](../../modes/predict.md) page: +To perform inference using a YOLO11 model trained on the Tiger-Pose dataset, you can use the following code snippets. For a detailed guide, visit the [Prediction](../../modes/predict.md) page: !!! example "Inference Example" @@ -161,4 +161,4 @@ To perform inference using a YOLOv8 model trained on the Tiger-Pose dataset, you ### What are the benefits of using the Tiger-Pose dataset for pose estimation? 
-The Tiger-Pose dataset, despite its manageable size of 210 images for training, provides a diverse collection of images that are ideal for testing pose estimation pipelines. The dataset helps identify potential errors and acts as a preliminary step before working with larger datasets. Additionally, the dataset supports the training and refinement of pose estimation algorithms using advanced tools like [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics), enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy). +The Tiger-Pose dataset, despite its manageable size of 210 images for training, provides a diverse collection of images that are ideal for testing pose estimation pipelines. The dataset helps identify potential errors and acts as a preliminary step before working with larger datasets. Additionally, the dataset supports the training and refinement of pose estimation algorithms using advanced tools like [Ultralytics HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics), enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy). 
diff --git a/docs/en/datasets/segment/carparts-seg.md b/docs/en/datasets/segment/carparts-seg.md index b798cacad15..e3437c5107b 100644 --- a/docs/en/datasets/segment/carparts-seg.md +++ b/docs/en/datasets/segment/carparts-seg.md @@ -6,19 +6,21 @@ keywords: Carparts Segmentation Dataset, Roboflow, computer vision, automotive A # Roboflow Universe Carparts Segmentation Dataset +Open Carparts Segmentation Dataset In Colab + The [Roboflow](https://roboflow.com/?ref=ultralytics) [Carparts Segmentation Dataset](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm?ref=ultralytics) is a curated collection of images and videos designed for [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications, specifically focusing on segmentation tasks related to car parts. This dataset provides a diverse set of visuals captured from multiple perspectives, offering valuable annotated examples for training and testing segmentation models. Whether you're working on automotive research, developing AI solutions for vehicle maintenance, or exploring computer vision applications, the Carparts Segmentation Dataset serves as a valuable resource for enhancing accuracy and efficiency in your projects.


-
- Watch: Carparts [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation) Using Ultralytics HUB + Watch: Carparts Instance Segmentation with Ultralytics YOLO11

## Dataset Structure @@ -45,7 +47,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train Ultralytics YOLO11n model on the Carparts Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -55,7 +57,7 @@ To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100 from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="carparts-seg.yaml", epochs=100, imgsz=640) @@ -65,7 +67,7 @@ To train Ultralytics YOLOv8n model on the Carparts Segmentation dataset for 100 ```bash # Start training from a pretrained *.pt model - yolo segment train data=carparts-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=carparts-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -108,9 +110,9 @@ We extend our thanks to the Roboflow team for their dedication in developing and The [Roboflow Carparts Segmentation Dataset](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm?ref=ultralytics) is a curated collection of images and videos specifically designed for car part segmentation tasks in computer vision. 
This dataset includes a diverse range of visuals captured from multiple perspectives, making it an invaluable resource for training and testing segmentation models for automotive applications. -### How can I use the Carparts Segmentation Dataset with Ultralytics YOLOv8? +### How can I use the Carparts Segmentation Dataset with Ultralytics YOLO11? -To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow these steps: +To train a YOLO11 model on the Carparts Segmentation dataset, you can follow these steps: !!! example "Train Example" @@ -120,7 +122,7 @@ To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="carparts-seg.yaml", epochs=100, imgsz=640) @@ -130,7 +132,7 @@ To train a YOLOv8 model on the Carparts Segmentation dataset, you can follow the ```bash # Start training from a pretrained *.pt model - yolo segment train data=carparts-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=carparts-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` For more details, refer to the [Training](../../modes/train.md) documentation. diff --git a/docs/en/datasets/segment/coco.md b/docs/en/datasets/segment/coco.md index 0f403c69afa..2dd8a0f53a4 100644 --- a/docs/en/datasets/segment/coco.md +++ b/docs/en/datasets/segment/coco.md @@ -1,7 +1,7 @@ --- comments: true description: Explore the COCO-Seg dataset, an extension of COCO, with detailed segmentation annotations. Learn how to train YOLO models with COCO-Seg. 
-keywords: COCO-Seg, dataset, YOLO models, instance segmentation, object detection, COCO dataset, YOLOv8, computer vision, Ultralytics, machine learning +keywords: COCO-Seg, dataset, YOLO models, instance segmentation, object detection, COCO dataset, YOLO11, computer vision, Ultralytics, machine learning --- # COCO-Seg Dataset @@ -10,13 +10,7 @@ The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO ## COCO-Seg Pretrained Models -| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | -| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | -| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | -| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | -| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | +{% include "macros/yolo-seg-perf.md" %} ## Key Features @@ -49,7 +43,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-seg model on the COCO-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -59,17 +53,17 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 [epochs](https://ww from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model - results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640) + results = model.train(data="coco.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -109,9 +103,9 @@ We extend our thanks to the COCO Consortium for creating and maintaining this in The [COCO-Seg](https://cocodataset.org/#home) dataset is an extension of the original COCO (Common Objects in Context) dataset, specifically designed for instance segmentation tasks. While it uses the same images as the COCO dataset, COCO-Seg includes more detailed segmentation annotations, making it a powerful resource for researchers and developers focusing on object instance segmentation. -### How can I train a YOLOv8 model using the COCO-Seg dataset? +### How can I train a YOLO11 model using the COCO-Seg dataset? -To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a detailed list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a detailed list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -121,17 +115,17 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model - results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640) + results = model.train(data="coco.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ### What are the key features of the COCO-Seg dataset? @@ -145,15 +139,9 @@ The COCO-Seg dataset includes several key features: ### What pretrained models are available for COCO-Seg, and what are their performance metrics? -The COCO-Seg dataset supports multiple pretrained YOLOv8 segmentation models with varying performance metrics. Here's a summary of the available models and their key metrics: +The COCO-Seg dataset supports multiple pretrained YOLO11 segmentation models with varying performance metrics. Here's a summary of the available models and their key metrics: -| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | -| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | -| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | -| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | -| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | +{% include "macros/yolo-seg-perf.md" %} ### How is the COCO-Seg dataset structured and what subsets does it contain? diff --git a/docs/en/datasets/segment/coco8-seg.md b/docs/en/datasets/segment/coco8-seg.md index 21abf3d8029..3fc69745138 100644 --- a/docs/en/datasets/segment/coco8-seg.md +++ b/docs/en/datasets/segment/coco8-seg.md @@ -1,7 +1,7 @@ --- comments: true description: Discover the versatile and manageable COCO8-Seg dataset by Ultralytics, ideal for testing and debugging segmentation models or new detection approaches. 
-keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLOv8, COCO 2017, model training, computer vision, dataset configuration +keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLO11, COCO 2017, model training, computer vision, dataset configuration --- # COCO8-Seg Dataset @@ -10,7 +10,7 @@ keywords: COCO8-Seg, Ultralytics, segmentation dataset, YOLOv8, COCO 2017, model [Ultralytics](https://www.ultralytics.com/) COCO8-Seg is a small, but versatile [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging segmentation models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets. -This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLOv8](https://github.com/ultralytics/ultralytics). +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). ## Dataset YAML @@ -24,7 +24,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train a YOLO11n-seg model on the COCO8-Seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! 
example "Train Example" @@ -34,7 +34,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://w from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640) @@ -44,7 +44,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 [epochs](https://w ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -80,13 +80,13 @@ We would like to acknowledge the COCO Consortium for creating and maintaining th ## FAQ -### What is the COCO8-Seg dataset, and how is it used in Ultralytics YOLOv8? +### What is the COCO8-Seg dataset, and how is it used in Ultralytics YOLO11? -The **COCO8-Seg dataset** is a compact instance segmentation dataset by Ultralytics, consisting of the first 8 images from the COCO train 2017 set—4 images for training and 4 for validation. This dataset is tailored for testing and debugging segmentation models or experimenting with new detection methods. It is particularly useful with Ultralytics [YOLOv8](https://github.com/ultralytics/ultralytics) and [HUB](https://hub.ultralytics.com/) for rapid iteration and pipeline error-checking before scaling to larger datasets. For detailed usage, refer to the model [Training](../../modes/train.md) page. +The **COCO8-Seg dataset** is a compact instance segmentation dataset by Ultralytics, consisting of the first 8 images from the COCO train 2017 set—4 images for training and 4 for validation. This dataset is tailored for testing and debugging segmentation models or experimenting with new detection methods. 
It is particularly useful with Ultralytics [YOLO11](https://github.com/ultralytics/ultralytics) and [HUB](https://hub.ultralytics.com/) for rapid iteration and pipeline error-checking before scaling to larger datasets. For detailed usage, refer to the model [Training](../../modes/train.md) page. -### How can I train a YOLOv8n-seg model using the COCO8-Seg dataset? +### How can I train a YOLO11n-seg model using the COCO8-Seg dataset? -To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use Python or CLI commands. Here's a quick example: +To train a **YOLO11n-seg** model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use Python or CLI commands. Here's a quick example: !!! example "Train Example" @@ -96,7 +96,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # Load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # Load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640) @@ -106,7 +106,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` For a thorough explanation of available arguments and configuration options, you can check the [Training](../../modes/train.md) documentation. 
diff --git a/docs/en/datasets/segment/crack-seg.md b/docs/en/datasets/segment/crack-seg.md index f5ffbe92e0a..1526fa5e90a 100644 --- a/docs/en/datasets/segment/crack-seg.md +++ b/docs/en/datasets/segment/crack-seg.md @@ -34,7 +34,7 @@ A YAML (Yet Another Markup Language) file is employed to outline the configurati ## Usage -To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train Ultralytics YOLO11n model on the Crack Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -44,7 +44,7 @@ To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [ep from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="crack-seg.yaml", epochs=100, imgsz=640) @@ -54,7 +54,7 @@ To train Ultralytics YOLOv8n model on the Crack Segmentation dataset for 100 [ep ```bash # Start training from a pretrained *.pt model - yolo segment train data=crack-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=crack-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -98,9 +98,9 @@ We would like to acknowledge the Roboflow team for creating and maintaining the The [Roboflow Crack Segmentation Dataset](https://universe.roboflow.com/university-bswxt/crack-bphdr?ref=ultralytics) is a comprehensive collection of 4029 static images designed 
specifically for transportation and public safety studies. It is ideal for tasks such as self-driving car model development and infrastructure maintenance. The dataset includes training, testing, and validation sets, aiding in accurate crack detection and segmentation. -### How do I train a model using the Crack Segmentation Dataset with Ultralytics YOLOv8? +### How do I train a model using the Crack Segmentation Dataset with Ultralytics YOLO11? -To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the following code snippets. Detailed instructions and further parameters can be found on the model [Training](../../modes/train.md) page. +To train an Ultralytics YOLO11 model on the Crack Segmentation dataset, use the following code snippets. Detailed instructions and further parameters can be found on the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -110,7 +110,7 @@ To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="crack-seg.yaml", epochs=100, imgsz=640) @@ -120,7 +120,7 @@ To train an Ultralytics YOLOv8 model on the Crack Segmentation dataset, use the ```bash # Start training from a pretrained *.pt model - yolo segment train data=crack-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=crack-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ### Why should I use the Crack Segmentation Dataset for my self-driving car project? 
diff --git a/docs/en/datasets/segment/index.md b/docs/en/datasets/segment/index.md index 52b19781640..9f88aea3c48 100644 --- a/docs/en/datasets/segment/index.md +++ b/docs/en/datasets/segment/index.md @@ -44,7 +44,7 @@ The Ultralytics framework uses a YAML file format to define the dataset and mode ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] -path: ../datasets/coco8-seg # dataset root dir +path: ../datasets/coco8-seg # dataset root dir (absolute or relative; if relative, it's relative to default datasets_dir) train: images/train # train images (relative to 'path') 4 images val: images/val # val images (relative to 'path') 4 images test: # test images (optional) @@ -74,7 +74,7 @@ The `train` and `val` fields specify the paths to the directories containing the from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640) @@ -84,7 +84,7 @@ The `train` and `val` fields specify the paths to the directories containing the ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Supported Datasets @@ -137,18 +137,12 @@ To auto-annotate your dataset using the Ultralytics framework, you can use the ` ```python from ultralytics.data.annotator import auto_annotate - auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam_b.pt") + auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam_b.pt") ``` -| Argument | Type | Description | Default | -| ------------ | ----------------------- | 
----------------------------------------------------------------------------------------------------------- | -------------- | -| `data` | `str` | Path to a folder containing images to be annotated. | `None` | -| `det_model` | `str, optional` | Pre-trained YOLO detection model. Defaults to `'yolov8x.pt'`. | `'yolov8x.pt'` | -| `sam_model` | `str, optional` | Pre-trained SAM segmentation model. Defaults to `'sam_b.pt'`. | `'sam_b.pt'` | -| `device` | `str, optional` | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | `''` | -| `output_dir` | `str or None, optional` | Directory to save the annotated results. Defaults to a `'labels'` folder in the same directory as `'data'`. | `None` | +{% include "macros/sam-auto-annotate.md" %} -The `auto_annotate` function takes the path to your images, along with optional arguments for specifying the pre-trained detection and [SAM segmentation models](../../models/sam.md), the device to run the models on, and the output directory for saving the annotated results. +The `auto_annotate` function takes the path to your images, along with optional arguments for specifying the pre-trained detection models, e.g. [YOLO11](../../models/yolo11.md), [YOLOv8](../../models/yolov8.md) or other [models](../../models/index.md), and segmentation models, e.g. [SAM](../../models/sam.md), [SAM2](../../models/sam-2.md) or [MobileSAM](../../models/mobile-sam.md), the device to run the models on, and the output directory for saving the annotated results. By leveraging the power of pre-trained models, auto-annotation can significantly reduce the time and effort required for creating high-quality segmentation datasets. This feature is particularly useful for researchers and developers working with large image collections, as it allows them to focus on model development and evaluation rather than manual annotation. 
@@ -195,7 +189,7 @@ Auto-annotation in Ultralytics YOLO allows you to generate segmentation annotati ```python from ultralytics.data.annotator import auto_annotate -auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam_b.pt") +auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam_b.pt") # or sam_model="mobile_sam.pt" ``` -This function automates the annotation process, making it faster and more efficient. For more details, explore the [Auto-Annotation](#auto-annotation) section. +This function automates the annotation process, making it faster and more efficient. For more details, explore the [Auto-Annotate Reference](https://docs.ultralytics.com/reference/data/annotator/#ultralytics.data.annotator.auto_annotate). diff --git a/docs/en/datasets/segment/package-seg.md b/docs/en/datasets/segment/package-seg.md index 477072fb574..bd6446983c5 100644 --- a/docs/en/datasets/segment/package-seg.md +++ b/docs/en/datasets/segment/package-seg.md @@ -6,6 +6,8 @@ keywords: Roboflow, Package Segmentation Dataset, computer vision, package ident # Roboflow Universe Package Segmentation Dataset +Open Package Segmentation Dataset In Colab + The [Roboflow](https://roboflow.com/?ref=ultralytics) [Package Segmentation Dataset](https://universe.roboflow.com/factorypackage/factory_package?ref=ultralytics) is a curated collection of images specifically tailored for tasks related to package segmentation in the field of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This dataset is designed to assist researchers, developers, and enthusiasts working on projects related to package identification, sorting, and handling. Containing a diverse set of images showcasing various packages in different contexts and environments, the dataset serves as a valuable resource for training and evaluating segmentation models. 
Whether you are engaged in logistics, warehouse automation, or any application requiring precise package analysis, the Package Segmentation Dataset provides a targeted and comprehensive set of images to enhance the performance of your computer vision algorithms. @@ -34,7 +36,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur ## Usage -To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. +To train Ultralytics YOLO11n model on the Package Segmentation dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. !!! example "Train Example" @@ -44,7 +46,7 @@ To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="package-seg.yaml", epochs=100, imgsz=640) @@ -54,7 +56,7 @@ To train Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 [ ```bash # Start training from a pretrained *.pt model - yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=package-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Data and Annotations @@ -97,9 +99,9 @@ We express our gratitude to the Roboflow team for their efforts in creating and The [Roboflow Package Segmentation Dataset](https://universe.roboflow.com/factorypackage/factory_package?ref=ultralytics) is 
a curated collection of images tailored for tasks involving package segmentation. It includes diverse images of packages in various contexts, making it invaluable for training and evaluating segmentation models. This dataset is particularly useful for applications in logistics, warehouse automation, and any project requiring precise package analysis. It helps optimize logistics and enhance vision models for accurate package identification and sorting. -### How do I train an Ultralytics YOLOv8 model on the Package Segmentation Dataset? +### How do I train an Ultralytics YOLO11 model on the Package Segmentation Dataset? -You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Use the snippets below: +You can train an Ultralytics YOLO11n model using both Python and CLI methods. Use the snippets below: !!! example "Train Example" @@ -109,7 +111,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load a pretrained model + model = YOLO("yolo11n-seg.pt") # load a pretrained model # Train the model results = model.train(data="package-seg.yaml", epochs=100, imgsz=640) @@ -119,7 +121,7 @@ You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Us ```bash # Start training from a pretrained *.pt model - yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=package-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` Refer to the model [Training](../../modes/train.md) page for more details. @@ -134,9 +136,9 @@ The dataset is structured into three main components: This structure ensures a balanced dataset for thorough model training, validation, and testing, enhancing the performance of segmentation algorithms. -### Why should I use Ultralytics YOLOv8 with the Package Segmentation Dataset? +### Why should I use Ultralytics YOLO11 with the Package Segmentation Dataset? 
-Ultralytics YOLOv8 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed for real-time object detection and segmentation tasks. Using it with the Package Segmentation Dataset allows you to leverage YOLOv8's capabilities for precise package segmentation. This combination is especially beneficial for industries like logistics and warehouse automation, where accurate package identification is critical. For more information, check out our [page on YOLOv8 segmentation](https://docs.ultralytics.com/models/yolov8/). +Ultralytics YOLO11 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed for real-time object detection and segmentation tasks. Using it with the Package Segmentation Dataset allows you to leverage YOLO11's capabilities for precise package segmentation. This combination is especially beneficial for industries like logistics and warehouse automation, where accurate package identification is critical. For more information, check out our [page on YOLO11 segmentation](https://docs.ultralytics.com/models/yolo11/). ### How can I access and use the package-seg.yaml file for the Package Segmentation Dataset? 
diff --git a/docs/en/datasets/track/index.md b/docs/en/datasets/track/index.md index f9a8b4f81b5..0aa3d8c50bf 100644 --- a/docs/en/datasets/track/index.md +++ b/docs/en/datasets/track/index.md @@ -19,14 +19,14 @@ Multi-Object Detector doesn't need standalone training and directly supports pre ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) ``` === "CLI" ```bash - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show ``` ## FAQ @@ -42,17 +42,17 @@ To use Multi-Object Tracking with Ultralytics YOLO, you can start by using the P ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") # Load the YOLOv8 model + model = YOLO("yolo11n.pt") # Load the YOLO11 model results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) ``` === "CLI" ```bash - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show ``` -These commands load the YOLOv8 model and use it for tracking objects in the given video source with specific confidence (`conf`) and [Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (`iou`) thresholds. For more details, refer to the [track mode documentation](../../modes/track.md). +These commands load the YOLO11 model and use it for tracking objects in the given video source with specific confidence (`conf`) and [Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (`iou`) thresholds. For more details, refer to the [track mode documentation](../../modes/track.md). ### What are the upcoming features for training trackers in Ultralytics? 
diff --git a/docs/en/guides/analytics.md b/docs/en/guides/analytics.md index 08710f522c5..cd7fc40dcfc 100644 --- a/docs/en/guides/analytics.md +++ b/docs/en/guides/analytics.md @@ -1,10 +1,10 @@ --- comments: true description: Learn to create line graphs, bar plots, and pie charts using Python with guided instructions and code snippets. Maximize your data visualization skills!. -keywords: Ultralytics, YOLOv8, data visualization, line graphs, bar plots, pie charts, Python, analytics, tutorial, guide +keywords: Ultralytics, YOLO11, data visualization, line graphs, bar plots, pie charts, Python, analytics, tutorial, guide --- -# Analytics using Ultralytics YOLOv8 +# Analytics using Ultralytics YOLO11 ## Introduction @@ -33,263 +33,61 @@ This guide provides a comprehensive overview of three fundamental types of [data - Bar plots, on the other hand, are suitable for comparing quantities across different categories and showing relationships between a category and its numerical value. - Lastly, pie charts are effective for illustrating proportions among categories and showing parts of a whole. -!!! analytics "Analytics Examples" +!!! 
example "Analytics Examples" - === "Line Graph" + === "CLI" - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") + ```bash + yolo solutions analytics show=True - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + # pass the source + yolo solutions analytics source="path/to/video/file.mp4" - out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) + # generate the pie chart + yolo solutions analytics analytics_type="pie" show=True - analytics = solutions.Analytics( - type="line", - writer=out, - im0_shape=(w, h), - view_img=True, - ) - total_counts = 0 - frame_count = 0 - - while cap.isOpened(): - success, frame = cap.read() - - if success: - frame_count += 1 - results = model.track(frame, persist=True, verbose=True) + # generate the bar plots + yolo solutions analytics analytics_type="bar" show=True - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - for box in boxes: - total_counts += 1 - - analytics.update_line(frame_count, total_counts) - - total_counts = 0 - if cv2.waitKey(1) & 0xFF == ord("q"): - break - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() + # generate the area plots + yolo solutions analytics analytics_type="area" show=True ``` - === "Multiple Lines" + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") + from ultralytics import solutions cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - out = cv2.VideoWriter("multiple_line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) - analytics = solutions.Analytics( - type="line", - writer=out, - 
im0_shape=(w, h), - view_img=True, - max_points=200, + # Video writer + out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed ) - frame_count = 0 - data = {} - labels = [] - - while cap.isOpened(): - success, frame = cap.read() - - if success: - frame_count += 1 - - results = model.track(frame, persist=True) - - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - track_ids = results[0].boxes.id.int().cpu().tolist() - clss = results[0].boxes.cls.cpu().tolist() - - for box, track_id, cls in zip(boxes, track_ids, clss): - # Store each class label - if model.names[int(cls)] not in labels: - labels.append(model.names[int(cls)]) - - # Store each class count - if model.names[int(cls)] in data: - data[model.names[int(cls)]] += 1 - else: - data[model.names[int(cls)]] = 0 - - # update lines every frame - analytics.update_multiple_lines(data, labels, frame_count) - data = {} # clear the data list for next frame - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Pie Chart" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) - + # Init analytics analytics = solutions.Analytics( - type="pie", - writer=out, - im0_shape=(w, h), - view_img=True, + show=True, # Display the output + analytics_type="line", # Pass the analytics type, could be "pie", "bar" or "area". + model="yolo11n.pt", # Path to the YOLO11 model file + # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model. 
) - clswise_count = {} - - while cap.isOpened(): - success, frame = cap.read() - if success: - results = model.track(frame, persist=True, verbose=True) - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_pie(clswise_count) - clswise_count = {} - - if cv2.waitKey(1) & 0xFF == ord("q"): - break - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Bar Plot" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) - - analytics = solutions.Analytics( - type="bar", - writer=out, - im0_shape=(w, h), - view_img=True, - ) - - clswise_count = {} - - while cap.isOpened(): - success, frame = cap.read() - if success: - results = model.track(frame, persist=True, verbose=True) - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_bar(clswise_count) - clswise_count = {} - - if cv2.waitKey(1) & 0xFF == ord("q"): - break - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Area chart" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") - - cap = cv2.VideoCapture("path/to/video/file.mp4") - 
assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter("area_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) - - analytics = solutions.Analytics( - type="area", - writer=out, - im0_shape=(w, h), - view_img=True, - ) - - clswise_count = {} + # Process video frame_count = 0 - while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: frame_count += 1 - results = model.track(frame, persist=True, verbose=True) - - if results[0].boxes.id is not None: - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - - for box, cls in zip(boxes, clss): - if model.names[int(cls)] in clswise_count: - clswise_count[model.names[int(cls)]] += 1 - else: - clswise_count[model.names[int(cls)]] = 1 - - analytics.update_area(frame_count, clswise_count) - clswise_count = {} - if cv2.waitKey(1) & 0xFF == ord("q"): - break + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file else: break @@ -302,23 +100,12 @@ This guide provides a comprehensive overview of three fundamental types of [data Here's a table with the `Analytics` arguments: -| Name | Type | Default | Description | -| -------------- | ----------------- | ------------- | -------------------------------------------------------------------------------- | -| `type` | `str` | `None` | Type of data or object. | -| `im0_shape` | `tuple` | `None` | Shape of the initial image. | -| `writer` | `cv2.VideoWriter` | `None` | Object for writing video files. | -| `title` | `str` | `ultralytics` | Title for the visualization. | -| `x_label` | `str` | `x` | Label for the x-axis. | -| `y_label` | `str` | `y` | Label for the y-axis. | -| `bg_color` | `str` | `white` | Background color. | -| `fg_color` | `str` | `black` | Foreground color. 
| -| `line_color` | `str` | `yellow` | Color of the lines. | -| `line_width` | `int` | `2` | Width of the lines. | -| `fontsize` | `int` | `13` | Font size for text. | -| `view_img` | `bool` | `False` | Flag to display the image or video. | -| `save_img` | `bool` | `True` | Flag to save the image or video. | -| `max_points` | `int` | `50` | For multiple lines, total points drawn on frame, before deleting initial points. | -| `points_width` | `int` | `15` | Width of line points highlighter. | +| Name | Type | Default | Description | +| ---------------- | ------ | ------- | ---------------------------------------------------- | +| `analytics_type` | `str` | `line` | Type of graph i.e "line", "bar", "area", "pie" | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -330,11 +117,11 @@ Understanding when and how to use different types of visualizations is crucial f ## FAQ -### How do I create a line graph using Ultralytics YOLOv8 Analytics? +### How do I create a line graph using Ultralytics YOLO11 Analytics? -To create a line graph using Ultralytics YOLOv8 Analytics, follow these steps: +To create a line graph using Ultralytics YOLO11 Analytics, follow these steps: -1. Load a YOLOv8 model and open your video file. +1. Load a YOLO11 model and open your video file. 2. Initialize the `Analytics` class with the type set to "line." 3. Iterate through video frames, updating the line graph with relevant data, such as object counts per frame. 4. Save the output video displaying the line graph. 
@@ -344,21 +131,33 @@ Example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8s.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="line", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - total_counts = sum([1 for box in results[0].boxes.xyxy]) - analytics.update_line(frame_count, total_counts) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -366,11 +165,11 @@ out.release() cv2.destroyAllWindows() ``` -For further details on configuring the `Analytics` class, visit the [Analytics using Ultralytics YOLOv8 📊](#analytics-using-ultralytics-yolov8) section. +For further details on configuring the `Analytics` class, visit the [Analytics using Ultralytics YOLO11 📊](#analytics-using-ultralytics-yolo11) section. -### What are the benefits of using Ultralytics YOLOv8 for creating bar plots? +### What are the benefits of using Ultralytics YOLO11 for creating bar plots? -Using Ultralytics YOLOv8 for creating bar plots offers several benefits: +Using Ultralytics YOLO11 for creating bar plots offers several benefits: 1. 
**Real-time Data Visualization**: Seamlessly integrate [object detection](https://www.ultralytics.com/glossary/object-detection) results into bar plots for dynamic updates. 2. **Ease of Use**: Simple API and functions make it straightforward to implement and visualize data. @@ -382,24 +181,33 @@ Use the following example to generate a bar plot: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8s.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("bar_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="bar", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="bar", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - clswise_count = { - model.names[int(cls)]: boxes.size(0) - for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy) - } - analytics.update_bar(clswise_count) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -409,9 +217,9 @@ cv2.destroyAllWindows() To learn more, visit the [Bar Plot](#visual-samples) section in the guide. -### Why should I use Ultralytics YOLOv8 for creating pie charts in my data visualization projects? +### Why should I use Ultralytics YOLO11 for creating pie charts in my data visualization projects? 
-Ultralytics YOLOv8 is an excellent choice for creating pie charts because: +Ultralytics YOLO11 is an excellent choice for creating pie charts because: 1. **Integration with Object Detection**: Directly integrate object detection results into pie charts for immediate insights. 2. **User-Friendly API**: Simple to set up and use with minimal code. @@ -423,24 +231,33 @@ Here's a quick example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8s.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("pie_chart.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -analytics = solutions.Analytics(type="pie", writer=out, im0_shape=(w, h), view_img=True) +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) +analytics = solutions.Analytics( + analytics_type="pie", + show=True, +) + +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - clswise_count = { - model.names[int(cls)]: boxes.size(0) - for cls, boxes in zip(results[0].boxes.cls.tolist(), results[0].boxes.xyxy) - } - analytics.update_pie(clswise_count) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -450,30 +267,42 @@ cv2.destroyAllWindows() For more information, refer to the [Pie Chart](#visual-samples) section in the guide. -### Can Ultralytics YOLOv8 be used to track objects and dynamically update visualizations? +### Can Ultralytics YOLO11 be used to track objects and dynamically update visualizations? 
-Yes, Ultralytics YOLOv8 can be used to track objects and dynamically update visualizations. It supports tracking multiple objects in real-time and can update various visualizations like line graphs, bar plots, and pie charts based on the tracked objects' data. +Yes, Ultralytics YOLO11 can be used to track objects and dynamically update visualizations. It supports tracking multiple objects in real-time and can update various visualizations like line graphs, bar plots, and pie charts based on the tracked objects' data. Example for tracking and updating a line graph: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8s.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") -out = cv2.VideoWriter("line_plot.avi", cv2.VideoWriter_fourcc(*"MJPG"), fps, (w, h)) +assert cap.isOpened(), "Error reading video file" + +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + +out = cv2.VideoWriter( + "ultralytics_analytics.avi", + cv2.VideoWriter_fourcc(*"MJPG"), + fps, + (1920, 1080), # This is fixed +) -analytics = solutions.Analytics(type="line", writer=out, im0_shape=(w, h), view_img=True) +analytics = solutions.Analytics( + analytics_type="line", + show=True, +) +frame_count = 0 while cap.isOpened(): - success, frame = cap.read() + success, im0 = cap.read() if success: - results = model.track(frame, persist=True) - total_counts = sum([1 for box in results[0].boxes.xyxy]) - analytics.update_line(frame_count, total_counts) - if cv2.waitKey(1) & 0xFF == ord("q"): + frame_count += 1 + im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame + out.write(im0) # write the video file + else: break cap.release() @@ -483,11 +312,11 @@ cv2.destroyAllWindows() To learn about the complete functionality, see the [Tracking](../modes/track.md) section. 
-### What makes Ultralytics YOLOv8 different from other object detection solutions like [OpenCV](https://www.ultralytics.com/glossary/opencv) and [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)? +### What makes Ultralytics YOLO11 different from other object detection solutions like [OpenCV](https://www.ultralytics.com/glossary/opencv) and [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)? -Ultralytics YOLOv8 stands out from other object detection solutions like OpenCV and TensorFlow for multiple reasons: +Ultralytics YOLO11 stands out from other object detection solutions like OpenCV and TensorFlow for multiple reasons: -1. **State-of-the-art [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: YOLOv8 provides superior accuracy in object detection, segmentation, and classification tasks. +1. **State-of-the-art [Accuracy](https://www.ultralytics.com/glossary/accuracy)**: YOLO11 provides superior accuracy in object detection, segmentation, and classification tasks. 2. **Ease of Use**: User-friendly API allows for quick implementation and integration without extensive coding. 3. **Real-time Performance**: Optimized for high-speed inference, suitable for real-time applications. 4. **Diverse Applications**: Supports various tasks including multi-object tracking, custom model training, and exporting to different formats like ONNX, TensorRT, and CoreML. diff --git a/docs/en/guides/azureml-quickstart.md b/docs/en/guides/azureml-quickstart.md index a769eee10d2..d13f9812f74 100644 --- a/docs/en/guides/azureml-quickstart.md +++ b/docs/en/guides/azureml-quickstart.md @@ -1,10 +1,10 @@ --- comments: true -description: Learn how to run YOLOv8 on AzureML. Quickstart instructions for terminal and notebooks to harness Azure's cloud computing for efficient model training. 
-keywords: YOLOv8, AzureML, machine learning, cloud computing, quickstart, terminal, notebooks, model training, Python SDK, AI, Ultralytics +description: Learn how to run YOLO11 on AzureML. Quickstart instructions for terminal and notebooks to harness Azure's cloud computing for efficient model training. +keywords: YOLO11, AzureML, machine learning, cloud computing, quickstart, terminal, notebooks, model training, Python SDK, AI, Ultralytics --- -# YOLOv8 🚀 on AzureML +# YOLO11 🚀 on AzureML ## What is Azure? @@ -22,7 +22,7 @@ For users of YOLO (You Only Look Once), AzureML provides a robust, scalable, and - Utilize built-in tools for data preprocessing, feature selection, and model training. - Collaborate more efficiently with capabilities for MLOps (Machine Learning Operations), including but not limited to monitoring, auditing, and versioning of models and data. -In the subsequent sections, you will find a quickstart guide detailing how to run YOLOv8 object detection models using AzureML, either from a compute terminal or a notebook. +In the subsequent sections, you will find a quickstart guide detailing how to run YOLO11 object detection models using AzureML, either from a compute terminal or a notebook. ## Prerequisites @@ -46,11 +46,12 @@ Start your compute and open a Terminal: ### Create virtualenv -Create your conda virtualenv and install pip in it: +Create your conda virtualenv with a specific Python version (Python 3.13.1 currently has issues with some dependencies in AzureML) +and install pip in it: 
```bash -conda create --name yolov8env -y -conda activate yolov8env +conda create --name yolo11env -y python=3.12 +conda activate yolo11env conda install pip -y ``` @@ -63,18 +64,18 @@ pip install ultralytics pip install onnx>=1.12.0 ``` -### Perform YOLOv8 tasks +### Perform YOLO11 tasks Predict: ```bash -yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' +yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' ``` Train a detection model for 10 [epochs](https://www.ultralytics.com/glossary/epoch) with an initial learning_rate of 0.01: ```bash -yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 +yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` You can find more [instructions to use the Ultralytics CLI here](../quickstart.md#use-ultralytics-with-cli). @@ -89,14 +90,14 @@ Open the compute Terminal. Open Terminal

-From your compute terminal, you need to create a new ipykernel that will be used by your notebook to manage your dependencies: +From your compute terminal, you need to create a new ipykernel (with a specific Python version, because Python 3.13.1 currently has issues with some dependencies in AzureML) that will be used by your notebook to manage your dependencies: ```bash -conda create --name yolov8env -y -conda activate yolov8env +conda create --name yolo11env -y python=3.12 +conda activate yolo11env conda install pip -y conda install ipykernel -y -python -m ipykernel install --user --name yolov8env --display-name "yolov8env" +python -m ipykernel install --user --name yolo11env --display-name "yolo11env" ``` Close your terminal and create a new notebook. From your Notebook, you can select the new kernel. @@ -105,21 +106,21 @@ Then you can open a Notebook cell and install the required dependencies: ```bash %%bash -source activate yolov8env +source activate yolo11env cd ultralytics pip install -r requirements.txt pip install ultralytics pip install onnx>=1.12.0 ``` -Note that we need to use the `source activate yolov8env` for all the %%bash cells, to make sure that the %%bash cell uses environment we want. +Note that we need to use the `source activate yolo11env` for all the %%bash cells, to make sure that the %%bash cell uses the environment we want. 
Run some predictions using the [Ultralytics CLI](../quickstart.md#use-ultralytics-with-cli): ```bash %%bash -source activate yolov8env -yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' +source activate yolo11env +yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' ``` Or with the [Ultralytics Python interface](../quickstart.md#use-ultralytics-with-python), for example to train the model: @@ -128,7 +129,7 @@ Or with the [Ultralytics Python interface](../quickstart.md#use-ultralytics-with from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") # load an official YOLOv8n model +model = YOLO("yolo11n.pt") # load an official YOLO11n model # Use the model model.train(data="coco8.yaml", epochs=3) # train the model @@ -137,47 +138,47 @@ results = model("https://ultralytics.com/images/bus.jpg") # predict on an image path = model.export(format="onnx") # export the model to ONNX format ``` -You can use either the Ultralytics CLI or Python interface for running YOLOv8 tasks, as described in the terminal section above. +You can use either the Ultralytics CLI or Python interface for running YOLO11 tasks, as described in the terminal section above. -By following these steps, you should be able to get YOLOv8 running quickly on AzureML for quick trials. For more advanced uses, you may refer to the full AzureML documentation linked at the beginning of this guide. +By following these steps, you should be able to get YOLO11 running quickly on AzureML for quick trials. For more advanced uses, you may refer to the full AzureML documentation linked at the beginning of this guide. ## Explore More with AzureML -This guide serves as an introduction to get you up and running with YOLOv8 on AzureML. However, it only scratches the surface of what AzureML can offer. 
To delve deeper and unlock the full potential of AzureML for your machine learning projects, consider exploring the following resources: +This guide serves as an introduction to get you up and running with YOLO11 on AzureML. However, it only scratches the surface of what AzureML can offer. To delve deeper and unlock the full potential of AzureML for your machine learning projects, consider exploring the following resources: - [Create a Data Asset](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets): Learn how to set up and manage your data assets effectively within the AzureML environment. - [Initiate an AzureML Job](https://learn.microsoft.com/azure/machine-learning/how-to-train-model): Get a comprehensive understanding of how to kickstart your machine learning training jobs on AzureML. - [Register a Model](https://learn.microsoft.com/azure/machine-learning/how-to-manage-models): Familiarize yourself with model management practices including registration, versioning, and deployment. -- [Train YOLOv8 with AzureML Python SDK](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azure-machine-learning-python-sdk-8268696be8ba): Explore a step-by-step guide on using the AzureML Python SDK to train your YOLOv8 models. -- [Train YOLOv8 with AzureML CLI](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azureml-and-the-az-cli-73d3c870ba8e): Discover how to utilize the command-line interface for streamlined training and management of YOLOv8 models on AzureML. +- [Train YOLO11 with AzureML Python SDK](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azure-machine-learning-python-sdk-8268696be8ba): Explore a step-by-step guide on using the AzureML Python SDK to train your YOLO11 models. 
+- [Train YOLO11 with AzureML CLI](https://medium.com/@ouphi/how-to-train-the-yolov8-model-with-azureml-and-the-az-cli-73d3c870ba8e): Discover how to utilize the command-line interface for streamlined training and management of YOLO11 models on AzureML. ## FAQ -### How do I run YOLOv8 on AzureML for model training? +### How do I run YOLO11 on AzureML for model training? -Running YOLOv8 on AzureML for model training involves several steps: +Running YOLO11 on AzureML for model training involves several steps: 1. **Create a Compute Instance**: From your AzureML workspace, navigate to Compute > Compute instances > New, and select the required instance. -2. **Setup Environment**: Start your compute instance, open a terminal, and create a conda environment: +2. **Setup Environment**: Start your compute instance, open a terminal, and create a conda environment with a pinned Python version (Python 3.13.1 is not supported yet): ```bash - conda create --name yolov8env -y - conda activate yolov8env + conda create --name yolo11env -y python=3.12 + conda activate yolo11env conda install pip -y pip install ultralytics onnx>=1.12.0 ``` -3. **Run YOLOv8 Tasks**: Use the Ultralytics CLI to train your model: +3. **Run YOLO11 Tasks**: Use the Ultralytics CLI to train your model: ```bash - yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 + yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` For more details, you can refer to the [instructions to use the Ultralytics CLI](../quickstart.md#use-ultralytics-with-cli). -### What are the benefits of using AzureML for YOLOv8 training? +### What are the benefits of using AzureML for YOLO11 training? -AzureML provides a robust and efficient ecosystem for training YOLOv8 models: +AzureML provides a robust and efficient ecosystem for training YOLO11 models: - **Scalability**: Easily scale your compute resources as your data and model complexity grows. 
- **MLOps Integration**: Utilize features like versioning, monitoring, and auditing to streamline ML operations. @@ -185,9 +186,9 @@ AzureML provides a robust and efficient ecosystem for training YOLOv8 models: These advantages make AzureML an ideal platform for projects ranging from quick prototypes to large-scale deployments. For more tips, check out [AzureML Jobs](https://learn.microsoft.com/azure/machine-learning/how-to-train-model). -### How do I troubleshoot common issues when running YOLOv8 on AzureML? +### How do I troubleshoot common issues when running YOLO11 on AzureML? -Troubleshooting common issues with YOLOv8 on AzureML can involve the following steps: +Troubleshooting common issues with YOLO11 on AzureML can involve the following steps: - **Dependency Issues**: Ensure all required packages are installed. Refer to the `requirements.txt` file for dependencies. - **Environment Setup**: Verify that your conda environment is correctly activated before running commands. @@ -202,7 +203,7 @@ Yes, AzureML allows you to use both the Ultralytics CLI and the Python interface - **CLI**: Ideal for quick tasks and running standard scripts directly from the terminal. ```bash - yolo predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' ``` - **Python Interface**: Useful for more complex tasks requiring custom coding and integration within notebooks. @@ -210,18 +211,18 @@ Yes, AzureML allows you to use both the Ultralytics CLI and the Python interface ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.train(data="coco8.yaml", epochs=3) ``` Refer to the quickstart guides for more detailed instructions [here](../quickstart.md#use-ultralytics-with-cli) and [here](../quickstart.md#use-ultralytics-with-python). 
-### What is the advantage of using Ultralytics YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models? +### What is the advantage of using Ultralytics YOLO11 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models? -Ultralytics YOLOv8 offers several unique advantages over competing object detection models: +Ultralytics YOLO11 offers several unique advantages over competing object detection models: - **Speed**: Faster inference and training times compared to models like Faster R-CNN and SSD. - **[Accuracy](https://www.ultralytics.com/glossary/accuracy)**: High accuracy in detection tasks with features like anchor-free design and enhanced augmentation strategies. - **Ease of Use**: Intuitive API and CLI for quick setup, making it accessible both to beginners and experts. -To explore more about YOLOv8's features, visit the [Ultralytics YOLO](https://www.ultralytics.com/yolo) page for detailed insights. +To explore more about YOLO11's features, visit the [Ultralytics YOLO](https://www.ultralytics.com/yolo) page for detailed insights. diff --git a/docs/en/guides/conda-quickstart.md b/docs/en/guides/conda-quickstart.md index 6b52339260e..b958c85f273 100644 --- a/docs/en/guides/conda-quickstart.md +++ b/docs/en/guides/conda-quickstart.md @@ -37,7 +37,7 @@ This guide provides a comprehensive introduction to setting up a Conda environme First, let's create a new Conda environment. 
Open your terminal and run the following command: ```bash -conda create --name ultralytics-env python=3.8 -y +conda create --name ultralytics-env python=3.11 -y ``` Activate the new environment: @@ -73,7 +73,7 @@ With Ultralytics installed, you can now start using its robust features for [obj ```python from ultralytics import YOLO -model = YOLO("yolov8n.pt") # initialize model +model = YOLO("yolo11n.pt") # initialize model results = model("path/to/image.jpg") # perform inference results[0].show() # display results for the first image ``` @@ -135,7 +135,7 @@ Congratulations! You have successfully set up a Conda environment, installed the Setting up a Conda environment for Ultralytics projects is straightforward and ensures smooth package management. First, create a new Conda environment using the following command: ```bash -conda create --name ultralytics-env python=3.8 -y +conda create --name ultralytics-env python=3.11 -y ``` Then, activate the new environment with: diff --git a/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md b/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md index db61c08196c..716310598b7 100644 --- a/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md +++ b/docs/en/guides/coral-edge-tpu-on-raspberry-pi.md @@ -1,10 +1,10 @@ --- comments: true -description: Learn how to boost your Raspberry Pi's ML performance using Coral Edge TPU with Ultralytics YOLOv8. Follow our detailed setup and installation guide. -keywords: Coral Edge TPU, Raspberry Pi, YOLOv8, Ultralytics, TensorFlow Lite, ML inference, machine learning, AI, installation guide, setup tutorial +description: Learn how to boost your Raspberry Pi's ML performance using Coral Edge TPU with Ultralytics YOLO11. Follow our detailed setup and installation guide. 
+keywords: Coral Edge TPU, Raspberry Pi, YOLO11, Ultralytics, TensorFlow Lite, ML inference, machine learning, AI, installation guide, setup tutorial --- -# Coral Edge TPU on a Raspberry Pi with Ultralytics YOLOv8 🚀 +# Coral Edge TPU on a Raspberry Pi with Ultralytics YOLO11 🚀

Raspberry Pi single board computer with USB Edge TPU accelerator @@ -27,11 +27,11 @@ The Coral Edge TPU is a compact device that adds an Edge TPU coprocessor to your ## Boost Raspberry Pi Model Performance with Coral Edge TPU -Many people want to run their models on an embedded or mobile device such as a Raspberry Pi, since they are very power efficient and can be used in many different applications. However, the inference performance on these devices is usually poor even when using formats like [onnx](../integrations/onnx.md) or [openvino](../integrations/openvino.md). The Coral Edge TPU is a great solution to this problem, since it can be used with a Raspberry Pi and accelerate inference performance greatly. +Many people want to run their models on an embedded or mobile device such as a Raspberry Pi, since they are very power efficient and can be used in many different applications. However, the inference performance on these devices is usually poor even when using formats like [ONNX](../integrations/onnx.md) or [OpenVINO](../integrations/openvino.md). The Coral Edge TPU is a great solution to this problem, since it can be used with a Raspberry Pi and accelerate inference performance greatly. ## Edge TPU on Raspberry Pi with TensorFlow Lite (New)⭐ -The [existing guide](https://coral.ai/docs/accelerator/get-started/) by Coral on how to use the Edge TPU with a Raspberry Pi is outdated, and the current Coral Edge TPU runtime builds do not work with the current TensorFlow Lite runtime versions anymore. In addition to that, Google seems to have completely abandoned the Coral project, and there have not been any updates between 2021 and 2024. This guide will show you how to get the Edge TPU working with the latest versions of the TensorFlow Lite runtime and an updated Coral Edge TPU runtime on a Raspberry Pi single board computer (SBC). 
+The [existing guide](https://coral.ai/docs/accelerator/get-started/) by Coral on how to use the Edge TPU with a Raspberry Pi is outdated, and the current Coral Edge TPU runtime builds do not work with the current TensorFlow Lite runtime versions anymore. In addition to that, Google seems to have completely abandoned the Coral project, and there have not been any updates between 2021 and 2025. This guide will show you how to get the Edge TPU working with the latest versions of the TensorFlow Lite runtime and an updated Coral Edge TPU runtime on a Raspberry Pi single board computer (SBC). ## Prerequisites @@ -47,6 +47,7 @@ This guide assumes that you already have a working Raspberry Pi OS install and h ### Installing the Edge TPU runtime First, we need to install the Edge TPU runtime. There are many different versions available, so you need to choose the right version for your operating system. +The high frequency version runs the Edge TPU at a higher clock speed, which improves performance. However, it might result in the Edge TPU thermal throttling, so it is recommended to have some sort of cooling mechanism in place. | Raspberry Pi OS | High frequency mode | Version to download | | --------------- | :-----------------: | ------------------------------------------ | @@ -81,11 +82,11 @@ After installing the runtime, you need to plug in your Coral Edge TPU into a USB sudo apt remove libedgetpu1-max ``` -## Export your model to a Edge TPU compatible model +## Export to Edge TPU To use the Edge TPU, you need to convert your model into a compatible format. It is recommended that you run export on Google Colab, x86_64 Linux machine, using the official [Ultralytics Docker container](docker-quickstart.md), or using [Ultralytics HUB](../hub/quickstart.md), since the Edge TPU compiler is not available on ARM. See the [Export Mode](../modes/export.md) for the available arguments. -!!! note "Exporting the model" +!!! 
example "Exporting the model" === "Python" @@ -105,13 +106,27 @@ To use the Edge TPU, you need to convert your model into a compatible format. It yolo export model=path/to/model.pt format=edgetpu # Export an official model or custom model ``` -The exported model will be saved in the `_saved_model/` folder with the name `_full_integer_quant_edgetpu.tflite`. +The exported model will be saved in the `_saved_model/` folder with the name `_full_integer_quant_edgetpu.tflite`. It is important that your model file name ends with the suffix `_edgetpu.tflite`; otherwise, Ultralytics will not recognize it as an Edge TPU model. ## Running the model -After exporting your model, you can run inference with it using the following code: +Before you can actually run the model, you will need to install the correct libraries. -!!! note "Running the model" +If `tensorflow` is installed, uninstall tensorflow with the following command: + +```bash +pip uninstall tensorflow tensorflow-aarch64 +``` + +Then install/update `tflite-runtime`: + +```bash +pip install -U tflite-runtime +``` + +Now you can run inference using the following code: + +!!! 
example "Running the model" === "Python" @@ -119,7 +134,7 @@ After exporting your model, you can run inference with it using the following co from ultralytics import YOLO # Load a model - model = YOLO("path/to/edgetpu_model.tflite") # Load an official model or custom model + model = YOLO("path/to/_full_integer_quant_edgetpu.tflite") # Load an official model or custom model # Run Prediction model.predict("path/to/source.png") @@ -128,33 +143,69 @@ After exporting your model, you can run inference with it using the following co === "CLI" ```bash - yolo predict model=path/to/edgetpu_model.tflite source=path/to/source.png # Load an official model or custom model + yolo predict model=path/to/_full_integer_quant_edgetpu.tflite source=path/to/source.png # Load an official model or custom model ``` Find comprehensive information on the [Predict](../modes/predict.md) page for full prediction mode details. -???+ warning "Important" +!!! note "Inference with multiple Edge TPUs" - You should run the model using `tflite-runtime` and not `tensorflow`. - If `tensorflow` is installed, uninstall tensorflow with the following command: + If you have multiple Edge TPUs, you can use the following code to select a specific TPU. - ```bash - pip uninstall tensorflow tensorflow-aarch64 - ``` + === "Python" - Then install/update `tflite-runtime`: + ```python + from ultralytics import YOLO - ``` - pip install -U tflite-runtime - ``` + # Load a model + model = YOLO("path/to/_full_integer_quant_edgetpu.tflite") # Load an official model or custom model + + # Run Prediction + model.predict("path/to/source.png") # Inference defaults to the first TPU + + model.predict("path/to/source.png", device="tpu:0") # Select the first TPU + + model.predict("path/to/source.png", device="tpu:1") # Select the second TPU + ``` + +## Benchmarks + +!!! tip "Benchmarks" + + Tested with Raspberry Pi OS Bookworm 64-bit and a USB Coral Edge TPU. + + !!! 
note + Shown is the inference time, pre-/postprocessing is not included. + + === "Raspberry Pi 4B 2GB" + + | Image Size | Model | Standard Inference Time (ms) | High Frequency Inference Time (ms) | + |------------|---------|------------------------------|------------------------------------| + | 320 | YOLOv8n | 32.2 | 26.7 | + | 320 | YOLOv8s | 47.1 | 39.8 | + | 512 | YOLOv8n | 73.5 | 60.7 | + | 512 | YOLOv8s | 149.6 | 125.3 | + + === "Raspberry Pi 5 8GB" + + | Image Size | Model | Standard Inference Time (ms) | High Frequency Inference Time (ms) | + |------------|---------|------------------------------|------------------------------------| + | 320 | YOLOv8n | 22.2 | 16.7 | + | 320 | YOLOv8s | 40.1 | 32.2 | + | 512 | YOLOv8n | 53.5 | 41.6 | + | 512 | YOLOv8s | 132.0 | 103.3 | + + On average: - If you want a `tflite-runtime` wheel for `tensorflow` 2.15.0 download it from [here](https://github.com/feranick/TFlite-builds/releases) and install it using `pip` or your package manager of choice. + - The Raspberry Pi 5 is 22% faster with the standard mode than the Raspberry Pi 4B. + - The Raspberry Pi 5 is 30.2% faster with the high frequency mode than the Raspberry Pi 4B. + - The high frequency mode is 28.4% faster than the standard mode. ## FAQ -### What is a Coral Edge TPU and how does it enhance Raspberry Pi's performance with Ultralytics YOLOv8? +### What is a Coral Edge TPU and how does it enhance Raspberry Pi's performance with Ultralytics YOLO11? -The Coral Edge TPU is a compact device designed to add an Edge TPU coprocessor to your system. This coprocessor enables low-power, high-performance [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) inference, particularly optimized for TensorFlow Lite models. When using a Raspberry Pi, the Edge TPU accelerates ML model inference, significantly boosting performance, especially for Ultralytics YOLOv8 models. 
You can read more about the Coral Edge TPU on their [home page](https://coral.ai/products/accelerator). +The Coral Edge TPU is a compact device designed to add an Edge TPU coprocessor to your system. This coprocessor enables low-power, high-performance [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) inference, particularly optimized for TensorFlow Lite models. When using a Raspberry Pi, the Edge TPU accelerates ML model inference, significantly boosting performance, especially for Ultralytics YOLO11 models. You can read more about the Coral Edge TPU on their [home page](https://coral.ai/products/accelerator). ### How do I install the Coral Edge TPU runtime on a Raspberry Pi? @@ -166,9 +217,9 @@ sudo dpkg -i path/to/package.deb Make sure to uninstall any previous Coral Edge TPU runtime versions by following the steps outlined in the [Installation Walkthrough](#installation-walkthrough) section. -### Can I export my Ultralytics YOLOv8 model to be compatible with Coral Edge TPU? +### Can I export my Ultralytics YOLO11 model to be compatible with Coral Edge TPU? -Yes, you can export your Ultralytics YOLOv8 model to be compatible with the Coral Edge TPU. It is recommended to perform the export on Google Colab, an x86_64 Linux machine, or using the [Ultralytics Docker container](docker-quickstart.md). You can also use Ultralytics HUB for exporting. Here is how you can export your model using Python and CLI: +Yes, you can export your Ultralytics YOLO11 model to be compatible with the Coral Edge TPU. It is recommended to perform the export on Google Colab, an x86_64 Linux machine, or using the [Ultralytics Docker container](docker-quickstart.md). You can also use Ultralytics HUB for exporting. Here is how you can export your model using Python and CLI: !!! 
note "Exporting the model" @@ -192,7 +243,7 @@ Yes, you can export your Ultralytics YOLOv8 model to be compatible with the Cora For more information, refer to the [Export Mode](../modes/export.md) documentation. -### What should I do if TensorFlow is already installed on my Raspberry Pi but I want to use tflite-runtime instead? +### What should I do if TensorFlow is already installed on my Raspberry Pi, but I want to use tflite-runtime instead? If you have TensorFlow installed on your Raspberry Pi and need to switch to `tflite-runtime`, you'll need to uninstall TensorFlow first using: @@ -208,9 +259,9 @@ pip install -U tflite-runtime For a specific wheel, such as TensorFlow 2.15.0 `tflite-runtime`, you can download it from [this link](https://github.com/feranick/TFlite-builds/releases) and install it using `pip`. Detailed instructions are available in the section on running the model [Running the Model](#running-the-model). -### How do I run inference with an exported YOLOv8 model on a Raspberry Pi using the Coral Edge TPU? +### How do I run inference with an exported YOLO11 model on a Raspberry Pi using the Coral Edge TPU? -After exporting your YOLOv8 model to an Edge TPU-compatible format, you can run inference using the following code snippets: +After exporting your YOLO11 model to an Edge TPU-compatible format, you can run inference using the following code snippets: !!! 
note "Running the model" diff --git a/docs/en/guides/data-collection-and-annotation.md b/docs/en/guides/data-collection-and-annotation.md index dce15e0a682..058323ee29e 100644 --- a/docs/en/guides/data-collection-and-annotation.md +++ b/docs/en/guides/data-collection-and-annotation.md @@ -136,12 +136,12 @@ Bouncing your ideas and queries off other [computer vision](https://www.ultralyt ### Where to Find Help and Support -- **GitHub Issues:** Visit the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face. +- **GitHub Issues:** Visit the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects. ## Conclusion @@ -159,7 +159,7 @@ Ensuring high consistency and accuracy in data annotation involves establishing ### How many images do I need for training Ultralytics YOLO models? -For effective [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. 
If training for just one class, begin with at least 100 annotated images and train for approximately 100 [epochs](https://www.ultralytics.com/glossary/epoch). More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLOv8 training guide](../modes/train.md). +For effective [transfer learning](https://www.ultralytics.com/glossary/transfer-learning) and object detection with Ultralytics YOLO models, start with a minimum of a few hundred annotated objects per class. If training for just one class, begin with at least 100 annotated images and train for approximately 100 [epochs](https://www.ultralytics.com/glossary/epoch). More complex tasks might require thousands of images per class to achieve high reliability and performance. Quality annotations are crucial, so ensure your data collection and annotation processes are rigorous and aligned with your project's specific goals. Explore detailed training strategies in the [YOLO11 training guide](../modes/train.md). ### What are some popular tools for data annotation? diff --git a/docs/en/guides/deepstream-nvidia-jetson.md b/docs/en/guides/deepstream-nvidia-jetson.md index ab15009b993..1170eddc93a 100644 --- a/docs/en/guides/deepstream-nvidia-jetson.md +++ b/docs/en/guides/deepstream-nvidia-jetson.md @@ -1,10 +1,10 @@ --- comments: true -description: Learn how to deploy Ultralytics YOLOv8 on NVIDIA Jetson devices using TensorRT and DeepStream SDK. Explore performance benchmarks and maximize AI capabilities. -keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, embedded systems, deep learning, TensorRT, DeepStream SDK, computer vision +description: Learn how to deploy Ultralytics YOLO11 on NVIDIA Jetson devices using TensorRT and DeepStream SDK. 
Explore performance benchmarks and maximize AI capabilities. +keywords: Ultralytics, YOLO11, NVIDIA Jetson, JetPack, AI deployment, embedded systems, deep learning, TensorRT, DeepStream SDK, computer vision --- -# Ultralytics YOLOv8 on NVIDIA Jetson using DeepStream SDK and TensorRT +# Ultralytics YOLO11 on NVIDIA Jetson using DeepStream SDK and TensorRT


@@ -14,16 +14,17 @@ keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, embedded s allowfullscreen>
- Watch: How to Run Multiple Streams with DeepStream SDK on Jetson Nano using Ultralytics YOLOv8 + Watch: How to Run Multiple Streams with DeepStream SDK on Jetson Nano using Ultralytics YOLO11

-This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices using DeepStream SDK and TensorRT. Here we use TensorRT to maximize the inference performance on the Jetson platform. +This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices using DeepStream SDK and TensorRT. Here we use TensorRT to maximize the inference performance on the Jetson platform. DeepStream on NVIDIA Jetson !!! note - This guide has been tested with both [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.4](https://developer.nvidia.com/jetpack-sdk-464). It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy. + This guide has been tested with [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) running the latest stable JetPack release of [JP6.1](https://developer.nvidia.com/embedded/jetpack-sdk-61), + [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.4](https://developer.nvidia.com/jetpack-sdk-464). 
It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy. ## What is NVIDIA DeepStream? @@ -33,48 +34,63 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti Before you start to follow this guide: -- Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLOv8](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLOv8 +- Visit our documentation, [Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO11](nvidia-jetson.md) to set up your NVIDIA Jetson device with Ultralytics YOLO11 - Install [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) according to the JetPack version - For JetPack 4.6.4, install [DeepStream 6.0.1](https://docs.nvidia.com/metropolis/deepstream/6.0.1/dev-guide/text/DS_Quickstart.html) - For JetPack 5.1.3, install [DeepStream 6.3](https://docs.nvidia.com/metropolis/deepstream/6.3/dev-guide/text/DS_Quickstart.html) + - For JetPack 6.1, install [DeepStream 7.1](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_Installation.html) !!! tip In this guide we have used the Debian package method of installing DeepStream SDK to the Jetson device. You can also visit the [DeepStream SDK on Jetson (Archived)](https://developer.nvidia.com/embedded/deepstream-on-jetson-downloads-archived) to access legacy versions of DeepStream. -## DeepStream Configuration for YOLOv8 +## DeepStream Configuration for YOLO11 Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) GitHub repository which includes NVIDIA DeepStream SDK support for YOLO models. We appreciate the efforts of marcoslucianops for his contributions! -1. Install dependencies +1. 
Install Ultralytics with necessary dependencies ```bash - pip install cmake - pip install onnxsim + cd ~ + pip install -U pip + git clone https://github.com/ultralytics/ultralytics + cd ultralytics + pip install -e ".[export]" onnxslim ``` -2. Clone the following repository +2. Clone the DeepStream-Yolo repository ```bash + cd ~ git clone https://github.com/marcoslucianops/DeepStream-Yolo - cd DeepStream-Yolo ``` -3. Download Ultralytics YOLOv8 detection model (.pt) of your choice from [YOLOv8 releases](https://github.com/ultralytics/assets/releases). Here we use [yolov8s.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt). +3. Copy the `export_yoloV8.py` file from `DeepStream-Yolo/utils` directory to the `ultralytics` folder ```bash - wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt + cp ~/DeepStream-Yolo/utils/export_yoloV8.py ~/ultralytics + cd ultralytics ``` !!! note - You can also use a [custom trained YOLOv8 model](https://docs.ultralytics.com/modes/train/). + `export_yoloV8.py` works for both YOLOv8 and YOLO11 models. -4. Convert model to ONNX +4. Download Ultralytics YOLO11 detection model (.pt) of your choice from [YOLO11 releases](https://github.com/ultralytics/assets/releases). Here we use [yolo11s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt). ```bash - python3 utils/export_yoloV8.py -w yolov8s.pt + wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt + ``` + + !!! note + + You can also use a [custom trained YOLO11 model](https://docs.ultralytics.com/modes/train/). + +5. Convert model to ONNX + + ```bash + python3 export_yoloV8.py -w yolo11s.pt ``` !!! note "Pass the below arguments to the above command" @@ -120,7 +136,14 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc --batch 4 ``` -5. Set the CUDA version according to the JetPack version installed +6. 
Copy the generated `.onnx` model file and `labels.txt` file to the `DeepStream-Yolo` folder + + ```bash + cp yolo11s.pt.onnx labels.txt ~/DeepStream-Yolo + cd ~/DeepStream-Yolo + ``` + +7. Set the CUDA version according to the JetPack version installed For JetPack 4.6.4: @@ -134,24 +157,30 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc export CUDA_VER=11.4 ``` -6. Compile the library + For JetPack 6.1: + + ```bash + export CUDA_VER=12.6 + ``` + +8. Compile the library ```bash make -C nvdsinfer_custom_impl_Yolo clean && make -C nvdsinfer_custom_impl_Yolo ``` -7. Edit the `config_infer_primary_yoloV8.txt` file according to your model (for YOLOv8s with 80 classes) +9. Edit the `config_infer_primary_yoloV8.txt` file according to your model (for YOLO11s with 80 classes) ```bash [property] ... - onnx-file=yolov8s.onnx + onnx-file=yolo11s.pt.onnx ... num-detected-classes=80 ... ``` -8. Edit the `deepstream_app_config` file +10. Edit the `deepstream_app_config` file ```bash ... @@ -160,7 +189,7 @@ Here we are using [marcoslucianops/DeepStream-Yolo](https://github.com/marcosluc config-file=config_infer_primary_yoloV8.txt ``` -9. You can also change the video source in `deepstream_app_config` file. Here a default video file is loaded +11. You can also change the video source in `deepstream_app_config` file. Here a default video file is loaded ```bash ... @@ -179,16 +208,20 @@ deepstream-app -c deepstream_app_config.txt It will take a long time to generate the TensorRT engine file before starting the inference. So please be patient. -
YOLOv8 with deepstream
+
YOLO11 with deepstream
!!! tip - If you want to convert the model to FP16 [precision](https://www.ultralytics.com/glossary/precision), simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt` + If you want to convert the model to FP16 precision, simply set `model-engine-file=model_b1_gpu0_fp16.engine` and `network-mode=2` inside `config_infer_primary_yoloV8.txt` ## INT8 Calibration If you want to use INT8 precision for inference, you need to follow the steps below +!!! note + + Currently INT8 does not work with TensorRT 10.x. This section of the guide has been tested with TensorRT 8.x which is expected to work. + 1. Set `OPENCV` environment variable ```bash @@ -303,50 +336,92 @@ deepstream-app -c deepstream_app_config.txt ## Benchmark Results -The following table summarizes how YOLOv8s models perform at different TensorRT precision levels with an input size of 640x640 on NVIDIA Jetson Orin NX 16GB. +The following benchmarks summarize how YOLO11 models perform at different TensorRT precision levels with an input size of 640x640 on NVIDIA Jetson Orin NX 16GB. + +### Comparison Chart + +
Jetson DeepStream Benchmarks Chart
+ +### Detailed Comparison Table + +!!! performance + + === "YOLO11n" + + | Format | Status | Inference time (ms/im) | + |-----------------|--------|------------------------| + | TensorRT (FP32) | ✅ | 8.64 | + | TensorRT (FP16) | ✅ | 5.27 | + | TensorRT (INT8) | ✅ | 4.54 | + + === "YOLO11s" + + | Format | Status | Inference time (ms/im) | + |-----------------|--------|------------------------| + | TensorRT (FP32) | ✅ | 14.53 | + | TensorRT (FP16) | ✅ | 7.91 | + | TensorRT (INT8) | ✅ | 6.05 | + + === "YOLO11m" + + | Format | Status | Inference time (ms/im) | + |-----------------|--------|------------------------| + | TensorRT (FP32) | ✅ | 32.05 | + | TensorRT (FP16) | ✅ | 15.55 | + | TensorRT (INT8) | ✅ | 10.43 | + + === "YOLO11l" + + | Format | Status | Inference time (ms/im) | + |-----------------|--------|------------------------| + | TensorRT (FP32) | ✅ | 39.68 | + | TensorRT (FP16) | ✅ | 19.88 | + | TensorRT (INT8) | ✅ | 13.64 | + + === "YOLO11x" -| Model Name | Precision | Inference Time (ms/im) | FPS | -| ---------- | --------- | ---------------------- | --- | -| YOLOv8s | FP32 | 15.63 | 64 | -| | FP16 | 7.94 | 126 | -| | INT8 | 5.53 | 181 | + | Format | Status | Inference time (ms/im) | + |-----------------|--------|------------------------| + | TensorRT (FP32) | ✅ | 80.65 | + | TensorRT (FP16) | ✅ | 39.06 | + | TensorRT (INT8) | ✅ | 22.83 | -### Acknowledgements +## Acknowledgements This guide was initially created by our friends at Seeed Studio, Lakshantha and Elaine. ## FAQ -### How do I set up Ultralytics YOLOv8 on an NVIDIA Jetson device? +### How do I set up Ultralytics YOLO11 on an NVIDIA Jetson device? -To set up Ultralytics YOLOv8 on an [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) device, you first need to install the [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) compatible with your JetPack version.
Follow the step-by-step guide in our [Quick Start Guide](nvidia-jetson.md) to configure your NVIDIA Jetson for YOLOv8 deployment. +To set up Ultralytics YOLO11 on an [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) device, you first need to install the [DeepStream SDK](https://developer.nvidia.com/deepstream-getting-started) compatible with your JetPack version. Follow the step-by-step guide in our [Quick Start Guide](nvidia-jetson.md) to configure your NVIDIA Jetson for YOLO11 deployment. -### What is the benefit of using TensorRT with YOLOv8 on NVIDIA Jetson? +### What is the benefit of using TensorRT with YOLO11 on NVIDIA Jetson? -Using TensorRT with YOLOv8 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines. +Using TensorRT with YOLO11 optimizes the model for inference, significantly reducing latency and improving throughput on NVIDIA Jetson devices. TensorRT provides high-performance, low-latency [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference through layer fusion, precision calibration, and kernel auto-tuning. This leads to faster and more efficient execution, particularly useful for real-time applications like video analytics and autonomous machines. -### Can I run Ultralytics YOLOv8 with DeepStream SDK across different NVIDIA Jetson hardware? +### Can I run Ultralytics YOLO11 with DeepStream SDK across different NVIDIA Jetson hardware? -Yes, the guide for deploying Ultralytics YOLOv8 with the DeepStream SDK and TensorRT is compatible across the entire NVIDIA Jetson lineup. 
This includes devices like the Jetson Orin NX 16GB with [JetPack 5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and the Jetson Nano 4GB with [JetPack 4.6.4](https://developer.nvidia.com/jetpack-sdk-464). Refer to the section [DeepStream Configuration for YOLOv8](#deepstream-configuration-for-yolov8) for detailed steps. +Yes, the guide for deploying Ultralytics YOLO11 with the DeepStream SDK and TensorRT is compatible across the entire NVIDIA Jetson lineup. This includes devices like the Jetson Orin NX 16GB with [JetPack 5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and the Jetson Nano 4GB with [JetPack 4.6.4](https://developer.nvidia.com/jetpack-sdk-464). Refer to the section [DeepStream Configuration for YOLO11](#deepstream-configuration-for-yolo11) for detailed steps. -### How can I convert a YOLOv8 model to ONNX for DeepStream? +### How can I convert a YOLO11 model to ONNX for DeepStream? -To convert a YOLOv8 model to ONNX format for deployment with DeepStream, use the `utils/export_yoloV8.py` script from the [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) repository. +To convert a YOLO11 model to ONNX format for deployment with DeepStream, use the `utils/export_yoloV8.py` script from the [DeepStream-Yolo](https://github.com/marcoslucianops/DeepStream-Yolo) repository. Here's an example command: ```bash -python3 utils/export_yoloV8.py -w yolov8s.pt --opset 12 --simplify +python3 utils/export_yoloV8.py -w yolo11s.pt --opset 12 --simplify ``` For more details on model conversion, check out our [model export section](../modes/export.md). -### What are the performance benchmarks for YOLOv8 on NVIDIA Jetson Orin NX? +### What are the performance benchmarks for YOLO11 on NVIDIA Jetson Orin NX? -The performance of YOLOv8 models on NVIDIA Jetson Orin NX 16GB varies based on TensorRT precision levels.
For example, YOLOv8s models achieve: +The performance of YOLO11 models on NVIDIA Jetson Orin NX 16GB varies based on TensorRT precision levels. For example, YOLO11s models achieve: -- **FP32 Precision**: 15.63 ms/im, 64 FPS +- **FP32 Precision**: 14.6 ms/im, 68.5 FPS - **FP16 Precision**: 7.94 ms/im, 126 FPS -- **INT8 Precision**: 5.53 ms/im, 181 FPS +- **INT8 Precision**: 5.95 ms/im, 168 FPS -These benchmarks underscore the efficiency and capability of using TensorRT-optimized YOLOv8 models on NVIDIA Jetson hardware. For further details, see our [Benchmark Results](#benchmark-results) section. +These benchmarks underscore the efficiency and capability of using TensorRT-optimized YOLO11 models on NVIDIA Jetson hardware. For further details, see our [Benchmark Results](#benchmark-results) section. diff --git a/docs/en/guides/defining-project-goals.md b/docs/en/guides/defining-project-goals.md index c5e3c58cf32..2a5dc1b124e 100644 --- a/docs/en/guides/defining-project-goals.md +++ b/docs/en/guides/defining-project-goals.md @@ -1,7 +1,7 @@ --- comments: true description: Learn how to define clear goals and objectives for your computer vision project with our practical guide. Includes tips on problem statements, measurable objectives, and key decisions. -keywords: computer vision, project planning, problem statement, measurable objectives, dataset preparation, model selection, YOLOv8, Ultralytics +keywords: computer vision, project planning, problem statement, measurable objectives, dataset preparation, model selection, YOLO11, Ultralytics --- # A Practical Guide for Defining Your [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Project @@ -30,7 +30,7 @@ Let's walk through an example. Consider a computer vision project where you want to [estimate the speed of vehicles](./speed-estimation.md) on a highway. The core issue is that current speed monitoring methods are inefficient and error-prone due to outdated radar systems and manual processes. 
The project aims to develop a real-time computer vision system that can replace legacy [speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) systems.

- Speed Estimation Using YOLOv8 + Speed Estimation Using YOLO11

Primary users include traffic management authorities and law enforcement, while secondary stakeholders are highway planners and the public benefiting from safer roads. Key requirements involve evaluating budget, time, and personnel, as well as addressing technical needs like high-resolution cameras and real-time data processing. Additionally, regulatory constraints on privacy and [data security](https://www.ultralytics.com/glossary/data-security) must be considered. @@ -85,7 +85,7 @@ The most popular computer vision tasks include [image classification](https://ww Overview of Computer Vision Tasks

-For a detailed explanation of various tasks, please take a look at the Ultralytics Docs page on [YOLOv8 Tasks](../tasks/index.md). +For a detailed explanation of various tasks, please take a look at the Ultralytics Docs page on [YOLO11 Tasks](../tasks/index.md). ### Can a Pre-trained Model Remember Classes It Knew Before Custom Training? @@ -114,12 +114,12 @@ Connecting with other computer vision enthusiasts can be incredibly helpful for ### Community Support Channels -- **GitHub Issues:** Head over to the YOLOv8 GitHub repository. You can use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers can assist with specific problems you encounter. +- **GitHub Issues:** Head over to the YOLO11 GitHub repository. You can use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers can assist with specific problems you encounter. - **Ultralytics Discord Server:** Become part of the [Ultralytics Discord server](https://discord.com/invite/ultralytics). Connect with fellow users and developers, seek support, exchange knowledge, and discuss ideas. ### Comprehensive Guides and Documentation -- **Ultralytics YOLOv8 Documentation:** Explore the [official YOLOv8 documentation](./index.md) for in-depth guides and valuable tips on various computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Explore the [official YOLO11 documentation](./index.md) for in-depth guides and valuable tips on various computer vision tasks and projects. ## Conclusion @@ -138,11 +138,11 @@ To define a clear problem statement for your Ultralytics computer vision project Providing a well-defined problem statement ensures that the project remains focused and aligned with your objectives. For a detailed guide, refer to our [practical guide](#defining-a-clear-problem-statement). 
-### Why should I use Ultralytics YOLOv8 for speed estimation in my computer vision project? +### Why should I use Ultralytics YOLO11 for speed estimation in my computer vision project? -Ultralytics YOLOv8 is ideal for speed estimation because of its real-time object tracking capabilities, high accuracy, and robust performance in detecting and monitoring vehicle speeds. It overcomes inefficiencies and inaccuracies of traditional radar systems by leveraging cutting-edge computer vision technology. Check out our blog on [speed estimation using YOLOv8](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) for more insights and practical examples. +Ultralytics YOLO11 is ideal for speed estimation because of its real-time object tracking capabilities, high accuracy, and robust performance in detecting and monitoring vehicle speeds. It overcomes inefficiencies and inaccuracies of traditional radar systems by leveraging cutting-edge computer vision technology. Check out our blog on [speed estimation using YOLO11](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) for more insights and practical examples. -### How do I set effective measurable objectives for my computer vision project with Ultralytics YOLOv8? +### How do I set effective measurable objectives for my computer vision project with Ultralytics YOLO11? Set effective and measurable objectives using the SMART criteria: diff --git a/docs/en/guides/distance-calculation.md b/docs/en/guides/distance-calculation.md index 443b208b706..c9775124d4d 100644 --- a/docs/en/guides/distance-calculation.md +++ b/docs/en/guides/distance-calculation.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to calculate distances between objects using Ultralytics YOLOv8 for accurate spatial positioning and scene understanding. 
-keywords: Ultralytics, YOLOv8, distance calculation, computer vision, object tracking, spatial positioning +description: Learn how to calculate distances between objects using Ultralytics YOLO11 for accurate spatial positioning and scene understanding. +keywords: Ultralytics, YOLO11, distance calculation, computer vision, object tracking, spatial positioning --- -# Distance Calculation using Ultralytics YOLOv8 +# Distance Calculation using Ultralytics YOLO11 ## What is Distance Calculation? -Measuring the gap between two objects is known as distance calculation within a specified space. In the case of [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), the [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroid is employed to calculate the distance for bounding boxes highlighted by the user. +Measuring the gap between two objects is known as distance calculation within a specified space. In the case of [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics), the [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroid is employed to calculate the distance for bounding boxes highlighted by the user.


@@ -18,14 +18,14 @@ Measuring the gap between two objects is known as distance calculation within a allowfullscreen>
- Watch: Distance Calculation using Ultralytics YOLOv8 + Watch: Distance Calculation using Ultralytics YOLO11

## Visuals -| Distance Calculation using Ultralytics YOLOv8 | +| Distance Calculation using Ultralytics YOLO11 | | :---------------------------------------------------------------------------------------------------------------------------: | -| ![Ultralytics YOLOv8 Distance Calculation](https://github.com/ultralytics/docs/releases/download/0/distance-calculation.avif) | +| ![Ultralytics YOLO11 Distance Calculation](https://github.com/ultralytics/docs/releases/download/0/distance-calculation.avif) | ## Advantages of Distance Calculation? @@ -36,19 +36,16 @@ Measuring the gap between two objects is known as distance calculation within a - Click on any two bounding boxes with Left Mouse click for distance calculation -!!! example "Distance Calculation using YOLOv8 Example" +!!! example "Distance Calculation using YOLO11 Example" === "Video Stream" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n.pt") - names = model.model.names - - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -56,16 +53,15 @@ Measuring the gap between two objects is known as distance calculation within a video_writer = cv2.VideoWriter("distance_calculation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) # Init distance-calculation obj - dist_obj = solutions.DistanceCalculation(names=names, view_img=True) + distance = solutions.DistanceCalculation(model="yolo11n.pt", show=True) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - - tracks = model.track(im0, persist=True, show=False) - im0 = dist_obj.start_process(im0, tracks) + im0 = distance.calculate(im0) video_writer.write(im0) 
cap.release() @@ -84,13 +80,11 @@ Measuring the gap between two objects is known as distance calculation within a ### Arguments `DistanceCalculation()` -| `Name` | `Type` | `Default` | Description | -| ---------------- | ------- | --------------- | --------------------------------------------------------- | -| `names` | `dict` | `None` | Dictionary of classes names. | -| `view_img` | `bool` | `False` | Flag to indicate if the video stream should be displayed. | -| `line_thickness` | `int` | `2` | Thickness of the lines drawn on the image. | -| `line_color` | `tuple` | `(255, 255, 0)` | Color of the lines drawn on the image (BGR format). | -| `centroid_color` | `tuple` | `(255, 0, 255)` | Color of the centroids drawn (BGR format). | +| `Name` | `Type` | `Default` | Description | +| ------------ | ------ | --------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -98,34 +92,32 @@ Measuring the gap between two objects is known as distance calculation within a ## FAQ -### How do I calculate distances between objects using Ultralytics YOLOv8? +### How do I calculate distances between objects using Ultralytics YOLO11? -To calculate distances between objects using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics), you need to identify the bounding box centroids of the detected objects. This process involves initializing the `DistanceCalculation` class from Ultralytics' `solutions` module and using the model's tracking outputs to calculate the distances. You can refer to the implementation in the [distance calculation example](#distance-calculation-using-ultralytics-yolov8). 
+To calculate distances between objects using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics), you need to identify the bounding box centroids of the detected objects. This process involves initializing the `DistanceCalculation` class from Ultralytics' `solutions` module and using the model's tracking outputs to calculate the distances. You can refer to the implementation in the [distance calculation example](#distance-calculation-using-ultralytics-yolo11). -### What are the advantages of using distance calculation with Ultralytics YOLOv8? +### What are the advantages of using distance calculation with Ultralytics YOLO11? -Using distance calculation with Ultralytics YOLOv8 offers several advantages: +Using distance calculation with Ultralytics YOLO11 offers several advantages: - **Localization Precision:** Provides accurate spatial positioning for objects. - **Size Estimation:** Helps estimate physical sizes, contributing to better contextual understanding. - **Scene Understanding:** Enhances 3D scene comprehension, aiding improved decision-making in applications like autonomous driving and surveillance. -### Can I perform distance calculation in real-time video streams with Ultralytics YOLOv8? +### Can I perform distance calculation in real-time video streams with Ultralytics YOLO11? -Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLOv8. The process involves capturing video frames using [OpenCV](https://www.ultralytics.com/glossary/opencv), running YOLOv8 [object detection](https://www.ultralytics.com/glossary/object-detection), and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolov8). +Yes, you can perform distance calculation in real-time video streams with Ultralytics YOLO11. 
The process involves capturing video frames using [OpenCV](https://www.ultralytics.com/glossary/opencv), running YOLO11 [object detection](https://www.ultralytics.com/glossary/object-detection), and using the `DistanceCalculation` class to calculate distances between objects in successive frames. For a detailed implementation, see the [video stream example](#distance-calculation-using-ultralytics-yolo11). -### How do I delete points drawn during distance calculation using Ultralytics YOLOv8? +### How do I delete points drawn during distance calculation using Ultralytics YOLO11? -To delete points drawn during distance calculation with Ultralytics YOLOv8, you can use a right mouse click. This action will clear all the points you have drawn. For more details, refer to the note section under the [distance calculation example](#distance-calculation-using-ultralytics-yolov8). +To delete points drawn during distance calculation with Ultralytics YOLO11, you can use a right mouse click. This action will clear all the points you have drawn. For more details, refer to the note section under the [distance calculation example](#distance-calculation-using-ultralytics-yolo11). -### What are the key arguments for initializing the DistanceCalculation class in Ultralytics YOLOv8? +### What are the key arguments for initializing the DistanceCalculation class in Ultralytics YOLO11? -The key arguments for initializing the `DistanceCalculation` class in Ultralytics YOLOv8 include: +The key arguments for initializing the `DistanceCalculation` class in Ultralytics YOLO11 include: -- `names`: Dictionary mapping class indices to class names. -- `view_img`: Flag to indicate if the video stream should be displayed. -- `line_thickness`: Thickness of the lines drawn on the image. -- `line_color`: Color of the lines drawn on the image (BGR format). -- `centroid_color`: Color of the centroids (BGR format). +- `model`: Model file path. 
+- `show`: Flag to indicate if the video stream should be displayed. +- `line_width`: Thickness of bounding box and the lines drawn on the image. For an exhaustive list and default values, see the [arguments of DistanceCalculation](#arguments-distancecalculation). diff --git a/docs/en/guides/docker-quickstart.md b/docs/en/guides/docker-quickstart.md index 3ee48946c97..f08d62ad3cd 100644 --- a/docs/en/guides/docker-quickstart.md +++ b/docs/en/guides/docker-quickstart.md @@ -98,7 +98,7 @@ Here's how to execute the Ultralytics Docker container: ### Using only the CPU ```bash -# Run with all GPUs +# Run without GPU sudo docker run -it --ipc=host $t ``` @@ -197,10 +197,10 @@ Setup and configuration of an X11 or Wayland display server is outside the scope ### Using Docker with a GUI -Now you can display graphical applications inside your Docker container. For example, you can run the following [CLI command](../usage/cli.md) to visualize the [predictions](../modes/predict.md) from a [YOLOv8 model](../models/yolov8.md): +Now you can display graphical applications inside your Docker container. For example, you can run the following [CLI command](../usage/cli.md) to visualize the [predictions](../modes/predict.md) from a [YOLO11 model](../models/yolo11.md): ```bash -yolo predict model=yolov8n.pt show=True +yolo predict model=yolo11n.pt show=True ``` ??? info "Testing" diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md index d2ebd4b14bc..5310eb98ca2 100644 --- a/docs/en/guides/heatmaps.md +++ b/docs/en/guides/heatmaps.md @@ -1,14 +1,16 @@ --- comments: true -description: Transform complex data into insightful heatmaps using Ultralytics YOLOv8. Discover patterns, trends, and anomalies with vibrant visualizations. -keywords: Ultralytics, YOLOv8, heatmaps, data visualization, data analysis, complex data, patterns, trends, anomalies +description: Transform complex data into insightful heatmaps using Ultralytics YOLO11. 
Discover patterns, trends, and anomalies with vibrant visualizations. +keywords: Ultralytics, YOLO11, heatmaps, data visualization, data analysis, complex data, patterns, trends, anomalies --- -# Advanced [Data Visualization](https://www.ultralytics.com/glossary/data-visualization): Heatmaps using Ultralytics YOLOv8 🚀 +# Advanced [Data Visualization](https://www.ultralytics.com/glossary/data-visualization): Heatmaps using Ultralytics YOLO11 🚀 ## Introduction to Heatmaps -A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains. +Open Heatmaps In Colab + +A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.


@@ -18,7 +20,7 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult allowfullscreen>
- Watch: Heatmaps using Ultralytics YOLOv8 + Watch: Heatmaps using Ultralytics YOLO11

## Why Choose Heatmaps for Data Analysis? @@ -31,237 +33,65 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult | Transportation | Retail | | :--------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | -| ![Ultralytics YOLOv8 Transportation Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-transportation-heatmap.avif) | ![Ultralytics YOLOv8 Retail Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-retail-heatmap.avif) | -| Ultralytics YOLOv8 Transportation Heatmap | Ultralytics YOLOv8 Retail Heatmap | - -!!! tip "Heatmap Configuration" - - - `heatmap_alpha`: Ensure this value is within the range (0.0 - 1.0). - - `decay_factor`: Used for removing heatmap after an object is no longer in the frame, its value should also be in the range (0.0 - 1.0). - -!!! 
example "Heatmaps using Ultralytics YOLOv8 Example" - - === "Heatmap" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True, show=False) - - im0 = heatmap_obj.generate_heatmap(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Line Counting" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - line_points = [(20, 400), (1080, 404)] # line for object counting - - # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=line_points, - names=model.names, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, 
tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Polygon Counting" +| ![Ultralytics YOLO11 Transportation Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-transportation-heatmap.avif) | ![Ultralytics YOLO11 Retail Heatmap](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-retail-heatmap.avif) | +| Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap | - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Define polygon points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] - - # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=region_points, - names=model.names, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Region Counting" - - ```python - import cv2 +!!! 
example "Heatmaps using Ultralytics YOLO11 Example" - from ultralytics import YOLO, solutions + === "CLI" - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Define region points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - - # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - count_reg_pts=region_points, - names=model.names, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break + ```bash + # Run a heatmap example + yolo solutions heatmap show=True - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` + # Pass a source video + yolo solutions heatmap source="path/to/video/file.mp4" - === "Im0" + # Pass a custom colormap + yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8s.pt") # YOLOv8 custom/pretrained model - - im0 = cv2.imread("path/to/image.png") # path to image file - h, w = im0.shape[:2] # image height and width - - # Heatmap Init - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, - ) - - results = model.track(im0, persist=True) - im0 = heatmap_obj.generate_heatmap(im0, tracks=results) - cv2.imwrite("ultralytics_output.png", im0) + # Heatmaps + object counting + yolo solutions heatmap region=[(20, 400), (1080, 400), (1080, 360), 
(20, 360)] ``` - === "Specific Classes" + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) # Video writer video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - classes_for_heatmap = [0, 2] # classes for heatmap + # In case you want to apply object counting + heatmaps, you can pass region points. + # region_points = [(20, 400), (1080, 400)] # Define line points + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Define region points + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # Define polygon points # Init heatmap - heatmap_obj = solutions.Heatmap( - colormap=cv2.COLORMAP_PARULA, - view_img=True, - shape="circle", - names=model.names, + heatmap = solutions.Heatmap( + show=True, # Display the output + model="yolo11n.pt", # Path to the YOLO11 model file + colormap=cv2.COLORMAP_PARULA, # Colormap of heatmap + # region=region_points, # If you want to do object counting with heatmaps, you can pass region_points + # classes=[0, 2], # If you want to generate heatmap for specific classes, i.e., person and car.
+ # show_in=True, # Display in counts + # show_out=True, # Display out counts + # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap) - - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) video_writer.write(im0) cap.release() @@ -271,25 +101,15 @@ A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ult ### Arguments `Heatmap()` -| Name | Type | Default | Description | -| ------------------ | ---------------- | ------------------ | ----------------------------------------------------------------- | -| `names` | `list` | `None` | Dictionary of class names. | -| `imw` | `int` | `0` | Image width. | -| `imh` | `int` | `0` | Image height. | -| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. | -| `heatmap_alpha` | `float` | `0.5` | Alpha blending value for heatmap overlay. | -| `view_img` | `bool` | `False` | Whether to display the image with the heatmap overlay. | -| `view_in_counts` | `bool` | `True` | Whether to display the count of objects entering the region. | -| `view_out_counts` | `bool` | `True` | Whether to display the count of objects exiting the region. | -| `count_reg_pts` | `list` or `None` | `None` | Points defining the counting region (either a line or a polygon). | -| `count_txt_color` | `tuple` | `(0, 0, 0)` | Text color for displaying counts. | -| `count_bg_color` | `tuple` | `(255, 255, 255)` | Background color for displaying counts. | -| `count_reg_color` | `tuple` | `(255, 0, 255)` | Color for the counting region. | -| `region_thickness` | `int` | `5` | Thickness of the region line. | -| `line_dist_thresh` | `int` | `15` | Distance threshold for line-based counting. 
| -| `line_thickness` | `int` | `2` | Thickness of the lines used in drawing. | -| `decay_factor` | `float` | `0.99` | Decay factor for the heatmap to reduce intensity over time. | -| `shape` | `str` | `"circle"` | Shape of the heatmap blobs ('circle' or 'rect'). | +| Name | Type | Default | Description | +| ------------ | ------ | ------------------ | ----------------------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `colormap` | `int` | `cv2.COLORMAP_JET` | Colormap to use for the heatmap. | +| `show` | `bool` | `False` | Whether to display the image with the heatmap overlay. | +| `show_in` | `bool` | `True` | Whether to display the count of objects entering the region. | +| `show_out` | `bool` | `True` | Whether to display the count of objects exiting the region. | +| `region` | `list` | `None` | Points defining the counting region (either a line or a polygon). | +| `line_width` | `int` | `2` | Thickness of the lines used in drawing. | ### Arguments `model.track` @@ -326,29 +146,27 @@ These colormaps are commonly used for visualizing data with different color repr ## FAQ -### How does Ultralytics YOLOv8 generate heatmaps and what are their benefits? +### How does Ultralytics YOLO11 generate heatmaps and what are their benefits? -Ultralytics YOLOv8 generates heatmaps by transforming complex data into a color-coded matrix where different hues represent data intensities. Heatmaps make it easier to visualize patterns, correlations, and anomalies in the data. Warmer hues indicate higher values, while cooler tones represent lower values. The primary benefits include intuitive visualization of data distribution, efficient pattern detection, and enhanced spatial analysis for decision-making. For more details and configuration options, refer to the [Heatmap Configuration](#arguments-heatmap) section. 
+Ultralytics YOLO11 generates heatmaps by transforming complex data into a color-coded matrix where different hues represent data intensities. Heatmaps make it easier to visualize patterns, correlations, and anomalies in the data. Warmer hues indicate higher values, while cooler tones represent lower values. The primary benefits include intuitive visualization of data distribution, efficient pattern detection, and enhanced spatial analysis for decision-making. For more details and configuration options, refer to the [Heatmap Configuration](#arguments-heatmap) section. -### Can I use Ultralytics YOLOv8 to perform object tracking and generate a heatmap simultaneously? +### Can I use Ultralytics YOLO11 to perform object tracking and generate a heatmap simultaneously? -Yes, Ultralytics YOLOv8 supports object tracking and heatmap generation concurrently. This can be achieved through its `Heatmap` solution integrated with object tracking models. To do so, you need to initialize the heatmap object and use YOLOv8's tracking capabilities. Here's a simple example: +Yes, Ultralytics YOLO11 supports object tracking and heatmap generation concurrently. This can be achieved through its `Heatmap` solution integrated with object tracking models. To do so, you need to initialize the heatmap object and use YOLO11's tracking capabilities. 
Here's a simple example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") -heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names) +heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, show=True, model="yolo11n.pt") while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) cv2.imshow("Heatmap", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -359,30 +177,27 @@ cv2.destroyAllWindows() For further guidance, check the [Tracking Mode](../modes/track.md) page. -### What makes Ultralytics YOLOv8 heatmaps different from other data visualization tools like those from [OpenCV](https://www.ultralytics.com/glossary/opencv) or Matplotlib? +### What makes Ultralytics YOLO11 heatmaps different from other data visualization tools like those from [OpenCV](https://www.ultralytics.com/glossary/opencv) or Matplotlib? -Ultralytics YOLOv8 heatmaps are specifically designed for integration with its [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLOv8 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLOv8's unique features, visit the [Ultralytics YOLOv8 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8). 
+Ultralytics YOLO11 heatmaps are specifically designed for integration with its [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking models, providing an end-to-end solution for real-time data analysis. Unlike generic visualization tools like OpenCV or Matplotlib, YOLO11 heatmaps are optimized for performance and automated processing, supporting features like persistent tracking, decay factor adjustment, and real-time video overlay. For more information on YOLO11's unique features, visit the [Ultralytics YOLO11 Introduction](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8). -### How can I visualize only specific object classes in heatmaps using Ultralytics YOLOv8? +### How can I visualize only specific object classes in heatmaps using Ultralytics YOLO11? You can visualize specific object classes by specifying the desired classes in the `track()` method of the YOLO model. For instance, if you only want to visualize cars and persons (assuming their class indices are 0 and 2), you can set the `classes` parameter accordingly. ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") -heatmap_obj = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, view_img=True, shape="circle", names=model.names) +heatmap = solutions.Heatmap(show=True, model="yolo11n.pt", classes=[0, 2]) -classes_for_heatmap = [0, 2] # Classes to visualize while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False, classes=classes_for_heatmap) - im0 = heatmap_obj.generate_heatmap(im0, tracks) + im0 = heatmap.generate_heatmap(im0) cv2.imshow("Heatmap", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -391,6 +206,6 @@ cap.release() cv2.destroyAllWindows() ``` -### Why should businesses choose Ultralytics YOLOv8 for heatmap generation in data analysis? 
+### Why should businesses choose Ultralytics YOLO11 for heatmap generation in data analysis? -Ultralytics YOLOv8 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLOv8's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans). +Ultralytics YOLO11 offers seamless integration of advanced object detection and real-time heatmap generation, making it an ideal choice for businesses looking to visualize data more effectively. The key advantages include intuitive data distribution visualization, efficient pattern detection, and enhanced spatial analysis for better decision-making. Additionally, YOLO11's cutting-edge features such as persistent tracking, customizable colormaps, and support for various export formats make it superior to other tools like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and OpenCV for comprehensive data analysis. Learn more about business applications at [Ultralytics Plans](https://www.ultralytics.com/plans). 
diff --git a/docs/en/guides/hyperparameter-tuning.md b/docs/en/guides/hyperparameter-tuning.md index d715820f24a..267e7eb5d10 100644 --- a/docs/en/guides/hyperparameter-tuning.md +++ b/docs/en/guides/hyperparameter-tuning.md @@ -10,6 +10,17 @@ keywords: Ultralytics YOLO, hyperparameter tuning, machine learning, model optim Hyperparameter tuning is not just a one-time set-up but an iterative process aimed at optimizing the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) model's performance metrics, such as accuracy, precision, and recall. In the context of Ultralytics YOLO, these hyperparameters could range from learning rate to architectural details, such as the number of layers or types of activation functions used. +

+
+ +
+ Watch: How to Tune Hyperparameters for Better Model Performance 🚀

+ ### What are Hyperparameters? Hyperparameters are high-level, structural settings for the algorithm. They are set prior to the training phase and remain constant during it. Here are some commonly tuned hyperparameters in Ultralytics YOLO: @@ -23,7 +34,7 @@ Hyperparameters are high-level, structural settings for the algorithm. They are Hyperparameter Tuning Visual

-For a full list of augmentation hyperparameters used in YOLOv8 please refer to the [configurations page](../usage/cfg.md#augmentation-settings). +For a full list of augmentation hyperparameters used in YOLO11 please refer to the [configurations page](../usage/cfg.md#augmentation-settings). ### Genetic Evolution and Mutation @@ -65,9 +76,37 @@ It's crucial to log both the performance metrics and the corresponding hyperpara The process is repeated until either the set number of iterations is reached or the performance metric is satisfactory. -## Usage Example - -Here's how to use the `model.tune()` method to utilize the `Tuner` class for hyperparameter tuning of YOLOv8n on COCO8 for 30 epochs with an AdamW optimizer and skipping plotting, checkpointing and validation other than on final epoch for faster Tuning. +## Default Search Space Description + +The following table lists the default search space parameters for hyperparameter tuning in YOLO11. Each parameter has a specific value range defined by a tuple `(min, max)`. + +| Parameter | Type | Value Range | Description | +| ----------------- | ------- | -------------- | ---------------------------------------------------------------------------------------------------------------- | +| `lr0` | `float` | `(1e-5, 1e-1)` | Initial learning rate at the start of training. Lower values provide more stable training but slower convergence | +| `lrf` | `float` | `(0.01, 1.0)` | Final learning rate factor as a fraction of lr0. Controls how much the learning rate decreases during training | +| `momentum` | `float` | `(0.6, 0.98)` | SGD momentum factor. Higher values help maintain consistent gradient direction and can speed up convergence | +| `weight_decay` | `float` | `(0.0, 0.001)` | L2 regularization factor to prevent overfitting. Larger values enforce stronger regularization | +| `warmup_epochs` | `float` | `(0.0, 5.0)` | Number of epochs for linear learning rate warmup. 
Helps prevent early training instability | +| `warmup_momentum` | `float` | `(0.0, 0.95)` | Initial momentum during warmup phase. Gradually increases to the final momentum value | +| `box` | `float` | `(0.02, 0.2)` | Bounding box loss weight in the total loss function. Balances box regression vs classification | +| `cls` | `float` | `(0.2, 4.0)` | Classification loss weight in the total loss function. Higher values emphasize correct class prediction | +| `hsv_h` | `float` | `(0.0, 0.1)` | Random hue augmentation range in HSV color space. Helps model generalize across color variations | +| `hsv_s` | `float` | `(0.0, 0.9)` | Random saturation augmentation range in HSV space. Simulates different lighting conditions | +| `hsv_v` | `float` | `(0.0, 0.9)` | Random value (brightness) augmentation range. Helps model handle different exposure levels | +| `degrees` | `float` | `(0.0, 45.0)` | Maximum rotation augmentation in degrees. Helps model become invariant to object orientation | +| `translate` | `float` | `(0.0, 0.9)` | Maximum translation augmentation as fraction of image size. Improves robustness to object position | +| `scale` | `float` | `(0.0, 0.9)` | Random scaling augmentation range. Helps model detect objects at different sizes | +| `shear` | `float` | `(0.0, 10.0)` | Maximum shear augmentation in degrees. Adds perspective-like distortions to training images | +| `perspective` | `float` | `(0.0, 0.001)` | Random perspective augmentation range. Simulates different viewing angles | +| `flipud` | `float` | `(0.0, 1.0)` | Probability of vertical image flip during training. Useful for overhead/aerial imagery | +| `fliplr` | `float` | `(0.0, 1.0)` | Probability of horizontal image flip. Helps model become invariant to object direction | +| `mosaic` | `float` | `(0.0, 1.0)` | Probability of using mosaic augmentation, which combines 4 images. 
Especially useful for small object detection | +| `mixup` | `float` | `(0.0, 1.0)` | Probability of using mixup augmentation, which blends two images. Can improve model robustness | +| `copy_paste` | `float` | `(0.0, 1.0)` | Probability of using copy-paste augmentation. Helps improve instance segmentation performance | + +## Custom Search Space Example + +Here's how to define a search space and use the `model.tune()` method to utilize the `Tuner` class for hyperparameter tuning of YOLO11n on COCO8 for 30 epochs with an AdamW optimizer and skipping plotting, checkpointing and validation other than on final epoch for faster Tuning. !!! example @@ -77,10 +116,25 @@ Here's how to use the `model.tune()` method to utilize the `Tuner` class for hyp from ultralytics import YOLO # Initialize the YOLO model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") + + # Define search space + search_space = { + "lr0": (1e-5, 1e-1), + "degrees": (0.0, 45.0), + } # Tune hyperparameters on COCO8 for 30 epochs - model.tune(data="coco8.yaml", epochs=30, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) + model.tune( + data="coco8.yaml", + epochs=30, + iterations=300, + optimizer="AdamW", + space=search_space, + plots=False, + save=False, + val=False, + ) ``` ## Results @@ -202,7 +256,7 @@ The hyperparameter tuning process in Ultralytics YOLO is simplified yet powerful 1. [Hyperparameter Optimization in Wikipedia](https://en.wikipedia.org/wiki/Hyperparameter_optimization) 2. [YOLOv5 Hyperparameter Evolution Guide](../yolov5/tutorials/hyperparameter_evolution.md) -3. [Efficient Hyperparameter Tuning with Ray Tune and YOLOv8](../integrations/ray-tune.md) +3. [Efficient Hyperparameter Tuning with Ray Tune and YOLO11](../integrations/ray-tune.md) For deeper insights, you can explore the `Tuner` class source code and accompanying documentation. 
Should you have any questions, feature requests, or need further assistance, feel free to reach out to us on [GitHub](https://github.com/ultralytics/ultralytics/issues/new/choose) or [Discord](https://discord.com/invite/ultralytics). @@ -220,7 +274,7 @@ To optimize the learning rate for Ultralytics YOLO, start by setting an initial from ultralytics import YOLO # Initialize the YOLO model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Tune hyperparameters on COCO8 for 30 epochs model.tune(data="coco8.yaml", epochs=30, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) @@ -228,9 +282,9 @@ To optimize the learning rate for Ultralytics YOLO, start by setting an initial For more details, check the [Ultralytics YOLO configuration page](../usage/cfg.md#augmentation-settings). -### What are the benefits of using genetic algorithms for hyperparameter tuning in YOLOv8? +### What are the benefits of using genetic algorithms for hyperparameter tuning in YOLO11? -Genetic algorithms in Ultralytics YOLOv8 provide a robust method for exploring the hyperparameter space, leading to highly optimized model performance. Key benefits include: +Genetic algorithms in Ultralytics YOLO11 provide a robust method for exploring the hyperparameter space, leading to highly optimized model performance. Key benefits include: - **Efficient Search**: Genetic algorithms like mutation can quickly explore a large set of hyperparameters. - **Avoiding Local Minima**: By introducing randomness, they help in avoiding local minima, ensuring better global optimization. @@ -240,7 +294,7 @@ To see how genetic algorithms can optimize hyperparameters, check out the [hyper ### How long does the hyperparameter tuning process take for Ultralytics YOLO? 
-The time required for hyperparameter tuning with Ultralytics YOLO largely depends on several factors such as the size of the dataset, the complexity of the model architecture, the number of iterations, and the computational resources available. For instance, tuning YOLOv8n on a dataset like COCO8 for 30 epochs might take several hours to days, depending on the hardware. +The time required for hyperparameter tuning with Ultralytics YOLO largely depends on several factors such as the size of the dataset, the complexity of the model architecture, the number of iterations, and the computational resources available. For instance, tuning YOLO11n on a dataset like COCO8 for 30 epochs might take several hours to days, depending on the hardware. To effectively manage tuning time, define a clear tuning budget beforehand ([internal section link](#preparing-for-hyperparameter-tuning)). This helps in balancing resource allocation and optimization goals. diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md index 1ad70434abc..f3deebd3e64 100644 --- a/docs/en/guides/index.md +++ b/docs/en/guides/index.md @@ -18,7 +18,7 @@ Whether you're a beginner or an expert in [deep learning](https://www.ultralytic allowfullscreen>
- Watch: Ultralytics YOLOv8 Guides Overview + Watch: Ultralytics YOLO11 Guides Overview

## Guides @@ -27,32 +27,32 @@ Here's a compilation of in-depth guides to help you master different aspects of - [YOLO Common Issues](yolo-common-issues.md) ⭐ RECOMMENDED: Practical solutions and troubleshooting tips to the most frequently encountered issues when working with Ultralytics YOLO models. - [YOLO Performance Metrics](yolo-performance-metrics.md) ⭐ ESSENTIAL: Understand the key metrics like mAP, IoU, and [F1 score](https://www.ultralytics.com/glossary/f1-score) used to evaluate the performance of your YOLO models. Includes practical examples and tips on how to improve detection accuracy and speed. +- [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions. - [Model Deployment Options](model-deployment-options.md): Overview of YOLO [model deployment](https://www.ultralytics.com/glossary/model-deployment) formats like ONNX, OpenVINO, and TensorRT, with pros and cons for each to inform your deployment strategy. - [K-Fold Cross Validation](kfold-cross-validation.md) 🚀 NEW: Learn how to improve model generalization using K-Fold cross-validation technique. - [Hyperparameter Tuning](hyperparameter-tuning.md) 🚀 NEW: Discover how to optimize your YOLO models by fine-tuning hyperparameters using the Tuner class and genetic evolution algorithms. -- [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLOv8 for object detection in high-resolution images. +- [SAHI Tiled Inference](sahi-tiled-inference.md) 🚀 NEW: Comprehensive guide on leveraging SAHI's sliced inference capabilities with YOLO11 for object detection in high-resolution images.
- [AzureML Quickstart](azureml-quickstart.md) 🚀 NEW: Get up and running with Ultralytics YOLO models on Microsoft's Azure [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml) platform. Learn how to train, deploy, and scale your object detection projects in the cloud. - [Conda Quickstart](conda-quickstart.md) 🚀 NEW: Step-by-step guide to setting up a [Conda](https://anaconda.org/conda-forge/ultralytics) environment for Ultralytics. Learn how to install and start using the Ultralytics package efficiently with Conda. - [Docker Quickstart](docker-quickstart.md) 🚀 NEW: Complete guide to setting up and using Ultralytics YOLO models with [Docker](https://hub.docker.com/r/ultralytics/ultralytics). Learn how to install Docker, manage GPU support, and run YOLO models in isolated containers for consistent development and deployment. - [Raspberry Pi](raspberry-pi.md) 🚀 NEW: Quickstart tutorial to run YOLO models to the latest Raspberry Pi hardware. - [NVIDIA Jetson](nvidia-jetson.md) 🚀 NEW: Quickstart guide for deploying YOLO models on NVIDIA Jetson devices. - [DeepStream on NVIDIA Jetson](deepstream-nvidia-jetson.md) 🚀 NEW: Quickstart guide for deploying YOLO models on NVIDIA Jetson devices using DeepStream and TensorRT. -- [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLOv8 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments. -- [YOLO Thread-Safe Inference](yolo-thread-safe-inference.md) 🚀 NEW: Guidelines for performing inference with YOLO models in a thread-safe manner. Learn the importance of thread safety and best practices to prevent race conditions and ensure consistent predictions. +- [Triton Inference Server Integration](triton-inference-server.md) 🚀 NEW: Dive into the integration of Ultralytics YOLO11 with NVIDIA's Triton Inference Server for scalable and efficient deep learning inference deployments.
- [Isolating Segmentation Objects](isolating-segmentation-objects.md) 🚀 NEW: Step-by-step recipe and explanation on how to extract and/or isolate objects from images using Ultralytics Segmentation. - [Edge TPU on Raspberry Pi](coral-edge-tpu-on-raspberry-pi.md): [Google Edge TPU](https://coral.ai/products/accelerator) accelerates YOLO inference on [Raspberry Pi](https://www.raspberrypi.com/). - [View Inference Images in a Terminal](view-results-in-terminal.md): Use VSCode's integrated terminal to view inference results when using Remote Tunnel or SSH sessions. - [OpenVINO Latency vs Throughput Modes](optimizing-openvino-latency-vs-throughput-modes.md) - Learn latency and throughput optimization techniques for peak YOLO inference performance. +- [ROS Quickstart](ros-quickstart.md) 🚀 NEW: Learn how to integrate YOLO with the Robot Operating System (ROS) for real-time object detection in robotics applications, including Point Cloud and Depth images. - [Steps of a Computer Vision Project ](steps-of-a-cv-project.md) 🚀 NEW: Learn about the key steps involved in a computer vision project, including defining goals, selecting models, preparing data, and evaluating results. - [Defining A Computer Vision Project's Goals](defining-project-goals.md) 🚀 NEW: Walk through how to effectively define clear and measurable goals for your computer vision project. Learn the importance of a well-defined problem statement and how it creates a roadmap for your project. - [Data Collection and Annotation](data-collection-and-annotation.md) 🚀 NEW: Explore the tools, techniques, and best practices for collecting and annotating data to create high-quality inputs for your computer vision models. -- [Preprocessing Annotated Data](preprocessing_annotated_data.md) 🚀 NEW: Learn about preprocessing and augmenting image data in computer vision projects using YOLOv8, including normalization, dataset augmentation, splitting, and exploratory data analysis (EDA).
+- [Preprocessing Annotated Data](preprocessing_annotated_data.md) 🚀 NEW: Learn about preprocessing and augmenting image data in computer vision projects using YOLO11, including normalization, dataset augmentation, splitting, and exploratory data analysis (EDA). - [Tips for Model Training](model-training-tips.md) 🚀 NEW: Explore tips on optimizing [batch sizes](https://www.ultralytics.com/glossary/batch-size), using [mixed precision](https://www.ultralytics.com/glossary/mixed-precision), applying pre-trained weights, and more to make training your computer vision model a breeze. - [Insights on Model Evaluation and Fine-Tuning](model-evaluation-insights.md) 🚀 NEW: Gain insights into the strategies and best practices for evaluating and fine-tuning your computer vision models. Learn about the iterative process of refining models to achieve optimal results. - [A Guide on Model Testing](model-testing.md) 🚀 NEW: A thorough guide on testing your computer vision models in realistic settings. Learn how to verify accuracy, reliability, and performance in line with project goals. - [Best Practices for Model Deployment](model-deployment-practices.md) 🚀 NEW: Walk through tips and best practices for efficiently deploying models in computer vision projects, with a focus on optimization, troubleshooting, and security. - [Maintaining Your Computer Vision Model](model-monitoring-and-maintenance.md) 🚀 NEW: Understand the key practices for monitoring, maintaining, and documenting computer vision models to guarantee accuracy, spot anomalies, and mitigate data drift. -- [ROS Quickstart](ros-quickstart.md) 🚀 NEW: Learn how to integrate YOLO with the Robot Operating System (ROS) for real-time object detection in robotics applications, including Point Cloud and Depth images.
## Contribute to Our Guides @@ -75,14 +75,14 @@ Training a custom object detection model with Ultralytics YOLO is straightforwar ```python from ultralytics import YOLO - model = YOLO("yolov8s.pt") # Load a pre-trained YOLO model + model = YOLO("yolo11n.pt") # Load a pre-trained YOLO model model.train(data="path/to/dataset.yaml", epochs=50) # Train on custom dataset ``` === "CLI" ```bash - yolo task=detect mode=train model=yolov8s.pt data=path/to/dataset.yaml epochs=50 + yolo task=detect mode=train model=yolo11n.pt data=path/to/dataset.yaml epochs=50 ``` For detailed dataset formatting and additional options, refer to our [Tips for Model Training](model-training-tips.md) guide. diff --git a/docs/en/guides/instance-segmentation-and-tracking.md b/docs/en/guides/instance-segmentation-and-tracking.md index 95e91a8cafc..12cd7477a67 100644 --- a/docs/en/guides/instance-segmentation-and-tracking.md +++ b/docs/en/guides/instance-segmentation-and-tracking.md @@ -1,14 +1,14 @@ --- comments: true -description: Master instance segmentation and tracking with Ultralytics YOLOv8. Learn techniques for precise object identification and tracking. -keywords: instance segmentation, tracking, YOLOv8, Ultralytics, object detection, machine learning, computer vision, python +description: Master instance segmentation and tracking with Ultralytics YOLO11. Learn techniques for precise object identification and tracking. +keywords: instance segmentation, tracking, YOLO11, Ultralytics, object detection, machine learning, computer vision, python --- -# Instance Segmentation and Tracking using Ultralytics YOLOv8 🚀 +# Instance Segmentation and Tracking using Ultralytics YOLO11 🚀 ## What is [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation)? -[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution.
Unlike [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation), it uniquely labels and precisely delineates each object, crucial for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and medical imaging. +[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike [semantic segmentation](https://www.ultralytics.com/glossary/semantic-segmentation), it uniquely labels and precisely delineates each object, crucial for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and medical imaging. There are two types of instance segmentation tracking available in the Ultralytics package: @@ -24,7 +24,7 @@ There are two types of instance segmentation tracking available in the Ultralyti allowfullscreen>
- Watch: Instance Segmentation with Object Tracking using Ultralytics YOLOv8 + Watch: Instance Segmentation with Object Tracking using Ultralytics YOLO11

## Samples @@ -44,7 +44,7 @@ There are two types of instance segmentation tracking available in the Ultralyti from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n-seg.pt") # segmentation model + model = YOLO("yolo11n-seg.pt") # segmentation model names = model.model.names cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -82,16 +82,12 @@ There are two types of instance segmentation tracking available in the Ultralyti === "Instance Segmentation with Object Tracking" ```python - from collections import defaultdict - import cv2 from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - track_history = defaultdict(lambda: []) - - model = YOLO("yolov8n-seg.pt") # segmentation model + model = YOLO("yolo11n-seg.pt") # segmentation model cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -142,9 +138,9 @@ For any inquiries, feel free to post your questions in the [Ultralytics Issue Se ## FAQ -### How do I perform instance segmentation using Ultralytics YOLOv8? +### How do I perform instance segmentation using Ultralytics YOLO11? -To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO model with a segmentation version of YOLOv8 and process video frames through it. Here's a simplified code example: +To perform instance segmentation using Ultralytics YOLO11, initialize the YOLO model with a segmentation version of YOLO11 and process video frames through it. Here's a simplified code example: !!! 
example @@ -156,7 +152,7 @@ To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO m from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n-seg.pt") # segmentation model + model = YOLO("yolo11n-seg.pt") # segmentation model cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -186,17 +182,17 @@ To perform instance segmentation using Ultralytics YOLOv8, initialize the YOLO m cv2.destroyAllWindows() ``` -Learn more about instance segmentation in the [Ultralytics YOLOv8 guide](#what-is-instance-segmentation). +Learn more about instance segmentation in the [Ultralytics YOLO11 guide](#what-is-instance-segmentation). -### What is the difference between instance segmentation and object tracking in Ultralytics YOLOv8? +### What is the difference between instance segmentation and object tracking in Ultralytics YOLO11? -Instance segmentation identifies and outlines individual objects within an image, giving each object a unique label and mask. Object tracking extends this by assigning consistent labels to objects across video frames, facilitating continuous tracking of the same objects over time. Learn more about the distinctions in the [Ultralytics YOLOv8 documentation](#samples). +Instance segmentation identifies and outlines individual objects within an image, giving each object a unique label and mask. Object tracking extends this by assigning consistent labels to objects across video frames, facilitating continuous tracking of the same objects over time. Learn more about the distinctions in the [Ultralytics YOLO11 documentation](#samples). -### Why should I use Ultralytics YOLOv8 for instance segmentation and tracking over other models like Mask R-CNN or Faster R-CNN? 
+### Why should I use Ultralytics YOLO11 for instance segmentation and tracking over other models like Mask R-CNN or Faster R-CNN? -Ultralytics YOLOv8 offers real-time performance, superior [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLOv8 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLOv8 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8). +Ultralytics YOLO11 offers real-time performance, superior [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of use compared to other models like Mask R-CNN or Faster R-CNN. YOLO11 provides a seamless integration with Ultralytics HUB, allowing users to manage models, datasets, and training pipelines efficiently. Discover more about the benefits of YOLO11 in the [Ultralytics blog](https://www.ultralytics.com/blog/introducing-ultralytics-yolov8). -### How can I implement object tracking using Ultralytics YOLOv8? +### How can I implement object tracking using Ultralytics YOLO11? To implement object tracking, use the `model.track` method and ensure that each object's ID is consistently assigned across frames. 
Below is a simple example: @@ -205,16 +201,12 @@ To implement object tracking, use the `model.track` method and ensure that each === "Python" ```python - from collections import defaultdict - import cv2 from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - track_history = defaultdict(lambda: []) - - model = YOLO("yolov8n-seg.pt") # segmentation model + model = YOLO("yolo11n-seg.pt") # segmentation model cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -247,6 +239,6 @@ To implement object tracking, use the `model.track` method and ensure that each Find more in the [Instance Segmentation and Tracking section](#samples). -### Are there any datasets provided by Ultralytics suitable for training YOLOv8 models for instance segmentation and tracking? +### Are there any datasets provided by Ultralytics suitable for training YOLO11 models for instance segmentation and tracking? -Yes, Ultralytics offers several datasets suitable for training YOLOv8 models, including segmentation and tracking datasets. Dataset examples, structures, and instructions for use can be found in the [Ultralytics Datasets documentation](https://docs.ultralytics.com/datasets/). +Yes, Ultralytics offers several datasets suitable for training YOLO11 models, including segmentation and tracking datasets. Dataset examples, structures, and instructions for use can be found in the [Ultralytics Datasets documentation](https://docs.ultralytics.com/datasets/). diff --git a/docs/en/guides/isolating-segmentation-objects.md b/docs/en/guides/isolating-segmentation-objects.md index 57b4b7ab8d7..e761bd8265c 100644 --- a/docs/en/guides/isolating-segmentation-objects.md +++ b/docs/en/guides/isolating-segmentation-objects.md @@ -1,7 +1,7 @@ --- comments: true description: Learn to extract isolated objects from inference results using Ultralytics Predict Mode. 
Step-by-step guide for segmentation object isolation. -keywords: Ultralytics, segmentation, object isolation, Predict Mode, YOLOv8, machine learning, object detection, binary mask, image processing +keywords: Ultralytics, segmentation, object isolation, Predict Mode, YOLO11, machine learning, object detection, binary mask, image processing --- # Isolating Segmentation Objects @@ -24,7 +24,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") + model = YOLO("yolo11n-seg.pt") # Run inference results = model.predict() @@ -135,7 +135,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab === "Black Background Pixels" - ```py + ```python # Create 3-channel mask mask3ch = cv2.cvtColor(b_mask, cv2.COLOR_GRAY2BGR) @@ -187,7 +187,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab === "Transparent Background Pixels" - ```py + ```python # Isolate object with transparent background (when saved as PNG) isolated = np.dstack([img, b_mask]) ``` @@ -244,7 +244,7 @@ After performing the [Segment Task](../tasks/segment.md), it's sometimes desirab ??? example "Example Final Step" - ```py + ```python # Save isolated object to file _ = cv2.imwrite(f"{img_name}_{label}-{ci}.png", iso_crop) ``` @@ -263,7 +263,7 @@ import numpy as np from ultralytics import YOLO -m = YOLO("yolov8n-seg.pt") # (4)! +m = YOLO("yolo11n-seg.pt") # (4)! res = m.predict() # (3)! # Iterate detection results (5) @@ -306,16 +306,16 @@ for r in res: ## FAQ -### How do I isolate objects using Ultralytics YOLOv8 for segmentation tasks? +### How do I isolate objects using Ultralytics YOLO11 for segmentation tasks? -To isolate objects using Ultralytics YOLOv8, follow these steps: +To isolate objects using Ultralytics YOLO11, follow these steps: 1. 
**Load the model and run inference:** ```python from ultralytics import YOLO - model = YOLO("yolov8n-seg.pt") + model = YOLO("yolo11n-seg.pt") results = model.predict(source="path/to/your/image.jpg") ``` @@ -341,7 +341,7 @@ Refer to the guide on [Predict Mode](../modes/predict.md) and the [Segment Task] ### What options are available for saving the isolated objects after segmentation? -Ultralytics YOLOv8 offers two main options for saving isolated objects: +Ultralytics YOLO11 offers two main options for saving isolated objects: 1. **With a Black Background:** @@ -357,7 +357,7 @@ Ultralytics YOLOv8 offers two main options for saving isolated objects: For further details, visit the [Predict Mode](../modes/predict.md) section. -### How can I crop isolated objects to their bounding boxes using Ultralytics YOLOv8? +### How can I crop isolated objects to their bounding boxes using Ultralytics YOLO11? To crop isolated objects to their bounding boxes: @@ -374,9 +374,9 @@ To crop isolated objects to their bounding boxes: Learn more about bounding box results in the [Predict Mode](../modes/predict.md#boxes) documentation. -### Why should I use Ultralytics YOLOv8 for object isolation in segmentation tasks? +### Why should I use Ultralytics YOLO11 for object isolation in segmentation tasks? -Ultralytics YOLOv8 provides: +Ultralytics YOLO11 provides: - **High-speed** real-time object detection and segmentation. - **Accurate bounding box and mask generation** for precise object isolation. @@ -384,9 +384,9 @@ Ultralytics YOLOv8 provides: Explore the benefits of using YOLO in the [Segment Task documentation](../tasks/segment.md). -### Can I save isolated objects including the background using Ultralytics YOLOv8? +### Can I save isolated objects including the background using Ultralytics YOLO11? -Yes, this is a built-in feature in Ultralytics YOLOv8. Use the `save_crop` argument in the `predict()` method. For example: +Yes, this is a built-in feature in Ultralytics YOLO11. 
Use the `save_crop` argument in the `predict()` method. For example: ```python results = model.predict(source="path/to/your/image.jpg", save_crop=True) diff --git a/docs/en/guides/kfold-cross-validation.md b/docs/en/guides/kfold-cross-validation.md index 80009e2eb29..369bfca9077 100644 --- a/docs/en/guides/kfold-cross-validation.md +++ b/docs/en/guides/kfold-cross-validation.md @@ -94,8 +94,8 @@ Without further ado, let's dive in! ```python import pandas as pd - indx = [label.stem for label in labels] # uses base filename as ID (no extension) - labels_df = pd.DataFrame([], columns=cls_idx, index=indx) + index = [label.stem for label in labels] # uses base filename as ID (no extension) + labels_df = pd.DataFrame([], columns=cls_idx, index=index) ``` 5. Count the instances of each class-label present in the annotation files. @@ -154,11 +154,11 @@ The rows index the label files, each corresponding to an image in your dataset, ```python folds = [f"split_{n}" for n in range(1, ksplit + 1)] - folds_df = pd.DataFrame(index=indx, columns=folds) + folds_df = pd.DataFrame(index=index, columns=folds) - for idx, (train, val) in enumerate(kfolds, start=1): - folds_df[f"split_{idx}"].loc[labels_df.iloc[train].index] = "train" - folds_df[f"split_{idx}"].loc[labels_df.iloc[val].index] = "val" + for i, (train, val) in enumerate(kfolds, start=1): + folds_df[f"split_{i}"].loc[labels_df.iloc[train].index] = "train" + folds_df[f"split_{i}"].loc[labels_df.iloc[val].index] = "val" ``` 3. Now we will calculate the distribution of class labels for each fold as a ratio of the classes present in `val` to those present in `train`. 
@@ -265,6 +265,7 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv") for k in range(ksplit): dataset_yaml = ds_yamls[k] + model = YOLO(weights_path, task="detect") model.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project) # include any train arguments results[k] = model.metrics # save output metrics for further analysis ``` diff --git a/docs/en/guides/model-deployment-options.md b/docs/en/guides/model-deployment-options.md index c2ecf8b6495..2e7e98309b6 100644 --- a/docs/en/guides/model-deployment-options.md +++ b/docs/en/guides/model-deployment-options.md @@ -1,26 +1,26 @@ --- comments: true -description: Learn about YOLOv8's diverse deployment options to maximize your model's performance. Explore PyTorch, TensorRT, OpenVINO, TF Lite, and more!. -keywords: YOLOv8, deployment options, export formats, PyTorch, TensorRT, OpenVINO, TF Lite, machine learning, model deployment +description: Learn about YOLO11's diverse deployment options to maximize your model's performance. Explore PyTorch, TensorRT, OpenVINO, TF Lite, and more! +keywords: YOLO11, deployment options, export formats, PyTorch, TensorRT, OpenVINO, TF Lite, machine learning, model deployment --- -# Understanding YOLOv8's Deployment Options +# Understanding YOLO11's Deployment Options ## Introduction -You've come a long way on your journey with YOLOv8. You've diligently collected data, meticulously annotated it, and put in the hours to train and rigorously evaluate your custom YOLOv8 model. Now, it's time to put your model to work for your specific application, use case, or project. But there's a critical decision that stands before you: how to export and deploy your model effectively. +You've come a long way on your journey with YOLO11. You've diligently collected data, meticulously annotated it, and put in the hours to train and rigorously evaluate your custom YOLO11 model. Now, it's time to put your model to work for your specific application, use case, or project.
But there's a critical decision that stands before you: how to export and deploy your model effectively. -This guide walks you through YOLOv8's deployment options and the essential factors to consider to choose the right option for your project. +This guide walks you through YOLO11's deployment options and the essential factors to consider to choose the right option for your project. -## How to Select the Right Deployment Option for Your YOLOv8 Model +## How to Select the Right Deployment Option for Your YOLO11 Model -When it's time to deploy your YOLOv8 model, selecting a suitable export format is very important. As outlined in the [Ultralytics YOLOv8 Modes documentation](../modes/export.md#usage-examples), the model.export() function allows for converting your trained model into a variety of formats tailored to diverse environments and performance requirements. +When it's time to deploy your YOLO11 model, selecting a suitable export format is very important. As outlined in the [Ultralytics YOLO11 Modes documentation](../modes/export.md#usage-examples), the model.export() function allows for converting your trained model into a variety of formats tailored to diverse environments and performance requirements. The ideal format depends on your model's intended operational context, balancing speed, hardware constraints, and ease of integration. In the following section, we'll take a closer look at each export option, understanding when to choose each one. -### YOLOv8's Deployment Options +### YOLO11's Deployment Options -Let's walk through the different YOLOv8 deployment options. For a detailed walkthrough of the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). +Let's walk through the different YOLO11 deployment options. For a detailed walkthrough of the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). 
#### PyTorch @@ -258,57 +258,62 @@ NCNN is a high-performance neural network inference framework optimized for the - **Hardware Acceleration**: Tailored for ARM CPUs and GPUs, with specific optimizations for these architectures. -## Comparative Analysis of YOLOv8 Deployment Options - -The following table provides a snapshot of the various deployment options available for YOLOv8 models, helping you to assess which may best fit your project needs based on several critical criteria. For an in-depth look at each deployment option's format, please see the [Ultralytics documentation page on export formats](../modes/export.md#export-formats). - -| Deployment Option | Performance Benchmarks | Compatibility and Integration | Community Support and Ecosystem | Case Studies | Maintenance and Updates | Security Considerations | Hardware Acceleration | -| ----------------- | ----------------------------------------------- | ---------------------------------------------- | --------------------------------------------- | ------------------------------------------ | ------------------------------------------- | ------------------------------------------------- | ---------------------------------- | -| PyTorch | Good flexibility; may trade off raw performance | Excellent with Python libraries | Extensive resources and community | Research and prototypes | Regular, active development | Dependent on deployment environment | CUDA support for GPU acceleration | -| TorchScript | Better for production than PyTorch | Smooth transition from PyTorch to C++ | Specialized but narrower than PyTorch | Industry where Python is a bottleneck | Consistent updates with PyTorch | Improved security without full Python | Inherits CUDA support from PyTorch | -| ONNX | Variable depending on runtime | High across different frameworks | Broad ecosystem, supported by many orgs | Flexibility across ML frameworks | Regular updates for new operations | Ensure secure conversion and deployment practices | Various 
hardware optimizations | -| OpenVINO | Optimized for Intel hardware | Best within Intel ecosystem | Solid in computer vision domain | IoT and edge with Intel hardware | Regular updates for Intel hardware | Robust features for sensitive applications | Tailored for Intel hardware | -| TensorRT | Top-tier on NVIDIA GPUs | Best for NVIDIA hardware | Strong network through NVIDIA | Real-time video and image inference | Frequent updates for new GPUs | Emphasis on security | Designed for NVIDIA GPUs | -| CoreML | Optimized for on-device Apple hardware | Exclusive to Apple ecosystem | Strong Apple and developer support | On-device ML on Apple products | Regular Apple updates | Focus on privacy and security | Apple neural engine and GPU | -| TF SavedModel | Scalable in server environments | Wide compatibility in TensorFlow ecosystem | Large support due to TensorFlow popularity | Serving models at scale | Regular updates by Google and community | Robust features for enterprise | Various hardware accelerations | -| TF GraphDef | Stable for static computation graphs | Integrates well with TensorFlow infrastructure | Resources for optimizing static graphs | Scenarios requiring static graphs | Updates alongside TensorFlow core | Established TensorFlow security practices | TensorFlow acceleration options | -| TF Lite | Speed and efficiency on mobile/embedded | Wide range of device support | Robust community, Google backed | Mobile applications with minimal footprint | Latest features for mobile | Secure environment on end-user devices | GPU and DSP among others | -| TF Edge TPU | Optimized for Google's Edge TPU hardware | Exclusive to Edge TPU devices | Growing with Google and third-party resources | IoT devices requiring real-time processing | Improvements for new Edge TPU hardware | Google's robust IoT security | Custom-designed for Google Coral | -| TF.js | Reasonable in-browser performance | High with web technologies | Web and Node.js developers support | Interactive web 
applications | TensorFlow team and community contributions | Web platform security model | Enhanced with WebGL and other APIs | -| PaddlePaddle | Competitive, easy to use and scalable | Baidu ecosystem, wide application support | Rapidly growing, especially in China | Chinese market and language processing | Focus on Chinese AI applications | Emphasizes data privacy and security | Including Baidu's Kunlun chips | -| NCNN | Optimized for mobile ARM-based devices | Mobile and embedded ARM systems | Niche but active mobile/embedded ML community | Android and ARM systems efficiency | High performance maintenance on ARM | On-device security advantages | ARM CPUs and GPUs optimizations | +#### MNN + +MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device. In addition, MNN is also used on embedded devices, such as IoT. + +## Comparative Analysis of YOLO11 Deployment Options + +The following table provides a snapshot of the various deployment options available for YOLO11 models, helping you to assess which may best fit your project needs based on several critical criteria. For an in-depth look at each deployment option's format, please see the [Ultralytics documentation page on export formats](../modes/export.md#export-formats). 
+ +| Deployment Option | Performance Benchmarks | Compatibility and Integration | Community Support and Ecosystem | Case Studies | Maintenance and Updates | Security Considerations | Hardware Acceleration | +| ----------------- | ----------------------------------------------- | ---------------------------------------------- | --------------------------------------------- | ------------------------------------------ | ---------------------------------------------- | ------------------------------------------------- | ---------------------------------- | +| PyTorch | Good flexibility; may trade off raw performance | Excellent with Python libraries | Extensive resources and community | Research and prototypes | Regular, active development | Dependent on deployment environment | CUDA support for GPU acceleration | +| TorchScript | Better for production than PyTorch | Smooth transition from PyTorch to C++ | Specialized but narrower than PyTorch | Industry where Python is a bottleneck | Consistent updates with PyTorch | Improved security without full Python | Inherits CUDA support from PyTorch | +| ONNX | Variable depending on runtime | High across different frameworks | Broad ecosystem, supported by many orgs | Flexibility across ML frameworks | Regular updates for new operations | Ensure secure conversion and deployment practices | Various hardware optimizations | +| OpenVINO | Optimized for Intel hardware | Best within Intel ecosystem | Solid in computer vision domain | IoT and edge with Intel hardware | Regular updates for Intel hardware | Robust features for sensitive applications | Tailored for Intel hardware | +| TensorRT | Top-tier on NVIDIA GPUs | Best for NVIDIA hardware | Strong network through NVIDIA | Real-time video and image inference | Frequent updates for new GPUs | Emphasis on security | Designed for NVIDIA GPUs | +| CoreML | Optimized for on-device Apple hardware | Exclusive to Apple ecosystem | Strong Apple and developer support | On-device ML on 
Apple products | Regular Apple updates | Focus on privacy and security | Apple neural engine and GPU | +| TF SavedModel | Scalable in server environments | Wide compatibility in TensorFlow ecosystem | Large support due to TensorFlow popularity | Serving models at scale | Regular updates by Google and community | Robust features for enterprise | Various hardware accelerations | +| TF GraphDef | Stable for static computation graphs | Integrates well with TensorFlow infrastructure | Resources for optimizing static graphs | Scenarios requiring static graphs | Updates alongside TensorFlow core | Established TensorFlow security practices | TensorFlow acceleration options | +| TF Lite | Speed and efficiency on mobile/embedded | Wide range of device support | Robust community, Google backed | Mobile applications with minimal footprint | Latest features for mobile | Secure environment on end-user devices | GPU and DSP among others | +| TF Edge TPU | Optimized for Google's Edge TPU hardware | Exclusive to Edge TPU devices | Growing with Google and third-party resources | IoT devices requiring real-time processing | Improvements for new Edge TPU hardware | Google's robust IoT security | Custom-designed for Google Coral | +| TF.js | Reasonable in-browser performance | High with web technologies | Web and Node.js developers support | Interactive web applications | TensorFlow team and community contributions | Web platform security model | Enhanced with WebGL and other APIs | +| PaddlePaddle | Competitive, easy to use and scalable | Baidu ecosystem, wide application support | Rapidly growing, especially in China | Chinese market and language processing | Focus on Chinese AI applications | Emphasizes data privacy and security | Including Baidu's Kunlun chips | +| MNN | High-performance for mobile devices. 
| Mobile and embedded ARM systems and X86-64 CPU | Mobile/embedded ML community | Mobile systems efficiency | High performance maintenance on Mobile Devices | On-device security advantages | ARM CPUs and GPUs optimizations | +| NCNN | Optimized for mobile ARM-based devices | Mobile and embedded ARM systems | Niche but active mobile/embedded ML community | Android and ARM systems efficiency | High performance maintenance on ARM | On-device security advantages | ARM CPUs and GPUs optimizations | This comparative analysis gives you a high-level overview. For deployment, it's essential to consider the specific requirements and constraints of your project, and consult the detailed documentation and resources available for each option. ## Community and Support -When you're getting started with YOLOv8, having a helpful community and support can make a significant impact. Here's how to connect with others who share your interests and get the assistance you need. +When you're getting started with YOLO11, having a helpful community and support can make a significant impact. Here's how to connect with others who share your interests and get the assistance you need. ### Engage with the Broader Community -- **GitHub Discussions:** The YOLOv8 repository on GitHub has a "Discussions" section where you can ask questions, report issues, and suggest improvements. +- **GitHub Discussions:** The YOLO11 repository on GitHub has a "Discussions" section where you can ask questions, report issues, and suggest improvements. - **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and developers. ### Official Documentation and Resources -- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. 
+- **Ultralytics YOLO11 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting. -These resources will help you tackle challenges and stay updated on the latest trends and best practices in the YOLOv8 community. +These resources will help you tackle challenges and stay updated on the latest trends and best practices in the YOLO11 community. ## Conclusion -In this guide, we've explored the different deployment options for YOLOv8. We've also discussed the important factors to consider when making your choice. These options allow you to customize your model for various environments and performance requirements, making it suitable for real-world applications. +In this guide, we've explored the different deployment options for YOLO11. We've also discussed the important factors to consider when making your choice. These options allow you to customize your model for various environments and performance requirements, making it suitable for real-world applications. -Don't forget that the YOLOv8 and Ultralytics community is a valuable source of help. Connect with other developers and experts to learn unique tips and solutions you might not find in regular documentation. Keep seeking knowledge, exploring new ideas, and sharing your experiences. +Don't forget that the YOLO11 and Ultralytics community is a valuable source of help. Connect with other developers and experts to learn unique tips and solutions you might not find in regular documentation. Keep seeking knowledge, exploring new ideas, and sharing your experiences. Happy deploying! ## FAQ -### What are the deployment options available for YOLOv8 on different hardware platforms? +### What are the deployment options available for YOLO11 on different hardware platforms? -Ultralytics YOLOv8 supports various deployment formats, each designed for specific environments and hardware platforms. 
Key formats include: +Ultralytics YOLO11 supports various deployment formats, each designed for specific environments and hardware platforms. Key formats include: - **PyTorch** for research and prototyping, with excellent Python integration. - **TorchScript** for production environments where Python is unavailable. @@ -318,18 +323,18 @@ Ultralytics YOLOv8 supports various deployment formats, each designed for specif Each format has unique advantages. For a detailed walkthrough, see our [export process documentation](../modes/export.md#usage-examples). -### How do I improve the inference speed of my YOLOv8 model on an Intel CPU? +### How do I improve the inference speed of my YOLO11 model on an Intel CPU? -To enhance inference speed on Intel CPUs, you can deploy your YOLOv8 model using Intel's OpenVINO toolkit. OpenVINO offers significant performance boosts by optimizing models to leverage Intel hardware efficiently. +To enhance inference speed on Intel CPUs, you can deploy your YOLO11 model using Intel's OpenVINO toolkit. OpenVINO offers significant performance boosts by optimizing models to leverage Intel hardware efficiently. -1. Convert your YOLOv8 model to the OpenVINO format using the `model.export()` function. +1. Convert your YOLO11 model to the OpenVINO format using the `model.export()` function. 2. Follow the detailed setup guide in the [Intel OpenVINO Export documentation](../integrations/openvino.md). For more insights, check out our [blog post](https://www.ultralytics.com/blog/achieve-faster-inference-speeds-ultralytics-yolov8-openvino). -### Can I deploy YOLOv8 models on mobile devices? +### Can I deploy YOLO11 models on mobile devices? -Yes, YOLOv8 models can be deployed on mobile devices using [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite (TF Lite) for both Android and iOS platforms. TF Lite is designed for mobile and embedded devices, providing efficient on-device inference. 
+Yes, YOLO11 models can be deployed on mobile devices using [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite (TF Lite) for both Android and iOS platforms. TF Lite is designed for mobile and embedded devices, providing efficient on-device inference. !!! example @@ -349,9 +354,9 @@ Yes, YOLOv8 models can be deployed on mobile devices using [TensorFlow](https:// For more details on deploying models to mobile, refer to our [TF Lite integration guide](../integrations/tflite.md). -### What factors should I consider when choosing a deployment format for my YOLOv8 model? +### What factors should I consider when choosing a deployment format for my YOLO11 model? -When choosing a deployment format for YOLOv8, consider the following factors: +When choosing a deployment format for YOLO11, consider the following factors: - **Performance**: Some formats like TensorRT provide exceptional speeds on NVIDIA GPUs, while OpenVINO is optimized for Intel hardware. - **Compatibility**: ONNX offers broad compatibility across different platforms. @@ -360,11 +365,11 @@ When choosing a deployment format for YOLOv8, consider the following factors: For a comparative analysis, refer to our [export formats documentation](../modes/export.md#export-formats). -### How can I deploy YOLOv8 models in a web application? +### How can I deploy YOLO11 models in a web application? -To deploy YOLOv8 models in a web application, you can use TensorFlow.js (TF.js), which allows for running [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser. This approach eliminates the need for backend infrastructure and provides real-time performance. +To deploy YOLO11 models in a web application, you can use TensorFlow.js (TF.js), which allows for running [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser. 
This approach eliminates the need for backend infrastructure and provides real-time performance. -1. Export the YOLOv8 model to the TF.js format. +1. Export the YOLO11 model to the TF.js format. 2. Integrate the exported model into your web application. For step-by-step instructions, refer to our guide on [TensorFlow.js integration](../integrations/tfjs.md). diff --git a/docs/en/guides/model-deployment-practices.md b/docs/en/guides/model-deployment-practices.md index f259779ceb3..5f6d2730aac 100644 --- a/docs/en/guides/model-deployment-practices.md +++ b/docs/en/guides/model-deployment-practices.md @@ -27,7 +27,7 @@ It's also important to follow best practices when deploying a model because depl Often times, once a model is [trained](./model-training-tips.md), [evaluated](./model-evaluation-insights.md), and [tested](./model-testing.md), it needs to be converted into specific formats to be deployed effectively in various environments, such as cloud, edge, or local devices. -With respect to YOLOv8, you can [export your model](../modes/export.md) to different formats. For example, when you need to transfer your model between different frameworks, ONNX is an excellent tool and [exporting to YOLOv8 to ONNX](../integrations/onnx.md) is easy. You can check out more options about integrating your model into different environments smoothly and effectively [here](../integrations/index.md). +With respect to YOLO11, you can [export your model](../modes/export.md) to different formats. For example, when you need to transfer your model between different frameworks, ONNX is an excellent tool and [exporting YOLO11 to ONNX](../integrations/onnx.md) is easy. You can check out more options about integrating your model into different environments smoothly and effectively [here](../integrations/index.md).
### Choosing a Deployment Environment @@ -94,7 +94,7 @@ Experiencing a drop in your model's accuracy after deployment can be frustrating - **Review Model Export and Conversion:** Re-export the model and make sure that the conversion process maintains the integrity of the model weights and architecture. - **Test with a Controlled Dataset:** Deploy the model in a test environment with a dataset you control and compare the results with the training phase. You can identify if the issue is with the deployment environment or the data. -When deploying YOLOv8, several factors can affect model accuracy. Converting models to formats like [TensorRT](../integrations/tensorrt.md) involves optimizations such as weight quantization and layer fusion, which can cause minor precision losses. Using FP16 (half-precision) instead of FP32 (full-precision) can speed up inference but may introduce numerical precision errors. Also, hardware constraints, like those on the [Jetson Nano](./nvidia-jetson.md), with lower CUDA core counts and reduced memory bandwidth, can impact performance. +When deploying YOLO11, several factors can affect model accuracy. Converting models to formats like [TensorRT](../integrations/tensorrt.md) involves optimizations such as weight quantization and layer fusion, which can cause minor precision losses. Using FP16 (half-precision) instead of FP32 (full-precision) can speed up inference but may introduce numerical precision errors. Also, hardware constraints, like those on the [Jetson Nano](./nvidia-jetson.md), with lower CUDA core counts and reduced memory bandwidth, can impact performance. ### Inferences Are Taking Longer Than You Expected @@ -106,7 +106,7 @@ When deploying [machine learning](https://www.ultralytics.com/glossary/machine-l - **Profile the Inference Pipeline:** Identifying bottlenecks in the inference pipeline can help pinpoint the source of delays. 
Use profiling tools to analyze each step of the inference process, identifying and addressing any stages that cause significant delays, such as inefficient layers or data transfer issues. - **Use Appropriate Precision:** Using higher precision than necessary can slow down inference times. Experiment with using lower precision, such as FP16 (half-precision), instead of FP32 (full-precision). While FP16 can reduce inference time, also keep in mind that it can impact model accuracy. -If you are facing this issue while deploying YOLOv8, consider that YOLOv8 offers [various model sizes](../models/yolov8.md), such as YOLOv8n (nano) for devices with lower memory capacity and YOLOv8x (extra-large) for more powerful GPUs. Choosing the right model variant for your hardware can help balance memory usage and processing time. +If you are facing this issue while deploying YOLO11, consider that YOLO11 offers [various model sizes](../models/yolo11.md), such as YOLO11n (nano) for devices with lower memory capacity and YOLO11x (extra-large) for more powerful GPUs. Choosing the right model variant for your hardware can help balance memory usage and processing time. Also keep in mind that the size of the input images directly impacts memory usage and processing time. Lower resolutions reduce memory usage and speed up inference, while higher resolutions improve accuracy but require more memory and processing power. @@ -132,12 +132,12 @@ Being part of a community of computer vision enthusiasts can help you solve prob ### Community Resources -- **GitHub Issues:** Explore the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help. +- **GitHub Issues:** Explore the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. 
The community and maintainers are very active and ready to help. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Visit the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. +- **Ultralytics YOLO11 Documentation:** Visit the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community. @@ -149,22 +149,22 @@ After deploying your model, the next step would be monitoring, maintaining, and ## FAQ -### What are the best practices for deploying a machine learning model using Ultralytics YOLOv8? +### What are the best practices for deploying a machine learning model using Ultralytics YOLO11? -Deploying a machine learning model, particularly with Ultralytics YOLOv8, involves several best practices to ensure efficiency and reliability. First, choose the deployment environment that suits your needs—cloud, edge, or local. Optimize your model through techniques like [pruning, quantization, and knowledge distillation](#model-optimization-techniques) for efficient deployment in resource-constrained environments. Lastly, ensure data consistency and preprocessing steps align with the training phase to maintain performance. You can also refer to [model deployment options](./model-deployment-options.md) for more detailed guidelines. +Deploying a machine learning model, particularly with Ultralytics YOLO11, involves several best practices to ensure efficiency and reliability. First, choose the deployment environment that suits your needs—cloud, edge, or local.
Optimize your model through techniques like [pruning, quantization, and knowledge distillation](#model-optimization-techniques) for efficient deployment in resource-constrained environments. Lastly, ensure data consistency and preprocessing steps align with the training phase to maintain performance. You can also refer to [model deployment options](./model-deployment-options.md) for more detailed guidelines. -### How can I troubleshoot common deployment issues with Ultralytics YOLOv8 models? +### How can I troubleshoot common deployment issues with Ultralytics YOLO11 models? Troubleshooting deployment issues can be broken down into a few key steps. If your model's accuracy drops after deployment, check for data consistency, validate preprocessing steps, and ensure the hardware/software environment matches what you used during training. For slow inference times, perform warm-up runs, optimize your inference engine, use asynchronous processing, and profile your inference pipeline. Refer to [troubleshooting deployment issues](#troubleshooting-deployment-issues) for a detailed guide on these best practices. -### How does Ultralytics YOLOv8 optimization enhance model performance on edge devices? +### How does Ultralytics YOLO11 optimization enhance model performance on edge devices? -Optimizing Ultralytics YOLOv8 models for edge devices involves using techniques like pruning to reduce the model size, quantization to convert weights to lower precision, and knowledge distillation to train smaller models that mimic larger ones. These techniques ensure the model runs efficiently on devices with limited computational power. Tools like [TensorFlow Lite](../integrations/tflite.md) and [NVIDIA Jetson](./nvidia-jetson.md) are particularly useful for these optimizations. Learn more about these techniques in our section on [model optimization](#model-optimization-techniques). 
+Optimizing Ultralytics YOLO11 models for edge devices involves using techniques like pruning to reduce the model size, quantization to convert weights to lower precision, and knowledge distillation to train smaller models that mimic larger ones. These techniques ensure the model runs efficiently on devices with limited computational power. Tools like [TensorFlow Lite](../integrations/tflite.md) and [NVIDIA Jetson](./nvidia-jetson.md) are particularly useful for these optimizations. Learn more about these techniques in our section on [model optimization](#model-optimization-techniques). -### What are the security considerations for deploying machine learning models with Ultralytics YOLOv8? +### What are the security considerations for deploying machine learning models with Ultralytics YOLO11? Security is paramount when deploying machine learning models. Ensure secure data transmission using encryption protocols like TLS. Implement robust access controls, including strong authentication and role-based access control (RBAC). Model obfuscation techniques, such as encrypting model parameters and serving models in a secure environment like a trusted execution environment (TEE), offer additional protection. For detailed practices, refer to [security considerations](#security-considerations-in-model-deployment). -### How do I choose the right deployment environment for my Ultralytics YOLOv8 model? +### How do I choose the right deployment environment for my Ultralytics YOLO11 model? -Selecting the optimal deployment environment for your Ultralytics YOLOv8 model depends on your application's specific needs. Cloud deployment offers scalability and ease of access, making it ideal for applications with high data volumes. Edge deployment is best for low-latency applications requiring real-time responses, using tools like [TensorFlow Lite](../integrations/tflite.md). Local deployment suits scenarios needing stringent data privacy and control. 
For a comprehensive overview of each environment, check out our section on [choosing a deployment environment](#choosing-a-deployment-environment). +Selecting the optimal deployment environment for your Ultralytics YOLO11 model depends on your application's specific needs. Cloud deployment offers scalability and ease of access, making it ideal for applications with high data volumes. Edge deployment is best for low-latency applications requiring real-time responses, using tools like [TensorFlow Lite](../integrations/tflite.md). Local deployment suits scenarios needing stringent data privacy and control. For a comprehensive overview of each environment, check out our section on [choosing a deployment environment](#choosing-a-deployment-environment). diff --git a/docs/en/guides/model-evaluation-insights.md b/docs/en/guides/model-evaluation-insights.md index ef9389c266b..5b16a99bdb1 100644 --- a/docs/en/guides/model-evaluation-insights.md +++ b/docs/en/guides/model-evaluation-insights.md @@ -1,6 +1,6 @@ --- comments: true -description: Explore the most effective ways to assess and refine YOLOv8 models for better performance. Learn about evaluation metrics, fine-tuning processes, and how to customize your model for specific needs. +description: Explore the most effective ways to assess and refine YOLO11 models for better performance. Learn about evaluation metrics, fine-tuning processes, and how to customize your model for specific needs. keywords: Model Evaluation, Machine Learning Model Evaluation, Fine Tuning Machine Learning, Fine Tune Model, Evaluating Models, Model Fine Tuning, How to Fine Tune a Model --- @@ -10,6 +10,17 @@ keywords: Model Evaluation, Machine Learning Model Evaluation, Fine Tuning Machi Once you've [trained](./model-training-tips.md) your computer vision model, evaluating and refining it to perform optimally is essential. Just training your model isn't enough. 
You need to make sure that your model is accurate, efficient, and fulfills the [objective](./defining-project-goals.md) of your computer vision project. By evaluating and fine-tuning your model, you can identify weaknesses, improve its accuracy, and boost overall performance. +

+
+ +
+ Watch: Insights into Model Evaluation and Fine-Tuning | Tips for Improving Mean Average Precision +

+ In this guide, we'll share insights on model evaluation and fine-tuning that'll make this [step of a computer vision project](./steps-of-a-cv-project.md) more approachable. We'll discuss how to understand evaluation metrics and implement fine-tuning techniques, giving you the knowledge to elevate your model's capabilities. ## Evaluating Model Performance Using Metrics @@ -20,7 +31,7 @@ Evaluating how well a model performs helps us understand how effectively it work The confidence score represents the model's certainty that a detected object belongs to a particular class. It ranges from 0 to 1, with higher scores indicating greater confidence. The confidence score helps filter predictions; only detections with confidence scores above a specified threshold are considered valid. -_Quick Tip:_ When running inferences, if you aren't seeing any predictions and you've checked everything else, try lowering the confidence score. Sometimes, the threshold is too high, causing the model to ignore valid predictions. Lowering the score allows the model to consider more possibilities. This might not meet your project goals, but it's a good way to see what the model can do and decide how to fine-tune it. +_Quick Tip:_ When running inferences, if you aren't seeing any predictions, and you've checked everything else, try lowering the confidence score. Sometimes, the threshold is too high, causing the model to ignore valid predictions. Lowering the score allows the model to consider more possibilities. This might not meet your project goals, but it's a good way to see what the model can do and decide how to fine-tune it. ### Intersection over Union @@ -45,23 +56,23 @@ Other mAP metrics include mAP@0.75, which uses a stricter IoU threshold of 0.75, Mean Average Precision Overview

-## Evaluating YOLOv8 Model Performance +## Evaluating YOLO11 Model Performance -With respect to YOLOv8, you can use the [validation mode](../modes/val.md) to evaluate the model. Also, be sure to take a look at our guide that goes in-depth into [YOLOv8 performance metrics](./yolo-performance-metrics.md) and how they can be interpreted. +With respect to YOLO11, you can use the [validation mode](../modes/val.md) to evaluate the model. Also, be sure to take a look at our guide that goes in-depth into [YOLO11 performance metrics](./yolo-performance-metrics.md) and how they can be interpreted. ### Common Community Questions -When evaluating your YOLOv8 model, you might run into a few hiccups. Based on common community questions, here are some tips to help you get the most out of your YOLOv8 model: +When evaluating your YOLO11 model, you might run into a few hiccups. Based on common community questions, here are some tips to help you get the most out of your YOLO11 model: #### Handling Variable Image Sizes -Evaluating your YOLOv8 model with images of different sizes can help you understand its performance on diverse datasets. Using the `rect=true` validation parameter, YOLOv8 adjusts the network's stride for each batch based on the image sizes, allowing the model to handle rectangular images without forcing them to a single size. +Evaluating your YOLO11 model with images of different sizes can help you understand its performance on diverse datasets. Using the `rect=true` validation parameter, YOLO11 adjusts the network's stride for each batch based on the image sizes, allowing the model to handle rectangular images without forcing them to a single size. The `imgsz` validation parameter sets the maximum dimension for image resizing, which is 640 by default. You can adjust this based on your dataset's maximum dimensions and the GPU memory available. Even with `imgsz` set, `rect=true` lets the model manage varying image sizes effectively by dynamically adjusting the stride. 
-#### Accessing YOLOv8 Metrics +#### Accessing YOLO11 Metrics -If you want to get a deeper understanding of your YOLOv8 model's performance, you can easily access specific evaluation metrics with a few lines of Python code. The code snippet below will let you load your model, run an evaluation, and print out various metrics that show how well your model is doing. +If you want to get a deeper understanding of your YOLO11 model's performance, you can easily access specific evaluation metrics with a few lines of Python code. The code snippet below will let you load your model, run an evaluation, and print out various metrics that show how well your model is doing. !!! example "Usage" @@ -71,7 +82,7 @@ If you want to get a deeper understanding of your YOLOv8 model's performance, yo from ultralytics import YOLO # Load the model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Run the evaluation results = model.val(data="coco8.yaml") @@ -101,7 +112,7 @@ If you want to get a deeper understanding of your YOLOv8 model's performance, yo print("Recall curve:", results.box.r_curve) ``` -The results object also includes speed metrics like preprocess time, inference time, loss, and postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLOv8 model for better performance, making it more effective for your specific use case. +The results object also includes speed metrics like preprocess time, inference time, loss, and postprocess time. By analyzing these metrics, you can fine-tune and optimize your YOLO11 model for better performance, making it more effective for your specific use case. ## How Does Fine-Tuning Work? @@ -115,11 +126,11 @@ Fine-tuning a model means paying close attention to several vital parameters and Usually, during the initial training [epochs](https://www.ultralytics.com/glossary/epoch), the learning rate starts low and gradually increases to stabilize the training process. 
However, since your model has already learned some features from the previous dataset, starting with a higher learning rate right away can be more beneficial. -When evaluating your YOLOv8 model, you can set the `warmup_epochs` validation parameter to `warmup_epochs=0` to prevent the learning rate from starting too high. By following this process, the training will continue from the provided weights, adjusting to the nuances of your new data. +When fine-tuning your YOLO11 model, you can set the `warmup_epochs` training parameter to `warmup_epochs=0` to prevent the learning rate from starting too low. By following this process, the training will continue from the provided weights, adjusting to the nuances of your new data. ### Image Tiling for Small Objects -Image tiling can improve detection accuracy for small objects. By dividing larger images into smaller segments, such as splitting 1280x1280 images into multiple 640x640 segments, you maintain the original resolution, and the model can learn from high-resolution fragments. When using YOLOv8, make sure to adjust your labels for these new segments correctly. +Image tiling can improve detection accuracy for small objects. By dividing larger images into smaller segments, such as splitting 1280x1280 images into multiple 640x640 segments, you maintain the original resolution, and the model can learn from high-resolution fragments. When using YOLO11, make sure to adjust your labels for these new segments correctly. ## Engage with the Community @@ -127,12 +138,12 @@ Sharing your ideas and questions with other [computer vision](https://www.ultral ### Finding Help and Support -- **GitHub Issues:** Explore the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to ask questions, report bugs, and suggest features. The community and maintainers are available to assist with any issues you encounter.
+- **GitHub Issues:** Explore the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to ask questions, report bugs, and suggest features. The community and maintainers are available to assist with any issues you encounter. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for comprehensive guides and valuable insights on various computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for comprehensive guides and valuable insights on various computer vision tasks and projects. ## Final Thoughts @@ -140,30 +151,30 @@ Evaluating and fine-tuning your computer vision model are important steps for su ## FAQ -### What are the key metrics for evaluating YOLOv8 model performance? +### What are the key metrics for evaluating YOLO11 model performance? -To evaluate YOLOv8 model performance, important metrics include Confidence Score, Intersection over Union (IoU), and Mean Average Precision (mAP). Confidence Score measures the model's certainty for each detected object class. IoU evaluates how well the predicted bounding box overlaps with the ground truth. Mean Average Precision (mAP) aggregates precision scores across classes, with mAP@.5 and mAP@.5:.95 being two common types for varying IoU thresholds. Learn more about these metrics in our [YOLOv8 performance metrics guide](./yolo-performance-metrics.md). +To evaluate YOLO11 model performance, important metrics include Confidence Score, Intersection over Union (IoU), and Mean Average Precision (mAP). Confidence Score measures the model's certainty for each detected object class. 
IoU evaluates how well the predicted bounding box overlaps with the ground truth. Mean Average Precision (mAP) aggregates precision scores across classes, with mAP@.5 and mAP@.5:.95 being two common types for varying IoU thresholds. Learn more about these metrics in our [YOLO11 performance metrics guide](./yolo-performance-metrics.md). -### How can I fine-tune a pre-trained YOLOv8 model for my specific dataset? +### How can I fine-tune a pre-trained YOLO11 model for my specific dataset? -Fine-tuning a pre-trained YOLOv8 model involves adjusting its parameters to improve performance on a specific task or dataset. Start by evaluating your model using metrics, then set a higher initial learning rate by adjusting the `warmup_epochs` parameter to 0 for immediate stability. Use parameters like `rect=true` for handling varied image sizes effectively. For more detailed guidance, refer to our section on [fine-tuning YOLOv8 models](#how-does-fine-tuning-work). +Fine-tuning a pre-trained YOLO11 model involves adjusting its parameters to improve performance on a specific task or dataset. Start by evaluating your model using metrics, then set a higher initial learning rate by adjusting the `warmup_epochs` parameter to 0 for immediate stability. Use parameters like `rect=true` for handling varied image sizes effectively. For more detailed guidance, refer to our section on [fine-tuning YOLO11 models](#how-does-fine-tuning-work). -### How can I handle variable image sizes when evaluating my YOLOv8 model? +### How can I handle variable image sizes when evaluating my YOLO11 model? -To handle variable image sizes during evaluation, use the `rect=true` parameter in YOLOv8, which adjusts the network's stride for each batch based on image sizes. The `imgsz` parameter sets the maximum dimension for image resizing, defaulting to 640. Adjust `imgsz` to suit your dataset and GPU memory. For more details, visit our [section on handling variable image sizes](#handling-variable-image-sizes). 
+To handle variable image sizes during evaluation, use the `rect=true` parameter in YOLO11, which adjusts the network's stride for each batch based on image sizes. The `imgsz` parameter sets the maximum dimension for image resizing, defaulting to 640. Adjust `imgsz` to suit your dataset and GPU memory. For more details, visit our [section on handling variable image sizes](#handling-variable-image-sizes). -### What practical steps can I take to improve mean average precision for my YOLOv8 model? +### What practical steps can I take to improve mean average precision for my YOLO11 model? -Improving mean average precision (mAP) for a YOLOv8 model involves several steps: +Improving mean average precision (mAP) for a YOLO11 model involves several steps: 1. **Tuning Hyperparameters**: Experiment with different learning rates, [batch sizes](https://www.ultralytics.com/glossary/batch-size), and image augmentations. 2. **[Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation)**: Use techniques like Mosaic and MixUp to create diverse training samples. 3. **Image Tiling**: Split larger images into smaller tiles to improve detection accuracy for small objects. Refer to our detailed guide on [model fine-tuning](#tips-for-fine-tuning-your-model) for specific strategies. -### How do I access YOLOv8 model evaluation metrics in Python? +### How do I access YOLO11 model evaluation metrics in Python? -You can access YOLOv8 model evaluation metrics using Python with the following steps: +You can access YOLO11 model evaluation metrics using Python with the following steps: !!! 
example "Usage" @@ -173,7 +184,7 @@ You can access YOLOv8 model evaluation metrics using Python with the following s from ultralytics import YOLO # Load the model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Run the evaluation results = model.val(data="coco8.yaml") @@ -185,4 +196,4 @@ You can access YOLOv8 model evaluation metrics using Python with the following s print("Mean recall:", results.box.mr) ``` -Analyzing these metrics helps fine-tune and optimize your YOLOv8 model. For a deeper dive, check out our guide on [YOLOv8 metrics](../modes/val.md). +Analyzing these metrics helps fine-tune and optimize your YOLO11 model. For a deeper dive, check out our guide on [YOLO11 metrics](../modes/val.md). diff --git a/docs/en/guides/model-monitoring-and-maintenance.md b/docs/en/guides/model-monitoring-and-maintenance.md index 2aedc8e3a35..79fa52ea5e4 100644 --- a/docs/en/guides/model-monitoring-and-maintenance.md +++ b/docs/en/guides/model-monitoring-and-maintenance.md @@ -10,6 +10,17 @@ keywords: Computer Vision Models, AI Model Monitoring, Data Drift Detection, Ano If you are here, we can assume you've completed many [steps in your computer vision project](./steps-of-a-cv-project.md): from [gathering requirements](./defining-project-goals.md), [annotating data](./data-collection-and-annotation.md), and [training the model](./model-training-tips.md) to finally [deploying](./model-deployment-practices.md) it. Your application is now running in production, but your project doesn't end here. The most important part of a computer vision project is making sure your model continues to fulfill your [project's objectives](./defining-project-goals.md) over time, and that's where monitoring, maintaining, and documenting your computer vision model enters the picture. +

+
+ +
+ Watch: How to Maintain Computer Vision Models after Deployment | Data Drift Detection +

+ In this guide, we'll take a closer look at how you can maintain your computer vision models after deployment. We'll explore how model monitoring can help you catch problems early on, how to keep your model accurate and up-to-date, and why documentation is important for troubleshooting. ## Model Monitoring is Key @@ -23,7 +34,7 @@ Regular model monitoring helps developers track the [model's performance](./mode Here are some best practices to keep in mind while monitoring your computer vision model in production: - **Track Performance Regularly**: Continuously monitor the model's performance to detect changes over time. -- **Double Check the Data Quality**: Check for missing values or anomalies in the data. +- **Double-Check the Data Quality**: Check for missing values or anomalies in the data. - **Use Diverse Data Sources**: Monitor data from various sources to get a comprehensive view of the model's performance. - **Combine Monitoring Techniques**: Use a mix of drift detection algorithms and rule-based approaches to identify a wide range of issues. - **Monitor Inputs and Outputs**: Keep an eye on both the data the model processes and the results it produces to make sure everything is functioning correctly. @@ -123,12 +134,12 @@ Joining a community of computer vision enthusiasts can help you solve problems a ### Community Resources -- **GitHub Issues:** Check out the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are highly active and supportive. +- **GitHub Issues:** Check out the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are highly active and supportive. 
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Visit the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. +- **Ultralytics YOLO11 Documentation:** Visit the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community. diff --git a/docs/en/guides/model-testing.md b/docs/en/guides/model-testing.md index 8d324679554..f6123a845e1 100644 --- a/docs/en/guides/model-testing.md +++ b/docs/en/guides/model-testing.md @@ -10,6 +10,17 @@ keywords: Overfitting and Underfitting in Machine Learning, Model Testing, Data After [training](./model-training-tips.md) and [evaluating](./model-evaluation-insights.md) your model, it's time to test it. Model testing involves assessing how well it performs in real-world scenarios. Testing considers factors like accuracy, reliability, fairness, and how easy it is to understand the model's decisions. The goal is to make sure the model performs as intended, delivers the expected results, and fits into the [overall objective of your application](./defining-project-goals.md) or project. +

+
+ +
+ Watch: How to Test Machine Learning Models | Avoid Data Leakage in Computer Vision 🚀

+ Model testing is quite similar to model evaluation, but they are two distinct [steps in a computer vision project](./steps-of-a-cv-project.md). Model evaluation involves metrics and plots to assess the model's accuracy. On the other hand, model testing checks if the model's learned behavior is the same as expectations. In this guide, we'll explore strategies for testing your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models. ## Model Testing Vs. Model Evaluation @@ -44,22 +55,22 @@ Next, the testing results can be analyzed: - **Error Analysis:** Perform a thorough error analysis to understand the types of errors (e.g., false positives vs. false negatives) and their potential causes. - **Bias and Fairness:** Check for any biases in the model's predictions. Ensure that the model performs equally well across different subsets of the data, especially if it includes sensitive attributes like race, gender, or age. -## Testing Your YOLOv8 Model +## Testing Your YOLO11 Model -To test your YOLOv8 model, you can use the validation mode. It's a straightforward way to understand the model's strengths and areas that need improvement. Also, you'll need to format your test dataset correctly for YOLOv8. For more details on how to use the validation mode, check out the [Model Validation](../modes/val.md) docs page. +To test your YOLO11 model, you can use the validation mode. It's a straightforward way to understand the model's strengths and areas that need improvement. Also, you'll need to format your test dataset correctly for YOLO11. For more details on how to use the validation mode, check out the [Model Validation](../modes/val.md) docs page. -## Using YOLOv8 to Predict on Multiple Test Images +## Using YOLO11 to Predict on Multiple Test Images -If you want to test your trained YOLOv8 model on multiple images stored in a folder, you can easily do so in one go. 
Instead of using the validation mode, which is typically used to evaluate model performance on a validation set and provide detailed metrics, you might just want to see predictions on all images in your test set. For this, you can use the [prediction mode](../modes/predict.md). +If you want to test your trained YOLO11 model on multiple images stored in a folder, you can easily do so in one go. Instead of using the validation mode, which is typically used to evaluate model performance on a validation set and provide detailed metrics, you might just want to see predictions on all images in your test set. For this, you can use the [prediction mode](../modes/predict.md). ### Difference Between Validation and Prediction Modes - **[Validation Mode](../modes/val.md):** Used to evaluate the model's performance by comparing predictions against known labels (ground truth). It provides detailed metrics such as accuracy, precision, recall, and F1 score. - **[Prediction Mode](../modes/predict.md):** Used to run the model on new, unseen data to generate predictions. It does not provide detailed performance metrics but allows you to see how the model performs on real-world images. -## Running YOLOv8 Predictions Without Custom Training +## Running YOLO11 Predictions Without Custom Training -If you are interested in testing the basic YOLOv8 model to understand whether it can be used for your application without custom training, you can use the prediction mode. While the model is pre-trained on datasets like COCO, running predictions on your own dataset can give you a quick sense of how well it might perform in your specific context. +If you are interested in testing the basic YOLO11 model to understand whether it can be used for your application without custom training, you can use the prediction mode. While the model is pre-trained on datasets like COCO, running predictions on your own dataset can give you a quick sense of how well it might perform in your specific context. 
## Overfitting and [Underfitting](https://www.ultralytics.com/glossary/underfitting) in [Machine Learning](https://www.ultralytics.com/glossary/machine-learning-ml) @@ -81,7 +92,7 @@ Underfitting occurs when your model can't capture the underlying patterns in the #### Signs of Underfitting - **Low Training Accuracy:** If your model can't achieve high accuracy on the training set, it might be underfitting. -- **Visual Misclassification:** Consistent failure to recognize obvious features or objects suggests underfitting. +- **Visual Mis-classification:** Consistent failure to recognize obvious features or objects suggests underfitting. ### Balancing Overfitting and Underfitting @@ -128,12 +139,12 @@ Becoming part of a community of computer vision enthusiasts can aid in solving p ### Community Resources -- **GitHub Issues:** Explore the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help. +- **GitHub Issues:** Explore the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. +- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. 
These resources will help you navigate challenges and remain updated on the latest trends and practices within the computer vision community. @@ -147,9 +158,9 @@ Building trustworthy computer vision models relies on rigorous model testing. By Model evaluation and model testing are distinct steps in a computer vision project. Model evaluation involves using a labeled dataset to compute metrics such as [accuracy](https://www.ultralytics.com/glossary/accuracy), precision, recall, and [F1 score](https://www.ultralytics.com/glossary/f1-score), providing insights into the model's performance with a controlled dataset. Model testing, on the other hand, assesses the model's performance in real-world scenarios by applying it to new, unseen data, ensuring the model's learned behavior aligns with expectations outside the evaluation environment. For a detailed guide, refer to the [steps in a computer vision project](./steps-of-a-cv-project.md). -### How can I test my Ultralytics YOLOv8 model on multiple images? +### How can I test my Ultralytics YOLO11 model on multiple images? -To test your Ultralytics YOLOv8 model on multiple images, you can use the [prediction mode](../modes/predict.md). This mode allows you to run the model on new, unseen data to generate predictions without providing detailed metrics. This is ideal for real-world performance testing on larger image sets stored in a folder. For evaluating performance metrics, use the [validation mode](../modes/val.md) instead. +To test your Ultralytics YOLO11 model on multiple images, you can use the [prediction mode](../modes/predict.md). This mode allows you to run the model on new, unseen data to generate predictions without providing detailed metrics. This is ideal for real-world performance testing on larger image sets stored in a folder. For evaluating performance metrics, use the [validation mode](../modes/val.md) instead. ### What should I do if my computer vision model shows signs of overfitting or underfitting? 
@@ -195,6 +206,6 @@ Post-testing, if the model performance meets the project goals, proceed with dep Gain insights from the [Model Testing Vs. Model Evaluation](#model-testing-vs-model-evaluation) section to refine and enhance model effectiveness in real-world applications. -### How do I run YOLOv8 predictions without custom training? +### How do I run YOLO11 predictions without custom training? -You can run predictions using the pre-trained YOLOv8 model on your dataset to see if it suits your application needs. Utilize the [prediction mode](../modes/predict.md) to get a quick sense of performance results without diving into custom training. +You can run predictions using the pre-trained YOLO11 model on your dataset to see if it suits your application needs. Utilize the [prediction mode](../modes/predict.md) to get a quick sense of performance results without diving into custom training. diff --git a/docs/en/guides/model-training-tips.md b/docs/en/guides/model-training-tips.md index 725081a244b..6ace0bc50ea 100644 --- a/docs/en/guides/model-training-tips.md +++ b/docs/en/guides/model-training-tips.md @@ -18,7 +18,7 @@ One of the most important steps when working on a [computer vision project](./st allowfullscreen>
- Watch: Model Training Tips | How to Handle Large Datasets | Batch Size, GPU Utilization and [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) + Watch: Model Training Tips | How to Handle Large Datasets | Batch Size, GPU Utilization and Mixed Precision

So, what is [model training](../modes/train.md)? Model training is the process of teaching your model to recognize visual patterns and make predictions based on your data. It directly impacts the performance and accuracy of your application. In this guide, we'll cover best practices, optimization techniques, and troubleshooting tips to help you train your computer vision models effectively. @@ -46,25 +46,25 @@ There are a few different aspects to think about when you are planning on using When training models on large datasets, efficiently utilizing your GPU is key. Batch size is an important factor. It is the number of data samples that a machine learning model processes in a single training iteration. Using the maximum batch size supported by your GPU, you can fully take advantage of its capabilities and reduce the time model training takes. However, you want to avoid running out of GPU memory. If you encounter memory errors, reduce the batch size incrementally until the model trains smoothly. -With respect to YOLOv8, you can set the `batch_size` parameter in the [training configuration](../modes/train.md) to match your GPU capacity. Also, setting `batch=-1` in your training script will automatically determine the [batch size](https://www.ultralytics.com/glossary/batch-size) that can be efficiently processed based on your device's capabilities. By fine-tuning the batch size, you can make the most of your GPU resources and improve the overall training process. +With respect to YOLO11, you can set the `batch` parameter in the [training configuration](../modes/train.md) to match your GPU capacity. Also, setting `batch=-1` in your training script will automatically determine the [batch size](https://www.ultralytics.com/glossary/batch-size) that can be efficiently processed based on your device's capabilities. By fine-tuning the batch size, you can make the most of your GPU resources and improve the overall training process. 
### Subset Training Subset training is a smart strategy that involves training your model on a smaller set of data that represents the larger dataset. It can save time and resources, especially during initial model development and testing. If you are running short on time or experimenting with different model configurations, subset training is a good option. -When it comes to YOLOv8, you can easily implement subset training by using the `fraction` parameter. This parameter lets you specify what fraction of your dataset to use for training. For example, setting `fraction=0.1` will train your model on 10% of the data. You can use this technique for quick iterations and tuning your model before committing to training a model using a full dataset. Subset training helps you make rapid progress and identify potential issues early on. +When it comes to YOLO11, you can easily implement subset training by using the `fraction` parameter. This parameter lets you specify what fraction of your dataset to use for training. For example, setting `fraction=0.1` will train your model on 10% of the data. You can use this technique for quick iterations and tuning your model before committing to training a model using a full dataset. Subset training helps you make rapid progress and identify potential issues early on. ### Multi-scale Training Multiscale training is a technique that improves your model's ability to generalize by training it on images of varying sizes. Your model can learn to detect objects at different scales and distances and become more robust. -For example, when you train YOLOv8, you can enable multiscale training by setting the `scale` parameter. This parameter adjusts the size of training images by a specified factor, simulating objects at different distances. For example, setting `scale=0.5` will reduce the image size by half, while `scale=2.0` will double it. 
Configuring this parameter allows your model to experience a variety of image scales and improve its detection capabilities across different object sizes and scenarios. +For example, when you train YOLO11, you can enable multiscale training by setting the `scale` parameter. This parameter adjusts the size of training images by a specified factor, simulating objects at different distances. For example, setting `scale=0.5` randomly zooms training images by a factor between 0.5 and 1.5 during training. Configuring this parameter allows your model to experience a variety of image scales and improve its detection capabilities across different object sizes and scenarios. ### Caching Caching is an important technique to improve the efficiency of training machine learning models. By storing preprocessed images in memory, caching reduces the time the GPU spends waiting for data to be loaded from the disk. The model can continuously receive data without delays caused by disk I/O operations. -Caching can be controlled when training YOLOv8 using the `cache` parameter: +Caching can be controlled when training YOLO11 using the `cache` parameter: - _`cache=True`_: Stores dataset images in RAM, providing the fastest access speed but at the cost of increased memory usage. - _`cache='disk'`_: Stores the images on disk, slower than RAM but faster than loading fresh data each time. @@ -80,19 +80,19 @@ Mixed precision training uses both 16-bit (FP16) and 32-bit (FP32) floating-poin To implement mixed precision training, you'll need to modify your training scripts and ensure your hardware (like GPUs) supports it. Many modern [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) frameworks, such as [Tensorflow](https://www.ultralytics.com/glossary/tensorflow), offer built-in support for mixed precision. -Mixed precision training is straightforward when working with YOLOv8. You can use the `amp` flag in your training configuration. 
Setting `amp=True` enables Automatic Mixed Precision (AMP) training. Mixed precision training is a simple yet effective way to optimize your model training process. +Mixed precision training is straightforward when working with YOLO11. You can use the `amp` flag in your training configuration. Setting `amp=True` enables Automatic Mixed Precision (AMP) training. Mixed precision training is a simple yet effective way to optimize your model training process. ### Pre-trained Weights Using pretrained weights is a smart way to speed up your model's training process. Pretrained weights come from models already trained on large datasets, giving your model a head start. [Transfer learning](https://www.ultralytics.com/glossary/transfer-learning) adapts pretrained models to new, related tasks. Fine-tuning a pre-trained model involves starting with these weights and then continuing training on your specific dataset. This method of training results in faster training times and often better performance because the model starts with a solid understanding of basic features. -The `pretrained` parameter makes transfer learning easy with YOLOv8. Setting `pretrained=True` will use default pre-trained weights, or you can specify a path to a custom pre-trained model. Using pre-trained weights and transfer learning effectively boosts your model's capabilities and reduces training costs. +The `pretrained` parameter makes transfer learning easy with YOLO11. Setting `pretrained=True` will use default pre-trained weights, or you can specify a path to a custom pre-trained model. Using pre-trained weights and transfer learning effectively boosts your model's capabilities and reduces training costs. 
### Other Techniques to Consider When Handling a Large Dataset There are a couple of other techniques to consider when handling a large dataset: -- **[Learning Rate](https://www.ultralytics.com/glossary/learning-rate) Schedulers**: Implementing learning rate schedulers dynamically adjusts the learning rate during training. A well-tuned learning rate can prevent the model from overshooting minima and improve stability. When training YOLOv8, the `lrf` parameter helps manage learning rate scheduling by setting the final learning rate as a fraction of the initial rate. +- **[Learning Rate](https://www.ultralytics.com/glossary/learning-rate) Schedulers**: Implementing learning rate schedulers dynamically adjusts the learning rate during training. A well-tuned learning rate can prevent the model from overshooting minima and improve stability. When training YOLO11, the `lrf` parameter helps manage learning rate scheduling by setting the final learning rate as a fraction of the initial rate. - **Distributed Training**: For handling large datasets, distributed training can be a game-changer. You can reduce the training time by spreading the training workload across multiple GPUs or machines. ## The Number of Epochs To Train For @@ -101,7 +101,7 @@ When training a model, an epoch refers to one complete pass through the entire t A common question that comes up is how to determine the number of epochs to train the model for. A good starting point is 300 epochs. If the model overfits early, you can reduce the number of epochs. If [overfitting](https://www.ultralytics.com/glossary/overfitting) does not occur after 300 epochs, you can extend the training to 600, 1200, or more epochs. -However, the ideal number of epochs can vary based on your dataset's size and project goals. Larger datasets might require more epochs for the model to learn effectively, while smaller datasets might need fewer epochs to avoid overfitting. 
With respect to YOLOv8, you can set the `epochs` parameter in your training script. +However, the ideal number of epochs can vary based on your dataset's size and project goals. Larger datasets might require more epochs for the model to learn effectively, while smaller datasets might need fewer epochs to avoid overfitting. With respect to YOLO11, you can set the `epochs` parameter in your training script. ## Early Stopping @@ -113,7 +113,7 @@ The process involves setting a patience parameter that determines how many [epoc Early Stopping Overview

-For YOLOv8, you can enable early stopping by setting the patience parameter in your training configuration. For example, `patience=5` means training will stop if there's no improvement in validation metrics for 5 consecutive epochs. Using this method ensures the training process remains efficient and achieves optimal performance without excessive computation. +For YOLO11, you can enable early stopping by setting the patience parameter in your training configuration. For example, `patience=5` means training will stop if there's no improvement in validation metrics for 5 consecutive epochs. Using this method ensures the training process remains efficient and achieves optimal performance without excessive computation. ## Choosing Between Cloud and Local Training @@ -143,13 +143,13 @@ Different optimizers have various strengths and weaknesses. Let's take a glimpse - Combines the benefits of both SGD with momentum and RMSProp. - Adjusts the learning rate for each parameter based on estimates of the first and second moments of the gradients. - Well-suited for noisy data and sparse gradients. - - Efficient and generally requires less tuning, making it a recommended optimizer for YOLOv8. + - Efficient and generally requires less tuning, making it a recommended optimizer for YOLO11. - **RMSProp (Root Mean Square Propagation)**: - Adjusts the learning rate for each parameter by dividing the gradient by a running average of the magnitudes of recent gradients. - Helps in handling the vanishing gradient problem and is effective for [recurrent neural networks](https://www.ultralytics.com/glossary/recurrent-neural-network-rnn). -For YOLOv8, the `optimizer` parameter lets you choose from various optimizers, including SGD, Adam, AdamW, NAdam, RAdam, and RMSProp, or you can set it to `auto` for automatic selection based on model configuration. 
+For YOLO11, the `optimizer` parameter lets you choose from various optimizers, including SGD, Adam, AdamW, NAdam, RAdam, and RMSProp, or you can set it to `auto` for automatic selection based on model configuration. ## Connecting with the Community @@ -157,12 +157,12 @@ Being part of a community of computer vision enthusiasts can help you solve prob ### Community Resources -- **GitHub Issues:** Visit the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help. +- **GitHub Issues:** Visit the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The community and maintainers are very active and ready to help. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to chat with other users and developers, get support, and share your experiences. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Check out the [official YOLOv8 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. +- **Ultralytics YOLO11 Documentation:** Check out the [official YOLO11 documentation](./index.md) for detailed guides and helpful tips on various computer vision projects. Using these resources will help you solve challenges and stay up-to-date with the latest trends and practices in the computer vision community. @@ -174,20 +174,20 @@ Training computer vision models involves following good practices, optimizing yo ### How can I improve GPU utilization when training a large dataset with Ultralytics YOLO? -To improve GPU utilization, set the `batch_size` parameter in your training configuration to the maximum size supported by your GPU. 
This ensures that you make full use of the GPU's capabilities, reducing training time. If you encounter memory errors, incrementally reduce the batch size until training runs smoothly. For YOLOv8, setting `batch=-1` in your training script will automatically determine the optimal batch size for efficient processing. For further information, refer to the [training configuration](../modes/train.md). +To improve GPU utilization, set the `batch` parameter in your training configuration to the maximum size supported by your GPU. This ensures that you make full use of the GPU's capabilities, reducing training time. If you encounter memory errors, incrementally reduce the batch size until training runs smoothly. For YOLO11, setting `batch=-1` in your training script will automatically determine the optimal batch size for efficient processing. For further information, refer to the [training configuration](../modes/train.md). -### What is mixed precision training, and how do I enable it in YOLOv8? +### What is mixed precision training, and how do I enable it in YOLO11? -Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model [accuracy](https://www.ultralytics.com/glossary/accuracy). To enable mixed precision training in YOLOv8, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md). +Mixed precision training utilizes both 16-bit (FP16) and 32-bit (FP32) floating-point types to balance computational speed and precision. This approach speeds up training and reduces memory usage without sacrificing model [accuracy](https://www.ultralytics.com/glossary/accuracy). 
To enable mixed precision training in YOLO11, set the `amp` parameter to `True` in your training configuration. This activates Automatic Mixed Precision (AMP) training. For more details on this optimization technique, see the [training configuration](../modes/train.md). -### How does multiscale training enhance YOLOv8 model performance? +### How does multiscale training enhance YOLO11 model performance? -Multiscale training enhances model performance by training on images of varying sizes, allowing the model to better generalize across different scales and distances. In YOLOv8, you can enable multiscale training by setting the `scale` parameter in the training configuration. For example, `scale=0.5` reduces the image size by half, while `scale=2.0` doubles it. This technique simulates objects at different distances, making the model more robust across various scenarios. For settings and more details, check out the [training configuration](../modes/train.md). +Multiscale training enhances model performance by training on images of varying sizes, allowing the model to better generalize across different scales and distances. In YOLO11, you can enable multiscale training by setting the `scale` parameter in the training configuration. For example, `scale=0.5` reduces the image size by half, while `scale=2.0` doubles it. This technique simulates objects at different distances, making the model more robust across various scenarios. For settings and more details, check out the [training configuration](../modes/train.md). -### How can I use pre-trained weights to speed up training in YOLOv8? +### How can I use pre-trained weights to speed up training in YOLO11? -Using pre-trained weights can significantly reduce training times and improve model performance by starting from a model that already understands basic features. In YOLOv8, you can set the `pretrained` parameter to `True` or specify a path to custom pre-trained weights in your training configuration. 
This approach, known as transfer learning, leverages knowledge from large datasets to adapt to your specific task. Learn more about pre-trained weights and their advantages [here](../modes/train.md). +Using pre-trained weights can significantly reduce training times and improve model performance by starting from a model that already understands basic features. In YOLO11, you can set the `pretrained` parameter to `True` or specify a path to custom pre-trained weights in your training configuration. This approach, known as transfer learning, leverages knowledge from large datasets to adapt to your specific task. Learn more about pre-trained weights and their advantages [here](../modes/train.md). -### What is the recommended number of epochs for training a model, and how do I set this in YOLOv8? +### What is the recommended number of epochs for training a model, and how do I set this in YOLO11? -The number of epochs refers to the complete passes through the training dataset during model training. A typical starting point is 300 epochs. If your model overfits early, you can reduce the number. Alternatively, if overfitting isn't observed, you might extend training to 600, 1200, or more epochs. To set this in YOLOv8, use the `epochs` parameter in your training script. For additional advice on determining the ideal number of epochs, refer to this section on [number of epochs](#the-number-of-epochs-to-train-for). +The number of epochs refers to the complete passes through the training dataset during model training. A typical starting point is 300 epochs. If your model overfits early, you can reduce the number. Alternatively, if overfitting isn't observed, you might extend training to 600, 1200, or more epochs. To set this in YOLO11, use the `epochs` parameter in your training script. For additional advice on determining the ideal number of epochs, refer to this section on [number of epochs](#the-number-of-epochs-to-train-for). 
diff --git a/docs/en/guides/nvidia-jetson.md b/docs/en/guides/nvidia-jetson.md index f352c76b8cd..38301b3c4d8 100644 --- a/docs/en/guides/nvidia-jetson.md +++ b/docs/en/guides/nvidia-jetson.md @@ -1,12 +1,17 @@ --- comments: true -description: Learn to deploy Ultralytics YOLOv8 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities. -keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision +description: Learn to deploy Ultralytics YOLO11 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities. +keywords: Ultralytics, YOLO11, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision +benchmark_version: 8.3.51 --- -# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLOv8 +# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO11 -This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLOv8 on these small and powerful devices. +This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLO11 on these small and powerful devices. + +!!! 
tip "New product support" + + We have updated this guide with the latest [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) which delivers up to 67 TOPS of AI performance — a 1.7X improvement over its predecessor — to seamlessly run the most popular AI models.


@@ -16,14 +21,14 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti allowfullscreen>
- Watch: How to Setup NVIDIA Jetson with Ultralytics YOLOv8 + Watch: How to Setup NVIDIA Jetson with Ultralytics YOLO11

NVIDIA Jetson Ecosystem !!! note - This guide has been tested with both [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running the latest stable JetPack release of [JP6.0](https://developer.nvidia.com/embedded/jetpack-sdk-60), JetPack release of [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513) and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.1](https://developer.nvidia.com/embedded/jetpack-sdk-461). It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy. + This guide has been tested with [NVIDIA Jetson Orin Nano Super Developer Kit](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/nano-super-developer-kit) running the latest stable JetPack release of [JP6.1](https://developer.nvidia.com/embedded/jetpack-sdk-61), [Seeed Studio reComputer J4012](https://www.seeedstudio.com/reComputer-J4012-p-5586.html) which is based on NVIDIA Jetson Orin NX 16GB running JetPack releases [JP6.0](https://developer.nvidia.com/embedded/jetpack-sdk-60) and [JP5.1.3](https://developer.nvidia.com/embedded/jetpack-sdk-513), and [Seeed Studio reComputer J1020 v2](https://www.seeedstudio.com/reComputer-J1020-v2-p-5498.html) which is based on NVIDIA Jetson Nano 4GB running JetPack release of [JP4.6.1](https://developer.nvidia.com/embedded/jetpack-sdk-461). It is expected to work across all the NVIDIA Jetson hardware lineup including latest and legacy. ## What is NVIDIA Jetson? 
@@ -33,14 +38,14 @@ NVIDIA Jetson is a series of embedded computing boards designed to bring acceler [Jetson Orin](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/) is the latest iteration of the NVIDIA Jetson family based on NVIDIA Ampere architecture which brings drastically improved AI performance when compared to the previous generations. Below table compared few of the Jetson devices in the ecosystem. -| | Jetson AGX Orin 64GB | Jetson Orin NX 16GB | Jetson Orin Nano 8GB | Jetson AGX Xavier | Jetson Xavier NX | Jetson Nano | +| | Jetson AGX Orin 64GB | Jetson Orin NX 16GB | Jetson Orin Nano Super | Jetson AGX Xavier | Jetson Xavier NX | Jetson Nano | | ----------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------- | ----------------------------------------------------------- | ------------------------------------------------------------- | --------------------------------------------- | -| AI Performance | 275 TOPS | 100 TOPS | 40 TOPs | 32 TOPS | 21 TOPS | 472 GFLOPS | +| AI Performance | 275 TOPS | 100 TOPS | 67 TOPs | 32 TOPS | 21 TOPS | 472 GFLOPS | | GPU | 2048-core NVIDIA Ampere architecture GPU with 64 Tensor Cores | 1024-core NVIDIA Ampere architecture GPU with 32 Tensor Cores | 1024-core NVIDIA Ampere architecture GPU with 32 Tensor Cores | 512-core NVIDIA Volta architecture GPU with 64 Tensor Cores | 384-core NVIDIA Voltaโ„ข architecture GPU with 48 Tensor Cores | 128-core NVIDIA Maxwellโ„ข architecture GPU | -| GPU Max Frequency | 1.3 GHz | 918 MHz | 625 MHz | 1377 MHz | 1100 MHz | 921MHz | +| GPU Max Frequency | 1.3 GHz | 918 MHz | 1020 MHz | 1377 MHz | 1100 MHz | 921MHz | | CPU | 12-core NVIDIA Armยฎ Cortex A78AE v8.2 64-bit CPU 3MB L2 + 6MB L3 | 8-core NVIDIA Armยฎ Cortex A78AE v8.2 64-bit CPU 2MB L2 + 4MB L3 | 6-core Armยฎ Cortexยฎ-A78AE v8.2 64-bit CPU 1.5MB L2 + 
4MB L3 | 8-core NVIDIA Carmel Arm®v8.2 64-bit CPU 8MB L2 + 4MB L3 | 6-core NVIDIA Carmel Arm®v8.2 64-bit CPU 6MB L2 + 4MB L3 | Quad-Core Arm® Cortex®-A57 MPCore processor | -| CPU Max Frequency | 2.2 GHz | 2.0 GHz | 1.5 GHz | 2.2 GHz | 1.9 GHz | 1.43GHz | -| Memory | 64GB 256-bit LPDDR5 204.8GB/s | 16GB 128-bit LPDDR5 102.4GB/s | 8GB 128-bit LPDDR5 68 GB/s | 32GB 256-bit LPDDR4x 136.5GB/s | 8GB 128-bit LPDDR4x 59.7GB/s | 4GB 64-bit LPDDR4 25.6GB/s" | +| CPU Max Frequency | 2.2 GHz | 2.0 GHz | 1.7 GHz | 2.2 GHz | 1.9 GHz | 1.43GHz | +| Memory | 64GB 256-bit LPDDR5 204.8GB/s | 16GB 128-bit LPDDR5 102.4GB/s | 8GB 128-bit LPDDR5 102 GB/s | 32GB 256-bit LPDDR4x 136.5GB/s | 8GB 128-bit LPDDR4x 59.7GB/s | 4GB 64-bit LPDDR4 25.6GB/s | For a more detailed comparison table, please visit the **Technical Specifications** section of [official NVIDIA Jetson page](https://developer.nvidia.com/embedded/jetson-modules). @@ -77,7 +82,7 @@ The below table highlights NVIDIA JetPack versions supported by different NVIDIA ## Quick Start with Docker -The fastest way to get started with Ultralytics YOLOv8 on NVIDIA Jetson is to run with pre-built docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own. +The fastest way to get started with Ultralytics YOLO11 on NVIDIA Jetson is to run with pre-built docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own. === "JetPack 4" @@ -106,7 +111,7 @@ After this is done, skip to [Use TensorRT on NVIDIA Jetson section](#use-tensorr For a native installation without Docker, please refer to the steps below. -### Run on JetPack 6.x +### Run on JetPack 6.1 #### Install Ultralytics Package @@ -136,25 +141,34 @@ Here we will install Ultralytics package on the Jetson with optional dependencie The above ultralytics installation will install Torch and Torchvision. 
However, these 2 packages installed via pip are not compatible to run on Jetson platform which is based on ARM64 architecture. Therefore, we need to manually install pre-built PyTorch pip wheel and compile/ install Torchvision from source. -Install `torch 2.3.0` and `torchvision 0.18` according to JP6.0 +Install `torch 2.5.0` and `torchvision 0.20` according to JP6.1 ```bash -sudo apt-get install libopenmpi-dev libopenblas-base libomp-dev -y -pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.3.0-cp310-cp310-linux_aarch64.whl -pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.18.0a0+6043bc2-cp310-cp310-linux_aarch64.whl +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.5.0a0+872d972e41.nv24.08-cp310-cp310-linux_aarch64.whl +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl ``` -Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision). +!!! note + + Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision). 
+ +Install [`cuSPARSELt`](https://developer.nvidia.com/cusparselt-downloads?target_os=Linux&target_arch=aarch64-jetson&Compilation=Native&Distribution=Ubuntu&target_version=22.04&target_type=deb_network) to fix a dependency issue with `torch 2.5.0` + +```bash +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt-get update +sudo apt-get -y install libcusparselt0 libcusparselt-dev +``` #### Install `onnxruntime-gpu` The [onnxruntime-gpu](https://pypi.org/project/onnxruntime-gpu/) package hosted in PyPI does not have `aarch64` binaries for the Jetson. So we need to manually install this package. This package is needed for some of the exports. -All different `onnxruntime-gpu` packages corresponding to different JetPack and Python versions are listed [here](https://elinux.org/Jetson_Zoo#ONNX_Runtime). However, here we will download and install `onnxruntime-gpu 1.18.0` with `Python3.10` support. +All different `onnxruntime-gpu` packages corresponding to different JetPack and Python versions are listed [here](https://elinux.org/Jetson_Zoo#ONNX_Runtime). However, here we will download and install `onnxruntime-gpu 1.20.0` with `Python3.10` support. ```bash -wget https://nvidia.box.com/shared/static/48dtuob7meiw6ebgfsfqakc9vse62sg4.whl -O onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl -pip install onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl +pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/onnxruntime_gpu-1.20.0-cp310-cp310-linux_aarch64.whl ``` !!! note @@ -163,7 +177,7 @@ pip install onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl `pip install numpy==1.23.5` -### Run on JetPack 5.x +### Run on JetPack 5.1.2 #### Install Ultralytics Package @@ -199,25 +213,16 @@ The above ultralytics installation will install Torch and Torchvision. However, pip uninstall torch torchvision ``` -2. Install PyTorch 2.1.0 according to JP5.1.3 +2. 
Install `torch 2.1.0` and `torchvision 0.16.2` according to JP5.1.2 ```bash - sudo apt-get install -y libopenblas-base libopenmpi-dev - wget https://developer.download.nvidia.com/compute/redist/jp/v512/pytorch/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl -O torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl - pip install torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl + pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torch-2.1.0a0+41361538.nv23.06-cp38-cp38-linux_aarch64.whl + pip install https://github.com/ultralytics/assets/releases/download/v0.0.0/torchvision-0.16.2+c6f3977-cp38-cp38-linux_aarch64.whl ``` -3. Install Torchvision v0.16.2 according to PyTorch v2.1.0 - - ```bash - sudo apt install -y libjpeg-dev zlib1g-dev - git clone https://github.com/pytorch/vision torchvision - cd torchvision - git checkout v0.16.2 - python3 setup.py install --user - ``` +!!! note -Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision). + Visit the [PyTorch for Jetson page](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) to access all different versions of PyTorch for different JetPack versions. For a more detailed list on the PyTorch, Torchvision compatibility, visit the [PyTorch and Torchvision compatibility page](https://github.com/pytorch/vision). #### Install `onnxruntime-gpu` @@ -240,9 +245,9 @@ pip install onnxruntime_gpu-1.17.0-cp38-cp38-linux_aarch64.whl Out of all the model export formats supported by Ultralytics, TensorRT delivers the best inference performance when working with NVIDIA Jetson devices and our recommendation is to use TensorRT with Jetson. 
We also have a detailed document on TensorRT [here](../integrations/tensorrt.md). -## Convert Model to TensorRT and Run Inference +### Convert Model to TensorRT and Run Inference -The YOLOv8n model in PyTorch format is converted to TensorRT to run inference with the exported model. +The YOLO11n model in PyTorch format is converted to TensorRT to run inference with the exported model. !!! example @@ -251,14 +256,14 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") - # Export the model - model.export(format="engine") # creates 'yolov8n.engine' + # Export the model to TensorRT + model.export(format="engine") # creates 'yolo11n.engine' # Load the exported TensorRT model - trt_model = YOLO("yolov8n.engine") + trt_model = YOLO("yolo11n.engine") # Run inference results = trt_model("https://ultralytics.com/images/bus.jpg") @@ -267,119 +272,279 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TensorRT format - yolo export model=yolov8n.pt format=engine # creates 'yolov8n.engine' + # Export a YOLO11n PyTorch model to TensorRT format + yolo export model=yolo11n.pt format=engine # creates 'yolo11n.engine' # Run inference with the exported model - yolo predict model=yolov8n.engine source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg' ``` !!! 
note Visit the [Export page](../modes/export.md#arguments) to access additional arguments when exporting models to different model formats -## NVIDIA Jetson Orin YOLOv8 Benchmarks +### Use NVIDIA Deep Learning Accelerator (DLA) + +[NVIDIA Deep Learning Accelerator (DLA)](https://developer.nvidia.com/deep-learning-accelerator) is a specialized hardware component built into NVIDIA Jetson devices that optimizes deep learning inference for energy efficiency and performance. By offloading tasks from the GPU (freeing it up for more intensive processes), DLA enables models to run with lower power consumption while maintaining high throughput, ideal for embedded systems and real-time AI applications. + +The following Jetson devices are equipped with DLA hardware: + +- Jetson Orin NX 16GB +- Jetson AGX Orin Series +- Jetson AGX Xavier Series +- Jetson Xavier NX Series + +!!! example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") + + # Export the model to TensorRT with DLA enabled (only works with FP16 or INT8) + model.export(format="engine", device="dla:0", half=True) # dla:0 or dla:1 corresponds to the DLA cores + + # Load the exported TensorRT model + trt_model = YOLO("yolo11n.engine") + + # Run inference + results = trt_model("https://ultralytics.com/images/bus.jpg") + ``` + + === "CLI" + + ```bash + # Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8) + yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores + + # Run inference with the exported model on the DLA + yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg' + ``` + +!!! note + + When using DLA exports, some layers may not be supported to run on DLA and will fall back to the GPU for execution. This fallback can introduce additional latency and impact the overall inference performance. 
Therefore, DLA is not primarily designed to reduce inference latency compared to TensorRT running entirely on the GPU. Instead, its primary purpose is to increase throughput and improve energy efficiency. -YOLOv8 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640. +## NVIDIA Jetson Orin YOLO11 Benchmarks -### Comparison Chart +YOLO11 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on both NVIDIA Jetson Orin Nano Super Developer Kit and Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640. + +### Comparison Charts Even though all model exports are working with NVIDIA Jetson, we have only included **PyTorch, TorchScript, TensorRT** for the comparison chart below because, they make use of the GPU on the Jetson and are guaranteed to produce the best results. All the other exports only utilize the CPU and the performance is not as good as the above three. You can find benchmarks for all exports in the section after this chart. -
- NVIDIA Jetson Ecosystem -
+#### NVIDIA Jetson Orin Nano Super Developer Kit + +
+ Jetson Orin Nano Super Benchmarks +
Benchmarked with Ultralytics {{ benchmark_version }}
+
+ +#### NVIDIA Jetson Orin NX 16GB + +
+ Jetson Orin NX 16GB Benchmarks +
Benchmarked with Ultralytics {{ benchmark_version }}
+
-### Detailed Comparison Table +### Detailed Comparison Tables + +The below table represents the benchmark results for five different models (YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), giving us the status, size, mAP50-95(B) metric, and inference time for each combination. + +#### NVIDIA Jetson Orin Nano Super Developer Kit + +!!! performance -The below table represents the benchmark results for five different models (YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), giving us the status, size, mAP50-95(B) metric, and inference time for each combination. + === "YOLO11n" + + | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | + |-----------------|--------|-------------------|-------------|------------------------| + | PyTorch | โœ… | 5.4 | 0.6176 | 21.3 | + | TorchScript | โœ… | 10.5 | 0.6100 | 13.40 | + | ONNX | โœ… | 10.2 | 0.6100 | 7.94 | + | OpenVINO | โœ… | 10.4 | 0.6091 | 57.36 | + | TensorRT (FP32) | โœ… | 11.9 | 0.6082 | 7.60 | + | TensorRT (FP16) | โœ… | 8.3 | 0.6096 | 4.91 | + | TensorRT (INT8) | โœ… | 5.6 | 0.3180 | 3.91 | + | TF SavedModel | โœ… | 25.8 | 0.6082 | 223.98 | + | TF GraphDef | โœ… | 10.3 | 0.6082 | 289.95 | + | TF Lite | โœ… | 10.3 | 0.6082 | 328.29 | + | PaddlePaddle | โœ… | 20.4 | 0.6082 | 530.46 | + | MNN | โœ… | 10.1 | 0.6120 | 74.75 | + | NCNN | โœ… | 10.2 | 0.6106 | 46.12 | + + === "YOLO11s" + + | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | + |-----------------|--------|-------------------|-------------|------------------------| + | PyTorch | โœ… | 18.4 | 0.7526 | 22.00 | + | TorchScript | โœ… | 36.5 | 0.7400 | 21.35 | + | ONNX | โœ… | 36.3 | 0.7400 | 13.91 | + | OpenVINO | โœ… | 36.4 | 0.7391 | 126.95 | + | 
TensorRT (FP32) | โœ… | 38.0 | 0.7400 | 13.29 | + | TensorRT (FP16) | โœ… | 21.3 | 0.7431 | 7.30 | + | TensorRT (INT8) | โœ… | 12.2 | 0.3243 | 5.25 | + | TF SavedModel | โœ… | 91.1 | 0.7400 | 406.73 | + | TF GraphDef | โœ… | 36.4 | 0.7400 | 629.80 | + | TF Lite | โœ… | 36.4 | 0.7400 | 953.98 | + | PaddlePaddle | โœ… | 72.5 | 0.7400 | 1311.67 | + | MNN | โœ… | 36.2 | 0.7392 | 187.66 | + | NCNN | โœ… | 36.2 | 0.7403 | 122.02 | + + === "YOLO11m" + + | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | + |-----------------|--------|-------------------|-------------|------------------------| + | PyTorch | โœ… | 38.8 | 0.7598 | 33.00 | + | TorchScript | โœ… | 77.3 | 0.7643 | 48.17 | + | ONNX | โœ… | 76.9 | 0.7641 | 29.31 | + | OpenVINO | โœ… | 77.1 | 0.7642 | 313.49 | + | TensorRT (FP32) | โœ… | 78.7 | 0.7641 | 28.21 | + | TensorRT (FP16) | โœ… | 41.8 | 0.7653 | 13.99 | + | TensorRT (INT8) | โœ… | 23.2 | 0.4194 | 9.58 | + | TF SavedModel | โœ… | 192.7 | 0.7643 | 802.30 | + | TF GraphDef | โœ… | 77.0 | 0.7643 | 1335.42 | + | TF Lite | โœ… | 77.0 | 0.7643 | 2842.42 | + | PaddlePaddle | โœ… | 153.8 | 0.7643 | 3644.29 | + | MNN | โœ… | 76.8 | 0.7648 | 503.90 | + | NCNN | โœ… | 76.8 | 0.7674 | 298.78 | + + === "YOLO11l" + + | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | + |-----------------|--------|-------------------|-------------|------------------------| + | PyTorch | โœ… | 49.0 | 0.7475 | 43.00 | + | TorchScript | โœ… | 97.6 | 0.7250 | 62.94 | + | ONNX | โœ… | 97.0 | 0.7250 | 36.33 | + | OpenVINO | โœ… | 97.3 | 0.7226 | 387.72 | + | TensorRT (FP32) | โœ… | 99.1 | 0.7250 | 35.59 | + | TensorRT (FP16) | โœ… | 52.0 | 0.7265 | 17.57 | + | TensorRT (INT8) | โœ… | 31.0 | 0.4033 | 12.37 | + | TF SavedModel | โœ… | 243.3 | 0.7250 | 1116.20 | + | TF GraphDef | โœ… | 97.2 | 0.7250 | 1603.32 | + | TF Lite | โœ… | 97.2 | 0.7250 | 3607.51 | + | PaddlePaddle | โœ… | 193.9 | 0.7250 | 4890.90 | + | MNN | โœ… | 96.9 | 0.7222 | 
619.04 | + | NCNN | โœ… | 96.9 | 0.7252 | 352.85 | + + === "YOLO11x" + + | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | + |-----------------|--------|-------------------|-------------|------------------------| + | PyTorch | โœ… | 109.3 | 0.8288 | 81.00 | + | TorchScript | โœ… | 218.1 | 0.8308 | 113.49 | + | ONNX | โœ… | 217.5 | 0.8308 | 75.20 | + | OpenVINO | โœ… | 217.8 | 0.8285 | 508.12 | + | TensorRT (FP32) | โœ… | 219.5 | 0.8307 | 67.32 | + | TensorRT (FP16) | โœ… | 112.2 | 0.8248 | 32.94 | + | TensorRT (INT8) | โœ… | 61.7 | 0.4854 | 20.72 | + | TF SavedModel | โœ… | 545.0 | 0.8308 | 1048.8 | + | TF GraphDef | โœ… | 217.8 | 0.8308 | 2961.8 | + | TF Lite | โœ… | 217.8 | 0.8308 | 7898.8 | + | PaddlePaddle | โœ… | 434.8 | 0.8308 | 9903.68 | + | MNN | โœ… | 217.3 | 0.8308 | 1242.97 | + | NCNN | โœ… | 217.3 | 0.8304 | 850.05 | + + Benchmarked with Ultralytics {{ benchmark_version }} + +#### NVIDIA Jetson Orin NX 16GB !!! performance - === "YOLOv8n" + === "YOLO11n" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 6.2 | 0.6381 | 14.3 | - | TorchScript | โœ… | 12.4 | 0.6117 | 13.3 | - | ONNX | โœ… | 12.2 | 0.6092 | 70.6 | - | OpenVINO | โœ… | 12.3 | 0.6092 | 104.2 | - | TensorRT (FP32) | โœ… | 16.1 | 0.6091 | 8.01 | - | TensorRT (FP16) | โœ… | 9.2 | 0.6093 | 4.55 | - | TensorRT (INT8) | โœ… | 5.9 | 0.2759 | 4.09 | - | TF SavedModel | โœ… | 30.6 | 0.6092 | 141.74 | - | TF GraphDef | โœ… | 12.3 | 0.6092 | 199.93 | - | TF Lite | โœ… | 12.3 | 0.6092 | 349.18 | - | PaddlePaddle | โœ… | 24.4 | 0.6030 | 555 | - | NCNN | โœ… | 12.2 | 0.6092 | 32 | - - === "YOLOv8s" + | PyTorch | โœ… | 5.4 | 0.6176 | 19.50 | + | TorchScript | โœ… | 10.5 | 0.6100 | 13.03 | + | ONNX | โœ… | 10.2 | 0.6100 | 8.44 | + | OpenVINO | โœ… | 10.4 | 0.6091 | 40.83 | + | TensorRT (FP32) | โœ… | 11.9 | 0.6100 | 8.05 | + | TensorRT (FP16) | 
โœ… | 8.2 | 0.6096 | 4.85 | + | TensorRT (INT8) | โœ… | 5.5 | 0.3180 | 4.37 | + | TF SavedModel | โœ… | 25.8 | 0.6082 | 185.39 | + | TF GraphDef | โœ… | 10.3 | 0.6082 | 244.85 | + | TF Lite | โœ… | 10.3 | 0.6082 | 289.77 | + | PaddlePaddle | โœ… | 20.4 | 0.6082 | 476.52 | + | MNN | โœ… | 10.1 | 0.6120 | 53.37 | + | NCNN | โœ… | 10.2 | 0.6106 | 33.55 | + + === "YOLO11s" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 21.5 | 0.6967 | 18 | - | TorchScript | โœ… | 43.0 | 0.7136 | 23.81 | - | ONNX | โœ… | 42.8 | 0.7136 | 185.55 | - | OpenVINO | โœ… | 42.9 | 0.7136 | 243.97 | - | TensorRT (FP32) | โœ… | 46.4 | 0.7136 | 14.01 | - | TensorRT (FP16) | โœ… | 24.2 | 0.722 | 7.16 | - | TensorRT (INT8) | โœ… | 13.7 | 0.4233 | 5.49 | - | TF SavedModel | โœ… | 107 | 0.7136 | 260.03 | - | TF GraphDef | โœ… | 42.8 | 0.7136 | 423.4 | - | TF Lite | โœ… | 42.8 | 0.7136 | 1046.64 | - | PaddlePaddle | โœ… | 85.5 | 0.7140 | 1464 | - | NCNN | โœ… | 42.7 | 0.7200 | 63 | - - === "YOLOv8m" + | PyTorch | โœ… | 18.4 | 0.7526 | 19.00 | + | TorchScript | โœ… | 36.5 | 0.7400 | 22.90 | + | ONNX | โœ… | 36.3 | 0.7400 | 14.44 | + | OpenVINO | โœ… | 36.4 | 0.7391 | 88.70 | + | TensorRT (FP32) | โœ… | 37.9 | 0.7400 | 14.13 | + | TensorRT (FP16) | โœ… | 21.6 | 0.7406 | 7.55 | + | TensorRT (INT8) | โœ… | 12.2 | 0.3243 | 5.63 | + | TF SavedModel | โœ… | 91.1 | 0.7400 | 317.61 | + | TF GraphDef | โœ… | 36.4 | 0.7400 | 515.99 | + | TF Lite | โœ… | 36.4 | 0.7400 | 838.85 | + | PaddlePaddle | โœ… | 72.5 | 0.7400 | 1170.07 | + | MNN | โœ… | 36.2 | 0.7413 | 125.23 | + | NCNN | โœ… | 36.2 | 0.7403 | 68.13 | + + === "YOLO11m" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 49.7 | 0.7370 | 36.4 | - | TorchScript | โœ… | 99.2 | 0.7285 | 
53.58 | - | ONNX | โœ… | 99 | 0.7280 | 452.09 | - | OpenVINO | โœ… | 99.1 | 0.7280 | 544.36 | - | TensorRT (FP32) | โœ… | 102.4 | 0.7285 | 31.51 | - | TensorRT (FP16) | โœ… | 52.6 | 0.7324 | 14.88 | - | TensorRT (INT8) | โœ… | 28.6 | 0.3283 | 10.89 | - | TF SavedModel | โœ… | 247.5 | 0.7280 | 543.65 | - | TF GraphDef | โœ… | 99 | 0.7280 | 906.63 | - | TF Lite | โœ… | 99 | 0.7280 | 2758.08 | - | PaddlePaddle | โœ… | 197.9 | 0.7280 | 3678 | - | NCNN | โœ… | 98.9 | 0.7260 | 135 | - - === "YOLOv8l" + | PyTorch | โœ… | 38.8 | 0.7598 | 36.50 | + | TorchScript | โœ… | 77.3 | 0.7643 | 52.55 | + | ONNX | โœ… | 76.9 | 0.7640 | 31.16 | + | OpenVINO | โœ… | 77.1 | 0.7642 | 208.57 | + | TensorRT (FP32) | โœ… | 78.7 | 0.7640 | 30.72 | + | TensorRT (FP16) | โœ… | 41.5 | 0.7651 | 14.45 | + | TensorRT (INT8) | โœ… | 23.3 | 0.4194 | 10.19 | + | TF SavedModel | โœ… | 192.7 | 0.7643 | 590.11 | + | TF GraphDef | โœ… | 77.0 | 0.7643 | 998.57 | + | TF Lite | โœ… | 77.0 | 0.7643 | 2486.11 | + | PaddlePaddle | โœ… | 153.8 | 0.7643 | 3236.09 | + | MNN | โœ… | 76.8 | 0.7661 | 335.78 | + | NCNN | โœ… | 76.8 | 0.7674 | 188.43 | + + === "YOLO11l" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | โœ… | 83.7 | 0.7768 | 61.3 | - | TorchScript | โœ… | 167.2 | 0.7554 | 87.9 | - | ONNX | โœ… | 166.8 | 0.7551 | 852.29 | - | OpenVINO | โœ… | 167 | 0.7551 | 1012.6 | - | TensorRT (FP32) | โœ… | 170.5 | 0.7554 | 49.79 | - | TensorRT (FP16) | โœ… | 86.1 | 0.7535 | 22.89 | - | TensorRT (INT8) | โœ… | 46.4 | 0.4048 | 14.61 | - | TF SavedModel | โœ… | 417.2 | 0.7551 | 990.45 | - | TF GraphDef | โœ… | 166.9 | 0.7551 | 1649.86 | - | TF Lite | โœ… | 166.9 | 0.7551 | 5652.37 | - | PaddlePaddle | โœ… | 333.6 | 0.7551 | 7114.67 | - | NCNN | โœ… | 166.8 | 0.7685 | 231.9 | - - === "YOLOv8x" + | PyTorch | โœ… | 49.0 | 0.7475 | 46.6 | + | TorchScript | โœ… | 97.6 | 0.7250 | 66.54 | + | ONNX 
| ✅ | 97.0 | 0.7250 | 39.55 | + | OpenVINO | ✅ | 97.3 | 0.7226 | 262.44 | + | TensorRT (FP32) | ✅ | 99.2 | 0.7250 | 38.68 | + | TensorRT (FP16) | ✅ | 51.9 | 0.7265 | 18.53 | + | TensorRT (INT8) | ✅ | 30.9 | 0.4033 | 13.36 | + | TF SavedModel | ✅ | 243.3 | 0.7250 | 850.25 | + | TF GraphDef | ✅ | 97.2 | 0.7250 | 1324.60 | + | TF Lite | ✅ | 97.2 | 0.7250 | 3191.24 | + | PaddlePaddle | ✅ | 193.9 | 0.7250 | 4204.97 | + | MNN | ✅ | 96.9 | 0.7225 | 414.41 | + | NCNN | ✅ | 96.9 | 0.7252 | 237.74 | + + === "YOLO11x" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |-----------------|--------|-------------------|-------------|------------------------| - | PyTorch | ✅ | 130.5 | 0.7759 | 93 | - | TorchScript | ✅ | 260.7 | 0.7472 | 135.1 | - | ONNX | ✅ | 260.4 | 0.7479 | 1296.13 | - | OpenVINO | ✅ | 260.6 | 0.7479 | 1502.15 | - | TensorRT (FP32) | ✅ | 264.0 | 0.7469 | 80.01 | - | TensorRT (FP16) | ✅ | 133.3 | 0.7513 | 40.76 | - | TensorRT (INT8) | ✅ | 70.2 | 0.4277 | 22.08 | - | TF SavedModel | ✅ | 651.1 | 0.7479 | 1451.76 | - | TF GraphDef | ✅ | 260.5 | 0.7479 | 4029.36 | - | TF Lite | ✅ | 260.4 | 0.7479 | 8772.86 | - | PaddlePaddle | ✅ | 520.8 | 0.7479 | 10619.53 | - | NCNN | ✅ | 260.4 | 0.7646 | 376.38 | + | PyTorch | ✅ | 109.3 | 0.8288 | 86.00 | + | TorchScript | ✅ | 218.1 | 0.8308 | 122.43 | + | ONNX | ✅ | 217.5 | 0.8307 | 77.50 | + | OpenVINO | ✅ | 217.8 | 0.8285 | 508.12 | + | TensorRT (FP32) | ✅ | 219.5 | 0.8307 | 76.44 | + | TensorRT (FP16) | ✅ | 112.0 | 0.8309 | 35.99 | + | TensorRT (INT8) | ✅ | 61.6 | 0.4854 | 22.32 | + | TF SavedModel | ✅ | 545.0 | 0.8308 | 1470.06 | + | TF GraphDef | ✅ | 217.8 | 0.8308 | 2549.78 | + | TF Lite | ✅ | 217.8 | 0.8308 | 7025.44 | + | PaddlePaddle | ✅ | 434.8 | 0.8308 | 8364.89 | + | MNN | ✅ | 217.3 | 0.8289 | 827.13 | + | NCNN | ✅ | 217.3 | 0.8304 | 490.29 | + + Benchmarked with Ultralytics {{ benchmark_version }} [Explore more 
benchmarking efforts by Seeed Studio](https://www.seeedstudio.com/blog/2023/03/30/yolov8-performance-benchmarks-on-nvidia-jetson-devices) running on different versions of NVIDIA Jetson hardware. @@ -394,25 +559,25 @@ To reproduce the above Ultralytics benchmarks on all export [formats](../modes/e ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") - # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats - results = model.benchmarks(data="coco8.yaml", imgsz=640) + # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats + results = model.benchmark(data="coco8.yaml", imgsz=640) ``` === "CLI" ```bash - # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats - yolo benchmark model=yolov8n.pt data=coco8.yaml imgsz=640 + # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats + yolo benchmark model=yolo11n.pt data=coco8.yaml imgsz=640 ``` Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results use a dataset with a large number of images, i.e. `data='coco8.yaml' (4 val images), or `data='coco.yaml'` (5000 val images). ## Best Practices when using NVIDIA Jetson -When using NVIDIA Jetson, there are a couple of best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLOv8. +When using NVIDIA Jetson, there are a couple of best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLO11. 1. Enable MAX Power Mode @@ -445,29 +610,29 @@ When using NVIDIA Jetson, there are a couple of best practices to follow in orde ## Next Steps -Congratulations on successfully setting up YOLOv8 on your NVIDIA Jetson! 
For further learning and support, visit more guide at [Ultralytics YOLOv8 Docs](../index.md)! +Congratulations on successfully setting up YOLO11 on your NVIDIA Jetson! For further learning and support, visit more guides at [Ultralytics YOLO11 Docs](../index.md)! ## FAQ -### How do I deploy Ultralytics YOLOv8 on NVIDIA Jetson devices? +### How do I deploy Ultralytics YOLO11 on NVIDIA Jetson devices? -Deploying Ultralytics YOLOv8 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation). +Deploying Ultralytics YOLO11 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation). -### What performance benchmarks can I expect from YOLOv8 models on NVIDIA Jetson devices? +### What performance benchmarks can I expect from YOLO11 models on NVIDIA Jetson devices? -YOLOv8 models have been benchmarked on various NVIDIA Jetson devices showing significant performance improvements. For example, the TensorRT format delivers the best inference performance. The table in the [Detailed Comparison Table](#detailed-comparison-table) section provides a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats. +YOLO11 models have been benchmarked on various NVIDIA Jetson devices showing significant performance improvements. 
For example, the TensorRT format delivers the best inference performance. The table in the [Detailed Comparison Tables](#detailed-comparison-tables) section provides a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats. -### Why should I use TensorRT for deploying YOLOv8 on NVIDIA Jetson? +### Why should I use TensorRT for deploying YOLO11 on NVIDIA Jetson? -TensorRT is highly recommended for deploying YOLOv8 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section. +TensorRT is highly recommended for deploying YOLO11 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section. ### How can I install PyTorch and Torchvision on NVIDIA Jetson? -To install PyTorch and Torchvision on NVIDIA Jetson, first uninstall any existing versions that may have been installed via pip. Then, manually install the compatible PyTorch and Torchvision versions for the Jetson's ARM64 architecture. Detailed instructions for this process are provided in the [Install PyTorch and Torchvision](#install-pytorch-and-torchvision) section. +To install PyTorch and Torchvision on NVIDIA Jetson, first uninstall any existing versions that may have been installed via pip. Then, manually install the compatible PyTorch and Torchvision versions for the Jetson's ARM64 architecture. Detailed instructions for this process are provided in the [Installation of PyTorch and Torchvision](#install-pytorch-and-torchvision) section. 
-### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLOv8? +### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLO11? -To maximize performance on NVIDIA Jetson with YOLOv8, follow these best practices: +To maximize performance on NVIDIA Jetson with YOLO11, follow these best practices: 1. Enable MAX Power Mode to utilize all CPU and GPU cores. 2. Enable Jetson Clocks to run all cores at their maximum frequency. diff --git a/docs/en/guides/object-blurring.md b/docs/en/guides/object-blurring.md index 315bcd76ea0..2c6a3bdfc94 100644 --- a/docs/en/guides/object-blurring.md +++ b/docs/en/guides/object-blurring.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to use Ultralytics YOLOv8 for real-time object blurring to enhance privacy and focus in your images and videos. -keywords: YOLOv8, object blurring, real-time processing, privacy protection, image manipulation, video editing, Ultralytics +description: Learn how to use Ultralytics YOLO11 for real-time object blurring to enhance privacy and focus in your images and videos. +keywords: YOLO11, object blurring, real-time processing, privacy protection, image manipulation, video editing, Ultralytics --- -# Object Blurring using Ultralytics YOLOv8 🚀 +# Object Blurring using Ultralytics YOLO11 🚀 ## What is Object Blurring? -Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves applying a blurring effect to specific detected objects in an image or video. This can be achieved using the YOLOv8 model capabilities to identify and manipulate objects within a given scene. +Object blurring with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves applying a blurring effect to specific detected objects in an image or video. This can be achieved using the YOLO11 model capabilities to identify and manipulate objects within a given scene.


@@ -18,16 +18,16 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly allowfullscreen>
- Watch: Object Blurring using Ultralytics YOLOv8 + Watch: Object Blurring using Ultralytics YOLO11

## Advantages of Object Blurring? - **Privacy Protection**: Object blurring is an effective tool for safeguarding privacy by concealing sensitive or personally identifiable information in images or videos. -- **Selective Focus**: YOLOv8 allows for selective blurring, enabling users to target specific objects, ensuring a balance between privacy and retaining relevant visual information. -- **Real-time Processing**: YOLOv8's efficiency enables object blurring in real-time, making it suitable for applications requiring on-the-fly privacy enhancements in dynamic environments. +- **Selective Focus**: YOLO11 allows for selective blurring, enabling users to target specific objects, ensuring a balance between privacy and retaining relevant visual information. +- **Real-time Processing**: YOLO11's efficiency enables object blurring in real-time, making it suitable for applications requiring on-the-fly privacy enhancements in dynamic environments. -!!! example "Object Blurring using YOLOv8 Example" +!!! example "Object Blurring using YOLO11 Example" === "Object Blurring" @@ -37,7 +37,7 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") names = model.names cap = cv2.VideoCapture("path/to/video/file.mp4") @@ -86,20 +86,20 @@ Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly ## FAQ -### What is object blurring with Ultralytics YOLOv8? +### What is object blurring with Ultralytics YOLO11? -Object blurring with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves automatically detecting and applying a blurring effect to specific objects in images or videos. This technique enhances privacy by concealing sensitive information while retaining relevant visual data. 
YOLOv8's real-time processing capabilities make it suitable for applications requiring immediate privacy protection and selective focus adjustments. +Object blurring with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves automatically detecting and applying a blurring effect to specific objects in images or videos. This technique enhances privacy by concealing sensitive information while retaining relevant visual data. YOLO11's real-time processing capabilities make it suitable for applications requiring immediate privacy protection and selective focus adjustments. -### How can I implement real-time object blurring using YOLOv8? +### How can I implement real-time object blurring using YOLO11? -To implement real-time object blurring with YOLOv8, follow the provided Python example. This involves using YOLOv8 for [object detection](https://www.ultralytics.com/glossary/object-detection) and OpenCV for applying the blur effect. Here's a simplified version: +To implement real-time object blurring with YOLO11, follow the provided Python example. This involves using YOLO11 for [object detection](https://www.ultralytics.com/glossary/object-detection) and OpenCV for applying the blur effect. Here's a simplified version: ```python import cv2 from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") while cap.isOpened(): @@ -112,7 +112,7 @@ while cap.isOpened(): obj = im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] = cv2.blur(obj, (50, 50)) - cv2.imshow("YOLOv8 Blurring", im0) + cv2.imshow("YOLO11 Blurring", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -120,9 +120,9 @@ cap.release() cv2.destroyAllWindows() ``` -### What are the benefits of using Ultralytics YOLOv8 for object blurring? +### What are the benefits of using Ultralytics YOLO11 for object blurring? 
-Ultralytics YOLOv8 offers several advantages for object blurring: +Ultralytics YOLO11 offers several advantages for object blurring: - **Privacy Protection**: Effectively obscure sensitive or identifiable information. - **Selective Focus**: Target specific objects for blurring, maintaining essential visual content. @@ -130,10 +130,10 @@ Ultralytics YOLOv8 offers several advantages for object blurring: For more detailed applications, check the [advantages of object blurring section](#advantages-of-object-blurring). -### Can I use Ultralytics YOLOv8 to blur faces in a video for privacy reasons? +### Can I use Ultralytics YOLO11 to blur faces in a video for privacy reasons? -Yes, Ultralytics YOLOv8 can be configured to detect and blur faces in videos to protect privacy. By training or using a pre-trained model to specifically recognize faces, the detection results can be processed with [OpenCV](https://www.ultralytics.com/glossary/opencv) to apply a blur effect. Refer to our guide on [object detection with YOLOv8](https://docs.ultralytics.com/models/yolov8/) and modify the code to target face detection. +Yes, Ultralytics YOLO11 can be configured to detect and blur faces in videos to protect privacy. By training or using a pre-trained model to specifically recognize faces, the detection results can be processed with [OpenCV](https://www.ultralytics.com/glossary/opencv) to apply a blur effect. Refer to our guide on [object detection with YOLO11](https://docs.ultralytics.com/models/yolo11/) and modify the code to target face detection. -### How does YOLOv8 compare to other object detection models like Faster R-CNN for object blurring? +### How does YOLO11 compare to other object detection models like Faster R-CNN for object blurring? -Ultralytics YOLOv8 typically outperforms models like Faster R-CNN in terms of speed, making it more suitable for real-time applications. 
While both models offer accurate detection, YOLOv8's architecture is optimized for rapid inference, which is critical for tasks like real-time object blurring. Learn more about the technical differences and performance metrics in our [YOLOv8 documentation](https://docs.ultralytics.com/models/yolov8/). +Ultralytics YOLO11 typically outperforms models like Faster R-CNN in terms of speed, making it more suitable for real-time applications. While both models offer accurate detection, YOLO11's architecture is optimized for rapid inference, which is critical for tasks like real-time object blurring. Learn more about the technical differences and performance metrics in our [YOLO11 documentation](https://docs.ultralytics.com/models/yolo11/). diff --git a/docs/en/guides/object-counting.md b/docs/en/guides/object-counting.md index 7c1367b29e3..73dcd3056d8 100644 --- a/docs/en/guides/object-counting.md +++ b/docs/en/guides/object-counting.md @@ -1,37 +1,27 @@ --- comments: true -description: Learn to accurately identify and count objects in real-time using Ultralytics YOLOv8 for applications like crowd analysis and surveillance. -keywords: object counting, YOLOv8, Ultralytics, real-time object detection, AI, deep learning, object tracking, crowd analysis, surveillance, resource optimization +description: Learn to accurately identify and count objects in real-time using Ultralytics YOLO11 for applications like crowd analysis and surveillance. +keywords: object counting, YOLO11, Ultralytics, real-time object detection, AI, deep learning, object tracking, crowd analysis, surveillance, resource optimization --- -# Object Counting using Ultralytics YOLOv8 +# Object Counting using Ultralytics YOLO11 ## What is Object Counting? -Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. 
YOLOv8 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) capabilities. - - - - - - -
- -
- Watch: Object Counting using Ultralytics YOLOv8 -
- -
- Watch: Class-wise Object Counting using Ultralytics YOLOv8 -
+Open Object Counting In Colab + +Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLO11 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) capabilities. + +

+
+ +
+ Watch: Class-wise Object Counting using Ultralytics YOLOv8 +

## Advantages of Object Counting? @@ -43,86 +33,61 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly | Logistics | Aquaculture | | :-----------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![Conveyor Belt Packets Counting Using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/conveyor-belt-packets-counting.avif) | ![Fish Counting in Sea using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/fish-counting-in-sea-using-ultralytics-yolov8.avif) | -| Conveyor Belt Packets Counting Using Ultralytics YOLOv8 | Fish Counting in Sea using Ultralytics YOLOv8 | - -!!! example "Object Counting using YOLOv8 Example" - - === "Count in Region" - - ```python - import cv2 - - from ultralytics import YOLO, solutions +| ![Conveyor Belt Packets Counting Using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/conveyor-belt-packets-counting.avif) | ![Fish Counting in Sea using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/fish-counting-in-sea-using-ultralytics-yolov8.avif) | +| Conveyor Belt Packets Counting Using Ultralytics YOLO11 | Fish Counting in Sea using Ultralytics YOLO11 | - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Define region points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - - # Video writer - video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) +!!! 
example "Object Counting using YOLO11 Example" - ) + === "CLI" - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True, show=False) + ```bash + # Run a counting example + yolo solutions count show=True - im0 = counter.start_counting(im0, tracks) - video_writer.write(im0) + # Pass a source video + yolo solutions count source="path/to/video/file.mp4" - cap.release() - video_writer.release() - cv2.destroyAllWindows() + # Pass region coordinates + yolo solutions count region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]" ``` - === "OBB Object Counting" + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n-obb.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) # Define region points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + # region_points = [(20, 400), (1080, 400)] # For line counting + region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # For rectangle region counting + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # For polygon region counting # Video writer video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - # Init Object Counter + # Init ObjectCounter counter = solutions.ObjectCounter( - view_img=True, - reg_pts=region_points, - names=model.names, - line_thickness=2, + show=True, # Display the output + region=region_points, # Pass region points + model="yolo11n.pt", # 
model="yolo11n-obb.pt" for object counting using YOLO11 OBB model. + # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model. + # show_in=True, # Display in counts + # show_out=True, # Display out counts + # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -130,146 +95,18 @@ Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly cv2.destroyAllWindows() ``` - === "Count in Polygon" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Define region points as a polygon with 5 points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] - - # Video writer - video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Init Object Counter - counter = solutions.ObjectCounter( - view_img=True, - reg_pts=region_points, - names=model.names, - draw_tracks=True, - line_thickness=2, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Count in Line" - - ```python - import cv2 - - from 
ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Define line points - line_points = [(20, 400), (1080, 400)] - - # Video writer - video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Init Object Counter - counter = solutions.ObjectCounter( - view_img=True, - reg_pts=line_points, - names=model.names, - draw_tracks=True, - line_thickness=2, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Specific Classes" - - ```python - import cv2 - - from ultralytics import YOLO, solutions - - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - line_points = [(20, 400), (1080, 400)] # line or region points - classes_to_count = [0, 2] # person and car classes for count - - # Video writer - video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Init Object Counter - counter = solutions.ObjectCounter( - view_img=True, - reg_pts=line_points, - names=model.names, - draw_tracks=True, - line_thickness=2, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - tracks = model.track(im0, persist=True, show=False, 
classes=classes_to_count) - im0 = counter.start_counting(im0, tracks) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - -???+ tip "Region is Movable" - - You can move the region anywhere in the frame by clicking on its edges - ### Argument `ObjectCounter` Here's a table with the `ObjectCounter` arguments: -| Name | Type | Default | Description | -| ----------------- | ------ | -------------------------- | ---------------------------------------------------------------------- | -| `names` | `dict` | `None` | Dictionary of classes names. | -| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | -| `line_thickness` | `int` | `2` | Line thickness for bounding boxes. | -| `view_img` | `bool` | `False` | Flag to control whether to display the video stream. | -| `view_in_counts` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. | -| `view_out_counts` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. | -| `draw_tracks` | `bool` | `False` | Flag to control whether to draw the object tracks. | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | +| `show_in` | `bool` | `True` | Flag to control whether to display the in counts on the video stream. | +| `show_out` | `bool` | `True` | Flag to control whether to display the out counts on the video stream. 
| ### Arguments `model.track` @@ -277,43 +114,39 @@ Here's a table with the `ObjectCounter` arguments: ## FAQ -### How do I count objects in a video using Ultralytics YOLOv8? +### How do I count objects in a video using Ultralytics YOLO11? -To count objects in a video using Ultralytics YOLOv8, you can follow these steps: +To count objects in a video using Ultralytics YOLO11, you can follow these steps: 1. Import the necessary libraries (`cv2`, `ultralytics`). -2. Load a pretrained YOLOv8 model. -3. Define the counting region (e.g., a polygon, line, etc.). -4. Set up the video capture and initialize the object counter. -5. Process each frame to track objects and count them within the defined region. +2. Define the counting region (e.g., a polygon, line, etc.). +3. Set up the video capture and initialize the object counter. +4. Process each frame to track objects and count them within the defined region. Here's a simple example for counting in a region: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions def count_objects_in_region(video_path, output_video_path, model_path): """Count objects in a specific region within a video.""" - model = YOLO(model_path) cap = cv2.VideoCapture(video_path) assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - counter = solutions.ObjectCounter( - view_img=True, reg_pts=region_points, names=model.names, draw_tracks=True, line_thickness=2 - ) + + region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] + counter = solutions.ObjectCounter(show=True, region=region_points, model=model_path) while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been 
successfully completed.") break - tracks = model.track(im0, persist=True, show=False) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -321,14 +154,14 @@ def count_objects_in_region(video_path, output_video_path, model_path): cv2.destroyAllWindows() -count_objects_in_region("path/to/video.mp4", "output_video.avi", "yolov8n.pt") +count_objects_in_region("path/to/video.mp4", "output_video.avi", "yolo11n.pt") ``` -Explore more configurations and options in the [Object Counting](#object-counting-using-ultralytics-yolov8) section. +Explore more configurations and options in the [Object Counting](#object-counting-using-ultralytics-yolo11) section. -### What are the advantages of using Ultralytics YOLOv8 for object counting? +### What are the advantages of using Ultralytics YOLO11 for object counting? -Using Ultralytics YOLOv8 for object counting offers several advantages: +Using Ultralytics YOLO11 for object counting offers several advantages: 1. **Resource Optimization:** It facilitates efficient resource management by providing accurate counts, helping optimize resource allocation in industries like inventory management. 2. **Enhanced Security:** It enhances security and surveillance by accurately tracking and counting entities, aiding in proactive threat detection. @@ -336,35 +169,32 @@ Using Ultralytics YOLOv8 for object counting offers several advantages: For real-world applications and code examples, visit the [Advantages of Object Counting](#advantages-of-object-counting) section. -### How can I count specific classes of objects using Ultralytics YOLOv8? +### How can I count specific classes of objects using Ultralytics YOLO11? -To count specific classes of objects using Ultralytics YOLOv8, you need to specify the classes you are interested in during the tracking phase. 
Below is a Python example: +To count specific classes of objects using Ultralytics YOLO11, you need to specify the classes you are interested in during the tracking phase. Below is a Python example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions def count_specific_classes(video_path, output_video_path, model_path, classes_to_count): """Count specific classes of objects in a video.""" - model = YOLO(model_path) cap = cv2.VideoCapture(video_path) assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - line_points = [(20, 400), (1080, 400)] video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - counter = solutions.ObjectCounter( - view_img=True, reg_pts=line_points, names=model.names, draw_tracks=True, line_thickness=2 - ) + + line_points = [(20, 400), (1080, 400)] + counter = solutions.ObjectCounter(show=True, region=line_points, model=model_path, classes=classes_to_count) while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - tracks = model.track(im0, persist=True, show=False, classes=classes_to_count) - im0 = counter.start_counting(im0, tracks) + im0 = counter.count(im0) video_writer.write(im0) cap.release() @@ -372,27 +202,27 @@ def count_specific_classes(video_path, output_video_path, model_path, classes_to cv2.destroyAllWindows() -count_specific_classes("path/to/video.mp4", "output_specific_classes.avi", "yolov8n.pt", [0, 2]) +count_specific_classes("path/to/video.mp4", "output_specific_classes.avi", "yolo11n.pt", [0, 2]) ``` In this example, `classes_to_count=[0, 2]`, which means it counts objects of class `0` and `2` (e.g., person and car). 
-### Why should I use YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models for real-time applications? +### Why should I use YOLO11 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models for real-time applications? -Ultralytics YOLOv8 provides several advantages over other object detection models like Faster R-CNN, SSD, and previous YOLO versions: +Ultralytics YOLO11 provides several advantages over other object detection models like Faster R-CNN, SSD, and previous YOLO versions: -1. **Speed and Efficiency:** YOLOv8 offers real-time processing capabilities, making it ideal for applications requiring high-speed inference, such as surveillance and autonomous driving. +1. **Speed and Efficiency:** YOLO11 offers real-time processing capabilities, making it ideal for applications requiring high-speed inference, such as surveillance and autonomous driving. 2. **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** It provides state-of-the-art accuracy for object detection and tracking tasks, reducing the number of false positives and improving overall system reliability. -3. **Ease of Integration:** YOLOv8 offers seamless integration with various platforms and devices, including mobile and edge devices, which is crucial for modern AI applications. +3. **Ease of Integration:** YOLO11 offers seamless integration with various platforms and devices, including mobile and edge devices, which is crucial for modern AI applications. 4. **Flexibility:** Supports various tasks like object detection, segmentation, and tracking with configurable models to meet specific use-case requirements. -Check out Ultralytics [YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8/) for a deeper dive into its features and performance comparisons. 
+Check out Ultralytics [YOLO11 Documentation](https://docs.ultralytics.com/models/yolo11/) for a deeper dive into its features and performance comparisons. -### Can I use YOLOv8 for advanced applications like crowd analysis and traffic management? +### Can I use YOLO11 for advanced applications like crowd analysis and traffic management? -Yes, Ultralytics YOLOv8 is perfectly suited for advanced applications like crowd analysis and traffic management due to its real-time detection capabilities, scalability, and integration flexibility. Its advanced features allow for high-accuracy object tracking, counting, and classification in dynamic environments. Example use cases include: +Yes, Ultralytics YOLO11 is perfectly suited for advanced applications like crowd analysis and traffic management due to its real-time detection capabilities, scalability, and integration flexibility. Its advanced features allow for high-accuracy object tracking, counting, and classification in dynamic environments. Example use cases include: - **Crowd Analysis:** Monitor and manage large gatherings, ensuring safety and optimizing crowd flow. - **Traffic Management:** Track and count vehicles, analyze traffic patterns, and manage congestion in real-time. -For more information and implementation details, refer to the guide on [Real World Applications](#real-world-applications) of object counting with YOLOv8. +For more information and implementation details, refer to the guide on [Real World Applications](#real-world-applications) of object counting with YOLO11. diff --git a/docs/en/guides/object-cropping.md b/docs/en/guides/object-cropping.md index f4b50ed0276..8bfcac5fe1f 100644 --- a/docs/en/guides/object-cropping.md +++ b/docs/en/guides/object-cropping.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to crop and extract objects using Ultralytics YOLOv8 for focused analysis, reduced data volume, and enhanced precision. 
-keywords: Ultralytics, YOLOv8, object cropping, object detection, image processing, video analysis, AI, machine learning +description: Learn how to crop and extract objects using Ultralytics YOLO11 for focused analysis, reduced data volume, and enhanced precision. +keywords: Ultralytics, YOLO11, object cropping, object detection, image processing, video analysis, AI, machine learning --- -# Object Cropping using Ultralytics YOLOv8 +# Object Cropping using Ultralytics YOLO11 ## What is Object Cropping? -Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves isolating and extracting specific detected objects from an image or video. The YOLOv8 model capabilities are utilized to accurately identify and delineate objects, enabling precise cropping for further analysis or manipulation. +Object cropping with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves isolating and extracting specific detected objects from an image or video. The YOLO11 model capabilities are utilized to accurately identify and delineate objects, enabling precise cropping for further analysis or manipulation.


@@ -18,23 +18,23 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly allowfullscreen>
- Watch: Object Cropping using Ultralytics YOLOv8 + Watch: Object Cropping using Ultralytics YOLO

## Advantages of Object Cropping? -- **Focused Analysis**: YOLOv8 facilitates targeted object cropping, allowing for in-depth examination or processing of individual items within a scene. +- **Focused Analysis**: YOLO11 facilitates targeted object cropping, allowing for in-depth examination or processing of individual items within a scene. - **Reduced Data Volume**: By extracting only relevant objects, object cropping helps in minimizing data size, making it efficient for storage, transmission, or subsequent computational tasks. -- **Enhanced Precision**: YOLOv8's [object detection](https://www.ultralytics.com/glossary/object-detection) [accuracy](https://www.ultralytics.com/glossary/accuracy) ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis. +- **Enhanced Precision**: YOLO11's [object detection](https://www.ultralytics.com/glossary/object-detection) [accuracy](https://www.ultralytics.com/glossary/accuracy) ensures that the cropped objects maintain their spatial relationships, preserving the integrity of the visual information for detailed analysis. ## Visuals | Airport Luggage | | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![Conveyor Belt at Airport Suitcases Cropping using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/suitcases-cropping-airport-conveyor-belt.avif) | -| Suitcases Cropping at airport conveyor belt using Ultralytics YOLOv8 | +| ![Conveyor Belt at Airport Suitcases Cropping using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/suitcases-cropping-airport-conveyor-belt.avif) | +| Suitcases Cropping at airport conveyor belt using Ultralytics YOLO11 | -!!! example "Object Cropping using YOLOv8 Example" +!!! 
example "Object Cropping using YOLO11 Example" === "Object Cropping" @@ -46,7 +46,7 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") names = model.names cap = cv2.VideoCapture("path/to/video/file.mp4") @@ -98,22 +98,22 @@ Object cropping with [Ultralytics YOLOv8](https://github.com/ultralytics/ultraly ## FAQ -### What is object cropping in Ultralytics YOLOv8 and how does it work? +### What is object cropping in Ultralytics YOLO11 and how does it work? -Object cropping using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLOv8's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced [precision](https://www.ultralytics.com/glossary/precision) by leveraging YOLOv8 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolov8). +Object cropping using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) involves isolating and extracting specific objects from an image or video based on YOLO11's detection capabilities. This process allows for focused analysis, reduced data volume, and enhanced [precision](https://www.ultralytics.com/glossary/precision) by leveraging YOLO11 to identify objects with high accuracy and crop them accordingly. For an in-depth tutorial, refer to the [object cropping example](#object-cropping-using-ultralytics-yolo11). -### Why should I use Ultralytics YOLOv8 for object cropping over other solutions? +### Why should I use Ultralytics YOLO11 for object cropping over other solutions? -Ultralytics YOLOv8 stands out due to its precision, speed, and ease of use. 
It allows detailed and accurate object detection and cropping, essential for [focused analysis](#advantages-of-object-cropping) and applications needing high data integrity. Moreover, YOLOv8 integrates seamlessly with tools like OpenVINO and TensorRT for deployments requiring real-time capabilities and optimization on diverse hardware. Explore the benefits in the [guide on model export](../modes/export.md). +Ultralytics YOLO11 stands out due to its precision, speed, and ease of use. It allows detailed and accurate object detection and cropping, essential for [focused analysis](#advantages-of-object-cropping) and applications needing high data integrity. Moreover, YOLO11 integrates seamlessly with tools like OpenVINO and TensorRT for deployments requiring real-time capabilities and optimization on diverse hardware. Explore the benefits in the [guide on model export](../modes/export.md). ### How can I reduce the data volume of my dataset using object cropping? -By using Ultralytics YOLOv8 to crop only relevant objects from your images or videos, you can significantly reduce the data size, making it more efficient for storage and processing. This process involves training the model to detect specific objects and then using the results to crop and save these portions only. For more information on exploiting Ultralytics YOLOv8's capabilities, visit our [quickstart guide](../quickstart.md). +By using Ultralytics YOLO11 to crop only relevant objects from your images or videos, you can significantly reduce the data size, making it more efficient for storage and processing. This process involves training the model to detect specific objects and then using the results to crop and save these portions only. For more information on exploiting Ultralytics YOLO11's capabilities, visit our [quickstart guide](../quickstart.md). -### Can I use Ultralytics YOLOv8 for real-time video analysis and object cropping? 
+### Can I use Ultralytics YOLO11 for real-time video analysis and object cropping? -Yes, Ultralytics YOLOv8 can process real-time video feeds to detect and crop objects dynamically. The model's high-speed inference capabilities make it ideal for real-time applications such as surveillance, sports analysis, and automated inspection systems. Check out the [tracking and prediction modes](../modes/predict.md) to understand how to implement real-time processing. +Yes, Ultralytics YOLO11 can process real-time video feeds to detect and crop objects dynamically. The model's high-speed inference capabilities make it ideal for real-time applications such as surveillance, sports analysis, and automated inspection systems. Check out the [tracking and prediction modes](../modes/predict.md) to understand how to implement real-time processing. -### What are the hardware requirements for efficiently running YOLOv8 for object cropping? +### What are the hardware requirements for efficiently running YOLO11 for object cropping? -Ultralytics YOLOv8 is optimized for both CPU and GPU environments, but to achieve optimal performance, especially for real-time or high-volume inference, a dedicated GPU (e.g., NVIDIA Tesla, RTX series) is recommended. For deployment on lightweight devices, consider using CoreML for iOS or TFLite for Android. More details on supported devices and formats can be found in our [model deployment options](../guides/model-deployment-options.md). +Ultralytics YOLO11 is optimized for both CPU and GPU environments, but to achieve optimal performance, especially for real-time or high-volume inference, a dedicated GPU (e.g., NVIDIA Tesla, RTX series) is recommended. For deployment on lightweight devices, consider using CoreML for iOS or TFLite for Android. More details on supported devices and formats can be found in our [model deployment options](../guides/model-deployment-options.md). 
diff --git a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md index 154ec7a893a..cffeb223503 100644 --- a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md +++ b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md @@ -61,7 +61,7 @@ OpenVINO's multi-device mode simplifies scaling throughput by automatically bala Optimizing Ultralytics YOLO models for latency and throughput with OpenVINO can significantly enhance your application's performance. By carefully applying the strategies outlined in this guide, developers can ensure their models run efficiently, meeting the demands of various deployment scenarios. Remember, the choice between optimizing for latency or throughput depends on your specific application needs and the characteristics of the deployment environment. -For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/latest/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. +For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/2024/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. --- diff --git a/docs/en/guides/parking-management.md b/docs/en/guides/parking-management.md index 78686bd0613..b6140181aef 100644 --- a/docs/en/guides/parking-management.md +++ b/docs/en/guides/parking-management.md @@ -1,14 +1,14 @@ --- comments: true -description: Optimize parking spaces and enhance safety with Ultralytics YOLOv8. 
Explore real-time vehicle detection and smart parking solutions. -keywords: parking management, YOLOv8, Ultralytics, vehicle detection, real-time tracking, parking lot optimization, smart parking +description: Optimize parking spaces and enhance safety with Ultralytics YOLO11. Explore real-time vehicle detection and smart parking solutions. +keywords: parking management, YOLO11, Ultralytics, vehicle detection, real-time tracking, parking lot optimization, smart parking --- -# Parking Management using Ultralytics YOLOv8 🚀 +# Parking Management using Ultralytics YOLO11 🚀 ## What is Parking Management System? -Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) ensures efficient and safe parking by organizing spaces and monitoring availability. YOLOv8 can improve parking lot management through real-time vehicle detection, and insights into parking occupancy. +Parking management with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) ensures efficient and safe parking by organizing spaces and monitoring availability. YOLO11 can improve parking lot management through real-time vehicle detection, and insights into parking occupancy.


@@ -18,21 +18,21 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr allowfullscreen>
- Watch: How to Implement Parking Management Using Ultralytics YOLOv8 🚀 + Watch: How to Implement Parking Management Using Ultralytics YOLO 🚀

## Advantages of Parking Management System? - **Efficiency**: Parking lot management optimizes the use of parking spaces and reduces congestion. -- **Safety and Security**: Parking management using YOLOv8 improves the safety of both people and vehicles through surveillance and security measures. -- **Reduced Emissions**: Parking management using YOLOv8 manages traffic flow to minimize idle time and emissions in parking lots. +- **Safety and Security**: Parking management using YOLO11 improves the safety of both people and vehicles through surveillance and security measures. +- **Reduced Emissions**: Parking management using YOLO11 manages traffic flow to minimize idle time and emissions in parking lots. ## Real World Applications | Parking Management System | Parking Management System | | :----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![Parking lots Analytics Using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/parking-management-aerial-view-ultralytics-yolov8.avif) | ![Parking management top view using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/parking-management-top-view-ultralytics-yolov8.avif) | -| Parking management Aerial View using Ultralytics YOLOv8 | Parking management Top View using Ultralytics YOLOv8 | +| ![Parking lots Analytics Using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/parking-management-aerial-view-ultralytics-yolov8.avif) | ![Parking management top view using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/parking-management-top-view-ultralytics-yolov8.avif) | +| Parking management Aerial View using Ultralytics YOLO11 | Parking management 
Top View using Ultralytics YOLO11 | ## Parking Management System Code Workflow @@ -49,7 +49,7 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr Max Image Size of 1920 * 1080 supported -!!! example "Parking slots Annotator Ultralytics YOLOv8" +!!! example "Parking slots Annotator Ultralytics YOLO11" === "Parking Annotator" @@ -61,11 +61,11 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr - After defining the parking areas with polygons, click `save` to store a JSON file with the data in your working directory. -![Ultralytics YOLOv8 Points Selection Demo](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-points-selection-demo.avif) +![Ultralytics YOLO11 Points Selection Demo](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-points-selection-demo.avif) ### Python Code for Parking Management -!!! example "Parking management using YOLOv8 Example" +!!! example "Parking management using YOLO11 Example" === "Parking Management" @@ -84,7 +84,7 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr # Initialize parking management object parking_manager = solutions.ParkingManagement( - model="yolov8n.pt", # path to model file + model="yolo11n.pt", # path to model file json_file="bounding_boxes.json", # path to parking annotations file ) @@ -102,12 +102,10 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr ### Optional Arguments `ParkingManagement` -| Name | Type | Default | Description | -| ------------------------ | ------- | ------------- | -------------------------------------------------------------- | -| `model` | `str` | `None` | Path to the YOLOv8 model. | -| `json_file` | `str` | `None` | Path to the JSON file, that have all parking coordinates data. | -| `occupied_region_color` | `tuple` | `(0, 0, 255)` | RGB color for occupied regions. 
| -| `available_region_color` | `tuple` | `(0, 255, 0)` | RGB color for available regions. | +| Name | Type | Default | Description | +| ----------- | ----- | ------- | -------------------------------------------------------------- | +| `model` | `str` | `None` | Path to the YOLO11 model. | +| `json_file` | `str` | `None` | Path to the JSON file containing all parking coordinates data. | ### Arguments `model.track` @@ -115,33 +113,33 @@ Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultr ## FAQ -### How does Ultralytics YOLOv8 enhance parking management systems? +### How does Ultralytics YOLO11 enhance parking management systems? -Ultralytics YOLOv8 greatly enhances parking management systems by providing **real-time vehicle detection** and monitoring. This results in optimized usage of parking spaces, reduced congestion, and improved safety through continuous surveillance. The [Parking Management System](https://github.com/ultralytics/ultralytics) enables efficient traffic flow, minimizing idle times and emissions in parking lots, thereby contributing to environmental sustainability. For further details, refer to the [parking management code workflow](#python-code-for-parking-management). +Ultralytics YOLO11 greatly enhances parking management systems by providing **real-time vehicle detection** and monitoring. This results in optimized usage of parking spaces, reduced congestion, and improved safety through continuous surveillance. The [Parking Management System](https://github.com/ultralytics/ultralytics) enables efficient traffic flow, minimizing idle times and emissions in parking lots, thereby contributing to environmental sustainability. For further details, refer to the [parking management code workflow](#python-code-for-parking-management). -### What are the benefits of using Ultralytics YOLOv8 for smart parking?
-Using Ultralytics YOLOv8 for smart parking yields numerous benefits: +Using Ultralytics YOLO11 for smart parking yields numerous benefits: - **Efficiency**: Optimizes the use of parking spaces and decreases congestion. - **Safety and Security**: Enhances surveillance and ensures the safety of vehicles and pedestrians. - **Environmental Impact**: Helps in reducing emissions by minimizing vehicle idle times. More details on the advantages can be seen [here](#advantages-of-parking-management-system). -### How can I define parking spaces using Ultralytics YOLOv8? +### How can I define parking spaces using Ultralytics YOLO11? -Defining parking spaces is straightforward with Ultralytics YOLOv8: +Defining parking spaces is straightforward with Ultralytics YOLO11: 1. Capture a frame from a video or camera stream. 2. Use the provided code to launch a GUI for selecting an image and drawing polygons to define parking spaces. 3. Save the labeled data in JSON format for further processing. For comprehensive instructions, check the [selection of points](#selection-of-points) section. -### Can I customize the YOLOv8 model for specific parking management needs? +### Can I customize the YOLO11 model for specific parking management needs? -Yes, Ultralytics YOLOv8 allows customization for specific parking management needs. You can adjust parameters such as the **occupied and available region colors**, margins for text display, and much more. Utilizing the `ParkingManagement` class's [optional arguments](#optional-arguments-parkingmanagement), you can tailor the model to suit your particular requirements, ensuring maximum efficiency and effectiveness. +Yes, Ultralytics YOLO11 allows customization for specific parking management needs. You can adjust parameters such as the **occupied and available region colors**, margins for text display, and much more. 
Utilizing the `ParkingManagement` class's [optional arguments](#optional-arguments-parkingmanagement), you can tailor the model to suit your particular requirements, ensuring maximum efficiency and effectiveness. -### What are some real-world applications of Ultralytics YOLOv8 in parking lot management? +### What are some real-world applications of Ultralytics YOLO11 in parking lot management? -Ultralytics YOLOv8 is utilized in various real-world applications for parking lot management, including: +Ultralytics YOLO11 is utilized in various real-world applications for parking lot management, including: - **Parking Space Detection**: Accurately identifying available and occupied spaces. - **Surveillance**: Enhancing security through real-time monitoring. diff --git a/docs/en/guides/preprocessing_annotated_data.md b/docs/en/guides/preprocessing_annotated_data.md index fcd329c7438..62f69694922 100644 --- a/docs/en/guides/preprocessing_annotated_data.md +++ b/docs/en/guides/preprocessing_annotated_data.md @@ -1,7 +1,7 @@ --- comments: true description: Learn essential data preprocessing techniques for annotated computer vision data, including resizing, normalizing, augmenting, and splitting datasets for optimal model training. -keywords: data preprocessing, computer vision, image resizing, normalization, data augmentation, training dataset, validation dataset, test dataset, YOLOv8 +keywords: data preprocessing, computer vision, image resizing, normalization, data augmentation, training dataset, validation dataset, test dataset, YOLO11 --- # Data Preprocessing Techniques for Annotated [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Data @@ -36,7 +36,7 @@ To make resizing a simpler task, you can use the following tools: - **[OpenCV](https://www.ultralytics.com/glossary/opencv)**: A popular computer vision library with extensive functions for image processing. 
- **PIL (Pillow)**: A Python Imaging Library for opening, manipulating, and saving image files. -With respect to YOLOv8, the 'imgsz' parameter during [model training](../modes/train.md) allows for flexible input sizes. When set to a specific size, such as 640, the model will resize input images so their largest dimension is 640 pixels while maintaining the original aspect ratio. +With respect to YOLO11, the 'imgsz' parameter during [model training](../modes/train.md) allows for flexible input sizes. When set to a specific size, such as 640, the model will resize input images so their largest dimension is 640 pixels while maintaining the original aspect ratio. By evaluating your model's and dataset's specific needs, you can determine whether resizing is a necessary preprocessing step or if your model can efficiently handle images of varying sizes. @@ -47,7 +47,7 @@ Another preprocessing technique is normalization. Normalization scales the pixel - **Min-Max Scaling**: Scales pixel values to a range of 0 to 1. - **Z-Score Normalization**: Scales pixel values based on their mean and standard deviation. -With respect to YOLOv8, normalization is seamlessly handled as part of its preprocessing pipeline during model training. YOLOv8 automatically performs several preprocessing steps, including conversion to RGB, scaling pixel values to the range [0, 1], and normalization using predefined mean and standard deviation values. +With respect to YOLO11, normalization is seamlessly handled as part of its preprocessing pipeline during model training. YOLO11 automatically performs several preprocessing steps, including conversion to RGB, scaling pixel values to the range [0, 1], and normalization using predefined mean and standard deviation values. ### Splitting the Dataset @@ -76,9 +76,9 @@ Common augmentation techniques include flipping, rotation, scaling, and color ad Overview of Data Augmentations

-With respect to YOLOv8, you can [augment your custom dataset](../modes/train.md) by modifying the dataset configuration file, a .yaml file. In this file, you can add an augmentation section with parameters that specify how you want to augment your data. +With respect to YOLO11, you can [augment your custom dataset](../modes/train.md) by modifying the dataset configuration file, a .yaml file. In this file, you can add an augmentation section with parameters that specify how you want to augment your data. -The [Ultralytics YOLOv8 repository](https://github.com/ultralytics/ultralytics/tree/main) supports a wide range of data augmentations. You can apply various transformations such as: +The [Ultralytics YOLO11 repository](https://github.com/ultralytics/ultralytics/tree/main) supports a wide range of data augmentations. You can apply various transformations such as: - Random Crops - Flipping: Images can be flipped horizontally or vertically. @@ -89,12 +89,12 @@ Also, you can adjust the intensity of these augmentation techniques through spec ## A Case Study of Preprocessing -Consider a project aimed at developing a model to detect and classify different types of vehicles in traffic images using YOLOv8. We've collected traffic images and annotated them with bounding boxes and labels. +Consider a project aimed at developing a model to detect and classify different types of vehicles in traffic images using YOLO11. We've collected traffic images and annotated them with bounding boxes and labels. Here's what each step of preprocessing would look like for this project: -- Resizing Images: Since YOLOv8 handles flexible input sizes and performs resizing automatically, manual resizing is not required. The model will adjust the image size according to the specified 'imgsz' parameter during training. -- Normalizing Pixel Values: YOLOv8 automatically normalizes pixel values to a range of 0 to 1 during preprocessing, so it's not required. 
+- Resizing Images: Since YOLO11 handles flexible input sizes and performs resizing automatically, manual resizing is not required. The model will adjust the image size according to the specified 'imgsz' parameter during training. +- Normalizing Pixel Values: YOLO11 automatically normalizes pixel values to a range of 0 to 1 during preprocessing, so it's not required. - Splitting the Dataset: Divide the dataset into training (70%), validation (20%), and test (10%) sets using tools like scikit-learn. - [Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation): Modify the dataset configuration file (.yaml) to include data augmentation techniques such as random crops, horizontal flips, and brightness adjustments. @@ -120,6 +120,10 @@ Common tools for visualizations include: ### Using Ultralytics Explorer for EDA +!!! warning "Community Note ⚠️" + + As of **`ultralytics>=8.3.10`**, Ultralytics explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features!🚀 + For a more advanced approach to EDA, you can use the Ultralytics Explorer tool. It offers robust capabilities for exploring computer vision datasets. By supporting semantic search, SQL queries, and vector similarity search, the tool makes it easy to analyze and understand your data. With Ultralytics Explorer, you can create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset to find similar images, run SQL queries for detailed analysis, and perform semantic searches, all through a user-friendly graphical interface.

@@ -132,12 +136,12 @@ Having discussions about your project with other computer vision enthusiasts can ### Channels to Connect with the Community -- **GitHub Issues:** Visit the YOLOv8 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face. +- **GitHub Issues:** Visit the YOLO11 GitHub repository and use the [Issues tab](https://github.com/ultralytics/ultralytics/issues) to raise questions, report bugs, and suggest features. The community and maintainers are there to help with any issues you face. - **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to connect with other users and developers, get support, share knowledge, and brainstorm ideas. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](./index.md) for thorough guides and valuable insights on numerous computer vision tasks and projects. ## Your Dataset Is Ready! @@ -151,7 +155,7 @@ Data preprocessing is essential in computer vision projects because it ensures t ### How can I use Ultralytics YOLO for data augmentation? -For data augmentation with Ultralytics YOLOv8, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce [overfitting](https://www.ultralytics.com/glossary/overfitting), and improve model generalization. 
+For data augmentation with Ultralytics YOLO11, you need to modify the dataset configuration file (.yaml). In this file, you can specify various augmentation techniques such as random crops, horizontal flips, and brightness adjustments. This can be effectively done using the training configurations [explained here](../modes/train.md). Data augmentation helps create a more robust dataset, reduce [overfitting](https://www.ultralytics.com/glossary/overfitting), and improve model generalization. ### What are the best data normalization techniques for computer vision data? @@ -160,12 +164,12 @@ Normalization scales pixel values to a standard range for faster convergence and - **Min-Max Scaling**: Scales pixel values to a range of 0 to 1. - **Z-Score Normalization**: Scales pixel values based on their mean and standard deviation. -For YOLOv8, normalization is handled automatically, including conversion to RGB and pixel value scaling. Learn more about it in the [model training section](../modes/train.md). +For YOLO11, normalization is handled automatically, including conversion to RGB and pixel value scaling. Learn more about it in the [model training section](../modes/train.md). ### How should I split my annotated dataset for training? To split your dataset, a common practice is to divide it into 70% for training, 20% for validation, and 10% for testing. It is important to maintain the data distribution of classes across these splits and avoid data leakage by performing augmentation only on the training set. Use tools like scikit-learn or [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) for efficient dataset splitting. See the detailed guide on [dataset preparation](../guides/data-collection-and-annotation.md). -### Can I handle varying image sizes in YOLOv8 without manual resizing? +### Can I handle varying image sizes in YOLO11 without manual resizing? 
-Yes, Ultralytics YOLOv8 can handle varying image sizes through the 'imgsz' parameter during model training. This parameter ensures that images are resized so their largest dimension matches the specified size (e.g., 640 pixels), while maintaining the aspect ratio. For more flexible input handling and automatic adjustments, check the [model training section](../modes/train.md). +Yes, Ultralytics YOLO11 can handle varying image sizes through the 'imgsz' parameter during model training. This parameter ensures that images are resized so their largest dimension matches the specified size (e.g., 640 pixels), while maintaining the aspect ratio. For more flexible input handling and automatic adjustments, check the [model training section](../modes/train.md). diff --git a/docs/en/guides/queue-management.md b/docs/en/guides/queue-management.md index 9fb4897edf3..c97d9eeaa4f 100644 --- a/docs/en/guides/queue-management.md +++ b/docs/en/guides/queue-management.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to manage and optimize queues using Ultralytics YOLOv8 to reduce wait times and increase efficiency in various real-world applications. -keywords: queue management, YOLOv8, Ultralytics, reduce wait times, efficiency, customer satisfaction, retail, airports, healthcare, banks +description: Learn how to manage and optimize queues using Ultralytics YOLO11 to reduce wait times and increase efficiency in various real-world applications. +keywords: queue management, YOLO11, Ultralytics, reduce wait times, efficiency, customer satisfaction, retail, airports, healthcare, banks --- -# Queue Management using Ultralytics YOLOv8 🚀 +# Queue Management using Ultralytics YOLO11 🚀 ## What is Queue Management? -Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves organizing and controlling lines of people or vehicles to reduce wait times and enhance efficiency. 
It's about optimizing queues to improve customer satisfaction and system performance in various settings like retail, banks, airports, and healthcare facilities. +Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves organizing and controlling lines of people or vehicles to reduce wait times and enhance efficiency. It's about optimizing queues to improve customer satisfaction and system performance in various settings like retail, banks, airports, and healthcare facilities.


@@ -18,7 +18,7 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra allowfullscreen>
- Watch: How to Implement Queue Management with Ultralytics YOLOv8 | Airport and Metro Station + Watch: How to Implement Queue Management with Ultralytics YOLO11 | Airport and Metro Station

## Advantages of Queue Management? @@ -30,104 +30,74 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra | Logistics | Retail | | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![Queue management at airport ticket counter using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/queue-management-airport-ticket-counter-ultralytics-yolov8.avif) | ![Queue monitoring in crowd using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/queue-monitoring-crowd-ultralytics-yolov8.avif) | -| Queue management at airport ticket counter Using Ultralytics YOLOv8 | Queue monitoring in crowd Ultralytics YOLOv8 | +| ![Queue management at airport ticket counter using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/queue-management-airport-ticket-counter-ultralytics-yolov8.avif) | ![Queue monitoring in crowd using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/queue-monitoring-crowd-ultralytics-yolov8.avif) | +| Queue management at airport ticket counter Using Ultralytics YOLO11 | Queue monitoring in crowd Ultralytics YOLO11 | -!!! example "Queue Management using YOLOv8 Example" +!!! 
example "Queue Management using YOLO11 Example" - === "Queue Manager" + === "CLI" - ```python - import cv2 + ```bash + # Run a queue example + yolo solutions queue show=True - from ultralytics import YOLO, solutions + # Pass a source video + yolo solutions queue source="path/to/video/file.mp4" - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - - queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, - ) - - while cap.isOpened(): - success, im0 = cap.read() - - if success: - tracks = model.track(im0, persist=True) - out = queue.process_queue(im0, tracks) - - video_writer.write(im0) - if cv2.waitKey(1) & 0xFF == ord("q"): - break - continue - - print("Video frame is empty or video processing has been successfully completed.") - break - - cap.release() - cv2.destroyAllWindows() + # Pass queue coordinates + yolo solutions queue region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] ``` - === "Queue Manager Specific Classes" + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + # Video writer video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + # Define queue region points + queue_region = [(20, 400), 
(1080, 400), (1080, 360), (20, 360)] # Define queue region points + # queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # Define queue polygon points + # Init Queue Manager queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, + show=True, # Display the output + model="yolo11n.pt", # Path to the YOLO11 model file + region=queue_region, # Pass queue region points + # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model. + # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() - - if success: - tracks = model.track(im0, persist=True, classes=0) # Only person class - out = queue.process_queue(im0, tracks) - - video_writer.write(im0) - if cv2.waitKey(1) & 0xFF == ord("q"): - break - continue - - print("Video frame is empty or video processing has been successfully completed.") - break + if not success: + print("Video frame is empty or video processing has been successfully completed.") + break + out = queue.process_queue(im0) + video_writer.write(im0) cap.release() + video_writer.release() cv2.destroyAllWindows() ``` ### Arguments `QueueManager` -| Name | Type | Default | Description | -| ---------------- | ---------------- | -------------------------- | -------------------------------------------------------------------------------- | -| `names` | `dict` | `model.names` | A dictionary mapping class IDs to class names. | -| `reg_pts` | `list of tuples` | `[(20, 400), (1260, 400)]` | Points defining the counting region polygon. Defaults to a predefined rectangle. | -| `line_thickness` | `int` | `2` | Thickness of the annotation lines. | -| `view_img` | `bool` | `False` | Whether to display the image frames. | -| `draw_tracks` | `bool` | `False` | Whether to draw tracks of the objects. 
| +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the queue region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -135,11 +105,11 @@ Queue management using [Ultralytics YOLOv8](https://github.com/ultralytics/ultra ## FAQ -### How can I use Ultralytics YOLOv8 for real-time queue management? +### How can I use Ultralytics YOLO11 for real-time queue management? -To use Ultralytics YOLOv8 for real-time queue management, you can follow these steps: +To use Ultralytics YOLO11 for real-time queue management, you can follow these steps: -1. Load the YOLOv8 model with `YOLO("yolov8n.pt")`. +1. Load the YOLO11 model with `YOLO("yolo11n.pt")`. 2. Capture the video feed using `cv2.VideoCapture`. 3. Define the region of interest (ROI) for queue management. 4. Process frames to detect objects and manage queues. 
@@ -149,23 +119,21 @@ Here's a minimal example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8n.pt") cap = cv2.VideoCapture("path/to/video.mp4") -queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] +queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)] queue = solutions.QueueManager( - names=model.names, - reg_pts=queue_region, - line_thickness=3, + model="yolo11n.pt", + region=queue_region, + line_width=3, ) while cap.isOpened(): success, im0 = cap.read() if success: - tracks = model.track(im0, show=False, persist=True, verbose=False) - out = queue.process_queue(im0, tracks) + out = queue.process_queue(im0) cv2.imshow("Queue Management", im0) if cv2.waitKey(1) & 0xFF == ord("q"): break @@ -176,9 +144,9 @@ cv2.destroyAllWindows() Leveraging Ultralytics [HUB](https://docs.ultralytics.com/hub/) can streamline this process by providing a user-friendly platform for deploying and managing your queue management solution. -### What are the key advantages of using Ultralytics YOLOv8 for queue management? +### What are the key advantages of using Ultralytics YOLO11 for queue management? -Using Ultralytics YOLOv8 for queue management offers several benefits: +Using Ultralytics YOLO11 for queue management offers several benefits: - **Plummeting Waiting Times:** Efficiently organizes queues, reducing customer wait times and boosting satisfaction. - **Enhancing Efficiency:** Analyzes queue data to optimize staff deployment and operations, thereby reducing costs. @@ -187,37 +155,37 @@ Using Ultralytics YOLOv8 for queue management offers several benefits: For more details, explore our [Queue Management](https://docs.ultralytics.com/reference/solutions/queue_management/) solutions. -### Why should I choose Ultralytics YOLOv8 over competitors like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or Detectron2 for queue management? 
+### Why should I choose Ultralytics YOLO11 over competitors like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or Detectron2 for queue management? -Ultralytics YOLOv8 has several advantages over TensorFlow and Detectron2 for queue management: +Ultralytics YOLO11 has several advantages over TensorFlow and Detectron2 for queue management: -- **Real-time Performance:** YOLOv8 is known for its real-time detection capabilities, offering faster processing speeds. +- **Real-time Performance:** YOLO11 is known for its real-time detection capabilities, offering faster processing speeds. - **Ease of Use:** Ultralytics provides a user-friendly experience, from training to deployment, via [Ultralytics HUB](https://docs.ultralytics.com/hub/). - **Pretrained Models:** Access to a range of pretrained models, minimizing the time needed for setup. - **Community Support:** Extensive documentation and active community support make problem-solving easier. Learn how to get started with [Ultralytics YOLO](https://docs.ultralytics.com/quickstart/). -### Can Ultralytics YOLOv8 handle multiple types of queues, such as in airports and retail? +### Can Ultralytics YOLO11 handle multiple types of queues, such as in airports and retail? -Yes, Ultralytics YOLOv8 can manage various types of queues, including those in airports and retail environments. By configuring the QueueManager with specific regions and settings, YOLOv8 can adapt to different queue layouts and densities. +Yes, Ultralytics YOLO11 can manage various types of queues, including those in airports and retail environments. By configuring the QueueManager with specific regions and settings, YOLO11 can adapt to different queue layouts and densities. 
Example for airports: ```python queue_region_airport = [(50, 600), (1200, 600), (1200, 550), (50, 550)] queue_airport = solutions.QueueManager( - names=model.names, - reg_pts=queue_region_airport, - line_thickness=3, + model="yolo11n.pt", + region=queue_region_airport, + line_width=3, ) ``` For more information on diverse applications, check out our [Real World Applications](#real-world-applications) section. -### What are some real-world applications of Ultralytics YOLOv8 in queue management? +### What are some real-world applications of Ultralytics YOLO11 in queue management? -Ultralytics YOLOv8 is used in various real-world applications for queue management: +Ultralytics YOLO11 is used in various real-world applications for queue management: - **Retail:** Monitors checkout lines to reduce wait times and improve customer satisfaction. - **Airports:** Manages queues at ticket counters and security checkpoints for a smoother passenger experience. diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md index c25557e8a3f..00b8d31572a 100644 --- a/docs/en/guides/raspberry-pi.md +++ b/docs/en/guides/raspberry-pi.md @@ -1,12 +1,13 @@ --- comments: true -description: Learn how to deploy Ultralytics YOLOv8 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices. -keywords: Ultralytics, YOLOv8, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules +description: Learn how to deploy Ultralytics YOLO11 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices. 
+keywords: Ultralytics, YOLO11, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules +benchmark_version: 8.3.39 --- -# Quick Start Guide: Raspberry Pi with Ultralytics YOLOv8 +# Quick Start Guide: Raspberry Pi with Ultralytics YOLO11 -This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [Raspberry Pi](https://www.raspberrypi.com/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLOv8 on these small and powerful devices. +This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [Raspberry Pi](https://www.raspberrypi.com/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLO11 on these small and powerful devices.


@@ -41,7 +42,7 @@ Raspberry Pi is a small, affordable, single-board computer. It has become popula ## What is Raspberry Pi OS? -[Raspberry Pi OS](https://www.raspberrypi.com/software) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi. +[Raspberry Pi OS](https://www.raspberrypi.com/software/) (formerly known as Raspbian) is a Unix-like operating system based on the Debian GNU/Linux distribution for the Raspberry Pi family of compact single-board computers distributed by the Raspberry Pi Foundation. Raspberry Pi OS is highly optimized for the Raspberry Pi with ARM CPUs and uses a modified LXDE desktop environment with the Openbox stacking window manager. Raspberry Pi OS is under active development, with an emphasis on improving the stability and performance of as many Debian packages as possible on Raspberry Pi. ## Flash Raspberry Pi OS to Raspberry Pi @@ -56,7 +57,7 @@ There are two ways of setting up Ultralytics package on Raspberry Pi to build yo ### Start with Docker -The fastest way to get started with Ultralytics YOLOv8 on Raspberry Pi is to run with pre-built docker image for Raspberry Pi. +The fastest way to get started with Ultralytics YOLO11 on Raspberry Pi is to run with pre-built docker image for Raspberry Pi. Execute the below command to pull the Docker container and run on Raspberry Pi. This is based on [arm64v8/debian](https://hub.docker.com/r/arm64v8/debian) docker image which contains Debian 12 (Bookworm) in a Python3 environment. 
@@ -94,11 +95,11 @@ Here we will install Ultralytics package on the Raspberry Pi with optional depen ## Use NCNN on Raspberry Pi -Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn/) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/ embedded platforms (such as ARM architecture). Therefor our recommendation is to use NCNN with Raspberry Pi. +Out of all the model export formats supported by Ultralytics, [NCNN](https://docs.ultralytics.com/integrations/ncnn/) delivers the best inference performance when working with Raspberry Pi devices because NCNN is highly optimized for mobile/embedded platforms (such as ARM architecture). ## Convert Model to NCNN and Run Inference -The YOLOv8n model in PyTorch format is converted to NCNN to run inference with the exported model. +The YOLO11n model in PyTorch format is converted to NCNN to run inference with the exported model. !!! 
example @@ -107,14 +108,14 @@ The YOLOv8n model in PyTorch format is converted to NCNN to run inference with t ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") # Export the model to NCNN format - model.export(format="ncnn") # creates 'yolov8n_ncnn_model' + model.export(format="ncnn") # creates 'yolo11n_ncnn_model' # Load the exported NCNN model - ncnn_model = YOLO("yolov8n_ncnn_model") + ncnn_model = YOLO("yolo11n_ncnn_model") # Run inference results = ncnn_model("https://ultralytics.com/images/bus.jpg") @@ -123,102 +124,67 @@ The YOLOv8n model in PyTorch format is converted to NCNN to run inference with t === "CLI" ```bash - # Export a YOLOv8n PyTorch model to NCNN format - yolo export model=yolov8n.pt format=ncnn # creates 'yolov8n_ncnn_model' + # Export a YOLO11n PyTorch model to NCNN format + yolo export model=yolo11n.pt format=ncnn # creates 'yolo11n_ncnn_model' # Run inference with the exported model - yolo predict model='yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' ``` !!! tip For more details about supported export options, visit the [Ultralytics documentation page on deployment options](https://docs.ultralytics.com/guides/model-deployment-options/). -## Raspberry Pi 5 vs Raspberry Pi 4 YOLOv8 Benchmarks +## Raspberry Pi 5 YOLO11 Benchmarks -YOLOv8 benchmarks were run by the Ultralytics team on nine different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on both Raspberry Pi 5 and Raspberry Pi 4 at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640. - -!!! 
note - - We have only included benchmarks for YOLOv8n and YOLOv8s models because other models sizes are too big to run on the Raspberry Pis and does not offer decent performance. +YOLO11 benchmarks were run by the Ultralytics team on ten different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, MNN, NCNN. Benchmarks were run on a Raspberry Pi 5 at FP32 [precision](https://www.ultralytics.com/glossary/precision) with the default input image size of 640. ### Comparison Chart -!!! tip "Performance" - - === "YOLOv8n" - -

- NVIDIA Jetson Ecosystem -
- - === "YOLOv8s" +We have only included benchmarks for the YOLO11n and YOLO11s models because the other model sizes are too large to run on the Raspberry Pi and do not offer decent performance. -
- NVIDIA Jetson Ecosystem -
+
+ YOLO11 benchmarks on RPi 5 +
Benchmarked with Ultralytics {{ benchmark_version }}
+
### Detailed Comparison Table -The below table represents the benchmark results for two different models (YOLOv8n, YOLOv8s) across nine different formats (PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), running on both Raspberry Pi 4 and Raspberry Pi 5, giving us the status, size, mAP50-95(B) metric, and inference time for each combination. +The table below presents the benchmark results for two different models (YOLO11n, YOLO11s) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, MNN, NCNN), running on a Raspberry Pi 5, giving us the status, size, mAP50-95(B) metric, and inference time for each combination. !!! tip "Performance" - === "YOLOv8n on RPi5" - - | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | - |---------------|--------|-------------------|-------------|------------------------| - | PyTorch | ✅ | 6.2 | 0.6381 | 508.61 | - | TorchScript | ✅ | 12.4 | 0.6092 | 558.38 | - | ONNX | ✅ | 12.2 | 0.6092 | 198.69 | - | OpenVINO | ✅ | 12.3 | 0.6092 | 704.70 | - | TF SavedModel | ✅ | 30.6 | 0.6092 | 367.64 | - | TF GraphDef | ✅ | 12.3 | 0.6092 | 473.22 | - | TF Lite | ✅ | 12.3 | 0.6092 | 380.67 | - | PaddlePaddle | ✅ | 24.4 | 0.6092 | 703.51 | - | NCNN | ✅ | 12.2 | 0.6034 | 94.28 | - - === "YOLOv8s on RPi5" - - | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | - |---------------|--------|-------------------|-------------|------------------------| - | PyTorch | ✅ | 21.5 | 0.6967 | 969.49 | - | TorchScript | ✅ | 43.0 | 0.7136 | 1110.04 | - | ONNX | ✅ | 42.8 | 0.7136 | 451.37 | - | OpenVINO | ✅ | 42.9 | 0.7136 | 873.51 | - | TF SavedModel | ✅ | 107.0 | 0.7136 | 658.15 | - | TF GraphDef | ✅ | 42.8 | 0.7136 | 946.01 | - | TF Lite | ✅ | 42.8 | 0.7136 | 1013.27 | - | PaddlePaddle | ✅ | 85.5 | 0.7136 | 1560.23 | - | NCNN | ✅ | 42.7 | 0.7204 | 211.26 | - - === 
"YOLOv8n on RPi4" + === "YOLO11n" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |---------------|--------|-------------------|-------------|------------------------| - | PyTorch | ✅ | 6.2 | 0.6381 | 1068.42 | - | TorchScript | ✅ | 12.4 | 0.6092 | 1248.01 | - | ONNX | ✅ | 12.2 | 0.6092 | 560.04 | - | OpenVINO | ✅ | 12.3 | 0.6092 | 534.93 | - | TF SavedModel | ✅ | 30.6 | 0.6092 | 816.50 | - | TF GraphDef | ✅ | 12.3 | 0.6092 | 1007.57 | - | TF Lite | ✅ | 12.3 | 0.6092 | 950.29 | - | PaddlePaddle | ✅ | 24.4 | 0.6092 | 1507.75 | - | NCNN | ✅ | 12.2 | 0.6092 | 414.73 | - - === "YOLOv8s on RPi4" + | PyTorch | ✅ | 5.4 | 0.6100 | 405.238 | + | TorchScript | ✅ | 10.5 | 0.6082 | 526.628 | + | ONNX | ✅ | 10.2 | 0.6082 | 168.082 | + | OpenVINO | ✅ | 10.4 | 0.6082 | 81.192 | + | TF SavedModel | ✅ | 25.8 | 0.6082 | 377.968 | + | TF GraphDef | ✅ | 10.3 | 0.6082 | 487.244 | + | TF Lite | ✅ | 10.3 | 0.6082 | 317.398 | + | PaddlePaddle | ✅ | 20.4 | 0.6082 | 561.892 | + | MNN | ✅ | 10.1 | 0.6106 | 112.554 | + | NCNN | ✅ | 10.2 | 0.6106 | 88.026 | + + === "YOLO11s" | Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) | |---------------|--------|-------------------|-------------|------------------------| - | PyTorch | ✅ | 21.5 | 0.6967 | 2589.58 | - | TorchScript | ✅ | 43.0 | 0.7136 | 2901.33 | - | ONNX | ✅ | 42.8 | 0.7136 | 1436.33 | - | OpenVINO | ✅ | 42.9 | 0.7136 | 1225.19 | - | TF SavedModel | ✅ | 107.0 | 0.7136 | 1770.95 | - | TF GraphDef | ✅ | 42.8 | 0.7136 | 2146.66 | - | TF Lite | ✅ | 42.8 | 0.7136 | 2945.03 | - | PaddlePaddle | ✅ | 85.5 | 0.7136 | 3962.62 | - | NCNN | ✅ | 42.7 | 0.7136 | 1042.39 | + | PyTorch | ✅ | 18.4 | 0.7526 | 1011.60 | + | TorchScript | ✅ | 36.5 | 0.7416 | 1268.502 | + | ONNX | ✅ | 36.3 | 0.7416 | 324.17 | + | OpenVINO | ✅ | 36.4 | 0.7416 | 179.324 | + | TF SavedModel | ✅ | 91.1 | 0.7416 | 714.382 | + | TF GraphDef | ✅ | 36.4 | 
0.7416 | 1019.83 | + | TF Lite | ✅ | 36.4 | 0.7416 | 849.86 | + | PaddlePaddle | ✅ | 72.5 | 0.7416 | 1276.34 | + | MNN | ✅ | 36.2 | 0.7409 | 273.032 | + | NCNN | ✅ | 36.2 | 0.7419 | 194.858 | + + Benchmarked with Ultralytics {{ benchmark_version }} ## Reproduce Our Results @@ -231,25 +197,25 @@ To reproduce the above Ultralytics benchmarks on all [export formats](../modes/e ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") - # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats - results = model.benchmarks(data="coco8.yaml", imgsz=640) + # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats + results = model.benchmark(data="coco8.yaml", imgsz=640) ``` === "CLI" ```bash - # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats - yolo benchmark model=yolov8n.pt data=coco8.yaml imgsz=640 + # Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats + yolo benchmark model=yolo11n.pt data=coco8.yaml imgsz=640 ``` Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results use a dataset with a large number of images, i.e. `data='coco8.yaml'` (4 val images), or `data='coco.yaml'` (5000 val images). ## Use Raspberry Pi Camera -When using Raspberry Pi for Computer Vision projects, it can be essentially to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry PI camera modules. In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3) to grab the video feeds and perform inference using YOLOv8 models. 
+When using Raspberry Pi for Computer Vision projects, it can be essential to grab real-time video feeds to perform inference. The onboard MIPI CSI connector on the Raspberry Pi allows you to connect official Raspberry Pi camera modules. In this guide, we have used a [Raspberry Pi Camera Module 3](https://www.raspberrypi.com/products/camera-module-3/) to grab the video feeds and perform inference using YOLO11 models. !!! tip @@ -257,7 +223,7 @@ When using Raspberry Pi for Computer Vision projects, it can be essentially to g !!! note - Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (15-pin vs 22-pin), so you will need a [15-pin to 22pin adapter cable](https://www.raspberrypi.com/products/camera-cable) to connect to a Raspberry Pi Camera. + Raspberry Pi 5 uses smaller CSI connectors than the Raspberry Pi 4 (22-pin vs 15-pin), so you will need a [15-pin to 22-pin adapter cable](https://www.raspberrypi.com/products/camera-cable/) to connect to a Raspberry Pi Camera. ### Test the Camera @@ -273,13 +239,13 @@ rpicam-hello ### Inference with Camera -There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models. +There are 2 methods of using the Raspberry Pi Camera to run inference with YOLO11 models. !!! usage === "Method 1" - We can use `picamera2`which comes pre-installed with Raspberry Pi OS to access the camera and inference YOLOv8 models. + We can use `picamera2`, which comes pre-installed with Raspberry Pi OS, to access the camera and run inference with YOLO11 models. !!! example @@ -299,14 +265,14 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models. 
picam2.configure("preview") picam2.start() - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") while True: # Capture frame-by-frame frame = picam2.capture_array() - # Run YOLOv8 inference on the frame + # Run YOLO11 inference on the frame results = model(frame) # Visualize the results on the frame @@ -340,8 +306,8 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models. ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") # Run inference results = model("tcp://127.0.0.1:8888") @@ -350,7 +316,7 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models. === "CLI" ```bash - yolo predict model=yolov8n.pt source="tcp://127.0.0.1:8888" + yolo predict model=yolo11n.pt source="tcp://127.0.0.1:8888" ``` !!! tip @@ -359,7 +325,7 @@ There are 2 methods of using the Raspberry Pi Camera to inference YOLOv8 models. ## Best Practices when using Raspberry Pi -There are a couple of best practices to follow in order to enable maximum performance on Raspberry Pis running YOLOv8. +There are a couple of best practices to follow in order to enable maximum performance on Raspberry Pis running YOLO11. 1. Use an SSD @@ -371,7 +337,7 @@ There are a couple of best practices to follow in order to enable maximum perfor ## Next Steps -Congratulations on successfully setting up YOLO on your Raspberry Pi! For further learning and support, visit [Ultralytics YOLOv8 Docs](../index.md) and [Kashmir World Foundation](https://www.kashmirworldfoundation.org/). +Congratulations on successfully setting up YOLO on your Raspberry Pi! For further learning and support, visit [Ultralytics YOLO11 Docs](../index.md) and [Kashmir World Foundation](https://www.kashmirworldfoundation.org/). 
## Acknowledgements and Citations @@ -381,9 +347,9 @@ For more information about Kashmir World Foundation's activities, you can visit ## FAQ -### How do I set up Ultralytics YOLOv8 on a Raspberry Pi without using Docker? +### How do I set up Ultralytics YOLO11 on a Raspberry Pi without using Docker? -To set up Ultralytics YOLOv8 on a Raspberry Pi without Docker, follow these steps: +To set up Ultralytics YOLO11 on a Raspberry Pi without Docker, follow these steps: 1. Update the package list and install `pip`: ```bash @@ -402,13 +368,13 @@ To set up Ultralytics YOLOv8 on a Raspberry Pi without Docker, follow these step For detailed instructions, refer to the [Start without Docker](#start-without-docker) section. -### Why should I use Ultralytics YOLOv8's NCNN format on Raspberry Pi for AI tasks? +### Why should I use Ultralytics YOLO11's NCNN format on Raspberry Pi for AI tasks? -Ultralytics YOLOv8's NCNN format is highly optimized for mobile and embedded platforms, making it ideal for running AI tasks on Raspberry Pi devices. NCNN maximizes inference performance by leveraging ARM architecture, providing faster and more efficient processing compared to other formats. For more details on supported export options, visit the [Ultralytics documentation page on deployment options](../modes/export.md). +Ultralytics YOLO11's NCNN format is highly optimized for mobile and embedded platforms, making it ideal for running AI tasks on Raspberry Pi devices. NCNN maximizes inference performance by leveraging ARM architecture, providing faster and more efficient processing compared to other formats. For more details on supported export options, visit the [Ultralytics documentation page on deployment options](../modes/export.md). -### How can I convert a YOLOv8 model to NCNN format for use on Raspberry Pi? +### How can I convert a YOLO11 model to NCNN format for use on Raspberry Pi? 
-You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI commands: +You can convert a PyTorch YOLO11 model to NCNN format using either Python or CLI commands: !!! example @@ -417,14 +383,14 @@ You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI ```python from ultralytics import YOLO - # Load a YOLOv8n PyTorch model - model = YOLO("yolov8n.pt") + # Load a YOLO11n PyTorch model + model = YOLO("yolo11n.pt") # Export the model to NCNN format - model.export(format="ncnn") # creates 'yolov8n_ncnn_model' + model.export(format="ncnn") # creates 'yolo11n_ncnn_model' # Load the exported NCNN model - ncnn_model = YOLO("yolov8n_ncnn_model") + ncnn_model = YOLO("yolo11n_ncnn_model") # Run inference results = ncnn_model("https://ultralytics.com/images/bus.jpg") @@ -433,16 +399,16 @@ You can convert a PyTorch YOLOv8 model to NCNN format using either Python or CLI === "CLI" ```bash - # Export a YOLOv8n PyTorch model to NCNN format - yolo export model=yolov8n.pt format=ncnn # creates 'yolov8n_ncnn_model' + # Export a YOLO11n PyTorch model to NCNN format + yolo export model=yolo11n.pt format=ncnn # creates 'yolo11n_ncnn_model' # Run inference with the exported model - yolo predict model='yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details, see the [Use NCNN on Raspberry Pi](#use-ncnn-on-raspberry-pi) section. -### What are the hardware differences between Raspberry Pi 4 and Raspberry Pi 5 relevant to running YOLOv8? +### What are the hardware differences between Raspberry Pi 4 and Raspberry Pi 5 relevant to running YOLO11? Key differences include: @@ -450,11 +416,11 @@ Key differences include: - **Max CPU Frequency**: Raspberry Pi 4 has a max frequency of 1.8GHz, whereas Raspberry Pi 5 reaches 2.4GHz. 
- **Memory**: Raspberry Pi 4 offers up to 8GB of LPDDR4-3200 SDRAM, while Raspberry Pi 5 features LPDDR4X-4267 SDRAM, available in 4GB and 8GB variants. -These enhancements contribute to better performance benchmarks for YOLOv8 models on Raspberry Pi 5 compared to Raspberry Pi 4. Refer to the [Raspberry Pi Series Comparison](#raspberry-pi-series-comparison) table for more details. +These enhancements contribute to better performance benchmarks for YOLO11 models on Raspberry Pi 5 compared to Raspberry Pi 4. Refer to the [Raspberry Pi Series Comparison](#raspberry-pi-series-comparison) table for more details. -### How can I set up a Raspberry Pi Camera Module to work with Ultralytics YOLOv8? +### How can I set up a Raspberry Pi Camera Module to work with Ultralytics YOLO11? -There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference: +There are two methods to set up a Raspberry Pi Camera for YOLO11 inference: 1. **Using `picamera2`**: @@ -471,7 +437,7 @@ There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference: picam2.configure("preview") picam2.start() - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") while True: frame = picam2.capture_array() @@ -494,7 +460,7 @@ There are two methods to set up a Raspberry Pi Camera for YOLOv8 inference: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model("tcp://127.0.0.1:8888") ``` diff --git a/docs/en/guides/region-counting.md b/docs/en/guides/region-counting.md index a27c2b4e533..3d2b9fbb7ed 100644 --- a/docs/en/guides/region-counting.md +++ b/docs/en/guides/region-counting.md @@ -1,24 +1,24 @@ --- comments: true -description: Learn how to use Ultralytics YOLOv8 for precise object counting in specified regions, enhancing efficiency across various applications. 
-keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, efficiency, accuracy, automation, real-time, applications, surveillance, monitoring +description: Learn how to use Ultralytics YOLO11 for precise object counting in specified regions, enhancing efficiency across various applications. +keywords: object counting, regions, YOLO11, computer vision, Ultralytics, efficiency, accuracy, automation, real-time, applications, surveillance, monitoring --- -# Object Counting in Different Regions using Ultralytics YOLOv8 🚀 +# Object Counting in Different Regions using Ultralytics YOLO 🚀 ## What is Object Counting in Regions? -[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications. +[Object counting](../guides/object-counting.md) in regions with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves precisely determining the number of objects within specified areas using advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv). This approach is valuable for optimizing processes, enhancing security, and improving efficiency in various applications.


-
- Watch: Ultralytics YOLOv8 Object Counting in Multiple & Movable Regions + Watch: Object Counting in Different Regions using Ultralytics YOLO11 | Ultralytics Solutions 🚀

## Advantages of Object Counting in Regions? @@ -31,69 +31,79 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici | Retail | Market Streets | | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![People Counting in Different Region using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/people-counting-different-region-ultralytics-yolov8.avif) | ![Crowd Counting in Different Region using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/crowd-counting-different-region-ultralytics-yolov8.avif) | -| People Counting in Different Region using Ultralytics YOLOv8 | Crowd Counting in Different Region using Ultralytics YOLOv8 | +| ![People Counting in Different Region using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/people-counting-different-region-ultralytics-yolov8.avif) | ![Crowd Counting in Different Region using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/crowd-counting-different-region-ultralytics-yolov8.avif) | +| People Counting in Different Region using Ultralytics YOLO11 | Crowd Counting in Different Region using Ultralytics YOLO11 | -## Steps to Run +!!! example "Region Counting Example" -### Step 1: Install Required Libraries + === "Python" -Begin by cloning the Ultralytics repository, installing dependencies, and navigating to the local directory using the provided commands in Step 2. 
+ ```python + import cv2 -```bash -# Clone Ultralytics repo -git clone https://github.com/ultralytics/ultralytics + from ultralytics import solutions -# Navigate to the local directory -cd ultralytics/examples/YOLOv8-Region-Counter -``` + cap = cv2.VideoCapture("Path/to/video/file.mp4") + assert cap.isOpened(), "Error reading video file" + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -### Step 2: Run Region Counting Using Ultralytics YOLOv8 + # Define region points + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Pass region as list -Execute the following basic commands for inference. + # pass region as dictionary + region_points = { + "region-01": [(50, 50), (250, 50), (250, 250), (50, 250)], + "region-02": [(640, 640), (780, 640), (780, 720), (640, 720)], + } -???+ tip "Region is Movable" + # Video writer + video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - During video playback, you can interactively move the region within the video by clicking and dragging using the left mouse button. + # Init RegionCounter + region = solutions.RegionCounter( + show=True, + region=region_points, + model="yolo11n.pt", + ) -```bash -# Save results -python yolov8_region_counter.py --source "path/to/video.mp4" --save-img + # Process video + while cap.isOpened(): + success, im0 = cap.read() + if not success: + print("Video frame is empty or video processing has been successfully completed.") + break + im0 = region.count(im0) + video_writer.write(im0) -# Run model on CPU -python yolov8_region_counter.py --source "path/to/video.mp4" --device cpu + cap.release() + video_writer.release() + cv2.destroyAllWindows() + ``` -# Change model file -python yolov8_region_counter.py --source "path/to/video.mp4" --weights "path/to/model.pt" +!!! 
tip "Ultralytics Example Code" -# Detect specific classes (e.g., first and third classes) -python yolov8_region_counter.py --source "path/to/video.mp4" --classes 0 2 + The Ultralytics region counting module is available in our [examples section](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py). You can explore this example for code customization and modify it to suit your specific use case. -# View results without saving -python yolov8_region_counter.py --source "path/to/video.mp4" --view-img -``` +### Argument `RegionCounter` -### Optional Arguments +Here's a table with the `RegionCounter` arguments: -| Name | Type | Default | Description | -| -------------------- | ------ | ------------ | --------------------------------------------------------------------------- | -| `--source` | `str` | `None` | Path to video file, for webcam 0 | -| `--line_thickness` | `int` | `2` | [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) thickness | -| `--save-img` | `bool` | `False` | Save the predicted video/image | -| `--weights` | `str` | `yolov8n.pt` | Weights file path | -| `--classes` | `list` | `None` | Detect specific classes i.e. --classes 0 2 | -| `--region-thickness` | `int` | `2` | Region Box thickness | -| `--track-thickness` | `int` | `2` | Tracking line thickness | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ## FAQ -### What is object counting in specified regions using Ultralytics YOLOv8? +### What is object counting in specified regions using Ultralytics YOLO11? 
-Object counting in specified regions with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) across various applications like manufacturing, surveillance, and traffic monitoring. +Object counting in specified regions with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) involves detecting and tallying the number of objects within defined areas using advanced computer vision. This precise method enhances efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) across various applications like manufacturing, surveillance, and traffic monitoring. -### How do I run the object counting script with Ultralytics YOLOv8? +### How do I run the region-based object counting script with Ultralytics YOLO11? -Follow these steps to run object counting in Ultralytics YOLOv8: +Follow these steps to run object counting in Ultralytics YOLO11: 1. Clone the Ultralytics repository and navigate to the directory: @@ -107,11 +117,11 @@ Follow these steps to run object counting in Ultralytics YOLOv8: python yolov8_region_counter.py --source "path/to/video.mp4" --save-img ``` -For more options, visit the [Run Region Counting](#steps-to-run) section. +For more options, visit the [Run Region Counting](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/readme.md) section. -### Why should I use Ultralytics YOLOv8 for object counting in regions? +### Why should I use Ultralytics YOLO11 for object counting in regions? -Using Ultralytics YOLOv8 for object counting in regions offers several advantages: +Using Ultralytics YOLO11 for object counting in regions offers several advantages: - **Precision and Accuracy:** Minimizes errors often seen in manual counting. 
- **Efficiency Improvement:** Provides real-time results and streamlines processes. @@ -119,13 +129,9 @@ Using Ultralytics YOLOv8 for object counting in regions offers several advantage Explore deeper benefits in the [Advantages](#advantages-of-object-counting-in-regions) section. -### Can the defined regions be adjusted during video playback? - -Yes, with Ultralytics YOLOv8, regions can be interactively moved during video playback. Simply click and drag with the left mouse button to reposition the region. This feature enhances flexibility for dynamic environments. Learn more in the tip section for [movable regions](#step-2-run-region-counting-using-ultralytics-yolov8). - ### What are some real-world applications of object counting in regions? -Object counting with Ultralytics YOLOv8 can be applied to numerous real-world scenarios: +Object counting with Ultralytics YOLO11 can be applied to numerous real-world scenarios: - **Retail:** Counting people for foot traffic analysis. - **Market Streets:** Crowd density management. diff --git a/docs/en/guides/sahi-tiled-inference.md b/docs/en/guides/sahi-tiled-inference.md index d44bce0253f..1cc3188a7d0 100644 --- a/docs/en/guides/sahi-tiled-inference.md +++ b/docs/en/guides/sahi-tiled-inference.md @@ -1,12 +1,14 @@ --- comments: true -description: Learn how to implement YOLOv8 with SAHI for sliced inference. Optimize memory usage and enhance detection accuracy for large-scale applications. -keywords: YOLOv8, SAHI, Sliced Inference, Object Detection, Ultralytics, High-resolution Images, Computational Efficiency, Integration Guide +description: Learn how to implement YOLO11 with SAHI for sliced inference. Optimize memory usage and enhance detection accuracy for large-scale applications. 
+keywords: YOLO11, SAHI, Sliced Inference, Object Detection, Ultralytics, High-resolution Images, Computational Efficiency, Integration Guide --- -# Ultralytics Docs: Using YOLOv8 with SAHI for Sliced Inference +# Ultralytics Docs: Using YOLO11 with SAHI for Sliced Inference -Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced [object detection](https://www.ultralytics.com/glossary/object-detection) performance. +Open SAHI for Sliced Inference In Colab + +Welcome to the Ultralytics documentation on how to use YOLO11 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLO11. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLO11 for enhanced [object detection](https://www.ultralytics.com/glossary/object-detection) performance.

SAHI Sliced Inference Overview @@ -24,7 +26,7 @@ SAHI (Slicing Aided Hyper Inference) is an innovative library designed to optimi allowfullscreen>
- Watch: Inference with SAHI (Slicing Aided Hyper Inference) using Ultralytics YOLOv8 + Watch: Inference with SAHI (Slicing Aided Hyper Inference) using Ultralytics YOLO11

### Key Features of SAHI @@ -47,12 +49,12 @@ Sliced Inference refers to the practice of subdividing a large or high-resolutio - - + + - - + +
YOLOv8 without SAHIYOLOv8 with SAHIYOLO11 without SAHIYOLO11 with SAHI
YOLOv8 without SAHIYOLOv8 with SAHIYOLO11 without SAHIYOLO11 with SAHI
@@ -68,7 +70,7 @@ pip install -U ultralytics sahi ### Import Modules and Download Resources -Here's how to import the necessary modules and download a YOLOv8 model and some test images: +Here's how to import the necessary modules and download a YOLO11 model and some test images: ```python from pathlib import Path @@ -78,11 +80,11 @@ from sahi import AutoDetectionModel from sahi.predict import get_prediction, get_sliced_prediction, predict from sahi.utils.cv import read_image from sahi.utils.file import download_from_url -from sahi.utils.yolov8 import download_yolov8s_model +from sahi.utils.ultralytics import download_yolo11n_model -# Download YOLOv8 model -yolov8_model_path = "models/yolov8s.pt" -download_yolov8s_model(yolov8_model_path) +# Download YOLO11 model +model_path = "models/yolo11n.pt" +download_yolo11n_model(model_path) # Download test images download_from_url( @@ -95,16 +97,16 @@ download_from_url( ) ``` -## Standard Inference with YOLOv8 +## Standard Inference with YOLO11 ### Instantiate the Model -You can instantiate a YOLOv8 model for object detection like this: +You can instantiate a YOLO11 model for object detection like this: ```python detection_model = AutoDetectionModel.from_pretrained( - model_type="yolov8", - model_path=yolov8_model_path, + model_type="ultralytics", + model_path=model_path, confidence_threshold=0.3, device="cpu", # or 'cuda:0' ) @@ -119,7 +121,7 @@ Perform standard inference using an image path or a numpy image. 
result = get_prediction("demo_data/small-vehicles1.jpeg", detection_model) # With a numpy image -result = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model) +result_with_np_image = get_prediction(read_image("demo_data/small-vehicles1.jpeg"), detection_model) ``` ### Visualize Results @@ -131,7 +133,7 @@ result.export_visuals(export_dir="demo_data/") Image("demo_data/prediction_visual.png") ``` -## Sliced Inference with YOLOv8 +## Sliced Inference with YOLO11 Perform sliced inference by specifying the slice dimensions and overlap ratios: @@ -167,8 +169,8 @@ For batch prediction on a directory of images: ```python predict( - model_type="yolov8", - model_path="path/to/yolov8n.pt", + model_type="ultralytics", + model_path="path/to/yolo11n.pt", model_device="cpu", # or 'cuda:0' model_confidence_threshold=0.4, source="path/to/dir", @@ -179,7 +181,7 @@ predict( ) ``` -That's it! Now you're equipped to use YOLOv8 with SAHI for both standard and sliced inference. +That's it! Now you're equipped to use YOLO11 with SAHI for both standard and sliced inference. ## Citations and Acknowledgments @@ -204,23 +206,23 @@ We extend our thanks to the SAHI research group for creating and maintaining thi ## FAQ -### How can I integrate YOLOv8 with SAHI for sliced inference in object detection? +### How can I integrate YOLO11 with SAHI for sliced inference in object detection? -Integrating Ultralytics YOLOv8 with SAHI (Slicing Aided Hyper Inference) for sliced inference optimizes your object detection tasks on high-resolution images by partitioning them into manageable slices. This approach improves memory usage and ensures high detection accuracy. To get started, you need to install the ultralytics and sahi libraries: +Integrating Ultralytics YOLO11 with SAHI (Slicing Aided Hyper Inference) for sliced inference optimizes your object detection tasks on high-resolution images by partitioning them into manageable slices. 
This approach improves memory usage and ensures high detection accuracy. To get started, you need to install the ultralytics and sahi libraries: ```bash pip install -U ultralytics sahi ``` -Then, download a YOLOv8 model and test images: +Then, download a YOLO11 model and test images: ```python from sahi.utils.file import download_from_url -from sahi.utils.yolov8 import download_yolov8s_model +from sahi.utils.ultralytics import download_yolo11n_model -# Download YOLOv8 model -yolov8_model_path = "models/yolov8s.pt" -download_yolov8s_model(yolov8_model_path) +# Download YOLO11 model +model_path = "models/yolo11n.pt" +download_yolo11n_model(model_path) # Download test images download_from_url( @@ -229,11 +231,11 @@ download_from_url( ) ``` -For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolov8). +For more detailed instructions, refer to our [Sliced Inference guide](#sliced-inference-with-yolo11). -### Why should I use SAHI with YOLOv8 for object detection on large images? +### Why should I use SAHI with YOLO11 for object detection on large images? -Using SAHI with Ultralytics YOLOv8 for object detection on large images offers several benefits: +Using SAHI with Ultralytics YOLO11 for object detection on large images offers several benefits: - **Reduced Computational Burden**: Smaller slices are faster to process and consume less memory, making it feasible to run high-quality detections on hardware with limited resources. - **Maintained Detection Accuracy**: SAHI uses intelligent algorithms to merge overlapping boxes, preserving the detection quality. @@ -241,9 +243,9 @@ Using SAHI with Ultralytics YOLOv8 for object detection on large images offers s Learn more about the [benefits of sliced inference](#benefits-of-sliced-inference) in our documentation. -### Can I visualize prediction results when using YOLOv8 with SAHI? +### Can I visualize prediction results when using YOLO11 with SAHI? 
-Yes, you can visualize prediction results when using YOLOv8 with SAHI. Here's how you can export and visualize the results: +Yes, you can visualize prediction results when using YOLO11 with SAHI. Here's how you can export and visualize the results: ```python from IPython.display import Image @@ -252,11 +254,11 @@ result.export_visuals(export_dir="demo_data/") Image("demo_data/prediction_visual.png") ``` -This command will save the visualized predictions to the specified directory and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results). +This command will save the visualized predictions to the specified directory, and you can then load the image to view it in your notebook or application. For a detailed guide, check out the [Standard Inference section](#visualize-results). -### What features does SAHI offer for improving YOLOv8 object detection? +### What features does SAHI offer for improving YOLO11 object detection? -SAHI (Slicing Aided Hyper Inference) offers several features that complement Ultralytics YOLOv8 for object detection: +SAHI (Slicing Aided Hyper Inference) offers several features that complement Ultralytics YOLO11 for object detection: - **Seamless Integration**: SAHI easily integrates with YOLO models, requiring minimal code adjustments. - **Resource Efficiency**: It partitions large images into smaller slices, which optimizes memory usage and speed. @@ -264,9 +266,9 @@ SAHI (Slicing Aided Hyper Inference) offers several features that complement Ult For a deeper understanding, read about SAHI's [key features](#key-features-of-sahi). -### How do I handle large-scale inference projects using YOLOv8 and SAHI? +### How do I handle large-scale inference projects using YOLO11 and SAHI? 
-To handle large-scale inference projects using YOLOv8 and SAHI, follow these best practices: +To handle large-scale inference projects using YOLO11 and SAHI, follow these best practices: 1. **Install Required Libraries**: Ensure that you have the latest versions of ultralytics and sahi. 2. **Configure Sliced Inference**: Determine the optimal slice dimensions and overlap ratios for your specific project. @@ -278,8 +280,8 @@ Example for batch prediction: from sahi.predict import predict predict( - model_type="yolov8", - model_path="path/to/yolov8n.pt", + model_type="ultralytics", + model_path="path/to/yolo11n.pt", model_device="cpu", # or 'cuda:0' model_confidence_threshold=0.4, source="path/to/dir", diff --git a/docs/en/guides/security-alarm-system.md b/docs/en/guides/security-alarm-system.md index 8ff085bf751..9eb4b07221e 100644 --- a/docs/en/guides/security-alarm-system.md +++ b/docs/en/guides/security-alarm-system.md @@ -1,17 +1,17 @@ --- comments: true -description: Enhance your security with real-time object detection using Ultralytics YOLOv8. Reduce false positives and integrate seamlessly with existing systems. -keywords: YOLOv8, Security Alarm System, real-time object detection, Ultralytics, computer vision, integration, false positives +description: Enhance your security with real-time object detection using Ultralytics YOLO11. Reduce false positives and integrate seamlessly with existing systems. +keywords: YOLO11, Security Alarm System, real-time object detection, Ultralytics, computer vision, integration, false positives --- -# Security Alarm System Project Using Ultralytics YOLOv8 +# Security Alarm System Project Using Ultralytics YOLO11 Security Alarm System -The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. 
YOLOv8, developed by Ultralytics, provides real-time object detection, allowing the system to identify and respond to potential security threats promptly. This project offers several advantages: +The Security Alarm System Project utilizing Ultralytics YOLO11 integrates advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities to enhance security measures. YOLO11, developed by Ultralytics, provides real-time [object detection](https://www.ultralytics.com/glossary/object-detection), allowing the system to identify and respond to potential security threats promptly. This project offers several advantages: -- **Real-time Detection:** YOLOv8's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time. -- **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLOv8 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system. +- **Real-time Detection:** YOLO11's efficiency enables the Security Alarm System to detect and respond to security incidents in real-time, minimizing response time. +- **[Accuracy](https://www.ultralytics.com/glossary/accuracy):** YOLO11 is known for its accuracy in object detection, reducing false positives and enhancing the reliability of the security alarm system. - **Integration Capabilities:** The project can be seamlessly integrated with existing security infrastructure, providing an upgraded layer of intelligent surveillance.

@@ -22,155 +22,59 @@ The Security Alarm System Project utilizing Ultralytics YOLOv8 integrates advanc allowfullscreen>
- Watch: Security Alarm System Project with Ultralytics YOLOv8 [Object Detection](https://www.ultralytics.com/glossary/object-detection) + Watch: Security Alarm System Project with Ultralytics YOLO11 Object Detection

### Code -#### Import Libraries +???+ note -```python -import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from time import time + App Password Generation is necessary -import cv2 -import numpy as np -import torch +- Navigate to [App Password Generator](https://myaccount.google.com/apppasswords), designate an app name such as "security project," and obtain a 16-digit password. Copy this password and paste it into the designated `password` field in the code below. -from ultralytics import YOLO -from ultralytics.utils.plotting import Annotator, colors -``` +!!! example "Security Alarm System using YOLO11 Example" -#### Set up the parameters of the message + === "Python" -???+ note + ```python + import cv2 - App Password Generation is necessary + from ultralytics import solutions + + cap = cv2.VideoCapture("Path/to/video/file.mp4") + assert cap.isOpened(), "Error reading video file" + + # Video writer + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + video_writer = cv2.VideoWriter("security_alarm_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + + from_email = "abc@gmail.com" # The sender email address + password = "---- ---- ---- ----" # 16-digits password generated via: https://myaccount.google.com/apppasswords + to_email = "xyz@gmail.com" # The receiver email address + + # Init SecurityAlarm + security = solutions.SecurityAlarm( + show=True, # Display the output + model="yolo11n.pt", # i.e. YOLO11s.pt + records=1, # Total detections count to send an email about security + ) -- Navigate to [App Password Generator](https://myaccount.google.com/apppasswords), designate an app name such as "security project," and obtain a 16-digit password. Copy this password and paste it into the designated password field as instructed. 
- -```python -password = "" -from_email = "" # must match the email used to generate the password -to_email = "" # receiver email -``` - -#### Server creation and authentication - -```python -server = smtplib.SMTP("smtp.gmail.com: 587") -server.starttls() -server.login(from_email, password) -``` - -#### Email Send Function - -```python -def send_email(to_email, from_email, object_detected=1): - """Sends an email notification indicating the number of objects detected; defaults to 1 object.""" - message = MIMEMultipart() - message["From"] = from_email - message["To"] = to_email - message["Subject"] = "Security Alert" - # Add in the message body - message_body = f"ALERT - {object_detected} objects has been detected!!" - - message.attach(MIMEText(message_body, "plain")) - server.sendmail(from_email, to_email, message.as_string()) -``` - -#### Object Detection and Alert Sender - -```python -class ObjectDetection: - def __init__(self, capture_index): - """Initializes an ObjectDetection instance with a given camera index.""" - self.capture_index = capture_index - self.email_sent = False - - # model information - self.model = YOLO("yolov8n.pt") - - # visual information - self.annotator = None - self.start_time = 0 - self.end_time = 0 - - # device information - self.device = "cuda" if torch.cuda.is_available() else "cpu" - - def predict(self, im0): - """Run prediction using a YOLO model for the input image `im0`.""" - results = self.model(im0) - return results - - def display_fps(self, im0): - """Displays the FPS on an image `im0` by calculating and overlaying as white text on a black rectangle.""" - self.end_time = time() - fps = 1 / np.round(self.end_time - self.start_time, 2) - text = f"FPS: {int(fps)}" - text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)[0] - gap = 10 - cv2.rectangle( - im0, - (20 - gap, 70 - text_size[1] - gap), - (20 + text_size[0] + gap, 70 + gap), - (255, 255, 255), - -1, - ) - cv2.putText(im0, text, (20, 70), 
cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2) - - def plot_bboxes(self, results, im0): - """Plots bounding boxes on an image given detection results; returns annotated image and class IDs.""" - class_ids = [] - self.annotator = Annotator(im0, 3, results[0].names) - boxes = results[0].boxes.xyxy.cpu() - clss = results[0].boxes.cls.cpu().tolist() - names = results[0].names - for box, cls in zip(boxes, clss): - class_ids.append(cls) - self.annotator.box_label(box, label=names[int(cls)], color=colors(int(cls), True)) - return im0, class_ids - - def __call__(self): - """Executes object detection on video frames from a specified camera index, plotting bounding boxes and returning modified frames.""" - cap = cv2.VideoCapture(self.capture_index) - assert cap.isOpened() - cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) - frame_count = 0 - while True: - self.start_time = time() - ret, im0 = cap.read() - assert ret - results = self.predict(im0) - im0, class_ids = self.plot_bboxes(results, im0) - - if len(class_ids) > 0: # Only send email If not sent before - if not self.email_sent: - send_email(to_email, from_email, len(class_ids)) - self.email_sent = True - else: - self.email_sent = False - - self.display_fps(im0) - cv2.imshow("YOLOv8 Detection", im0) - frame_count += 1 - if cv2.waitKey(5) & 0xFF == 27: - break - cap.release() - cv2.destroyAllWindows() - server.quit() -``` - -#### Call the Object Detection class and Run the Inference - -```python -detector = ObjectDetection(capture_index=0) -detector() -``` + security.authenticate(from_email, password, to_email) # Authenticate the email server + + # Process video + while cap.isOpened(): + success, im0 = cap.read() + if not success: + print("Video frame is empty or video processing has been successfully completed.") + break + im0 = security.monitor(im0) + video_writer.write(im0) + + cap.release() + video_writer.release() + cv2.destroyAllWindows() + ``` That's it! 
When you execute the code, you'll receive a single notification on your email if any object is detected. The notification is sent immediately, not repeatedly. However, feel free to customize the code to suit your project requirements. @@ -178,24 +82,39 @@ That's it! When you execute the code, you'll receive a single notification on yo Email Received Sample +### Arguments `SecurityAlarm` + +Here's a table with the `SecurityAlarm` arguments: + +| Name | Type | Default | Description | +| ------------ | ------ | ------- | ------------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | +| `records` | `int` | `5` | Total detections count to send an email about security. | + +### Arguments `model.track` + +{% include "macros/track-args.md" %} + ## FAQ -### How does Ultralytics YOLOv8 improve the accuracy of a security alarm system? +### How does Ultralytics YOLO11 improve the accuracy of a security alarm system? -Ultralytics YOLOv8 enhances security alarm systems by delivering high-accuracy, real-time object detection. Its advanced algorithms significantly reduce false positives, ensuring that the system only responds to genuine threats. This increased reliability can be seamlessly integrated with existing security infrastructure, upgrading the overall surveillance quality. +Ultralytics YOLO11 enhances security alarm systems by delivering high-accuracy, real-time object detection. Its advanced algorithms significantly reduce false positives, ensuring that the system only responds to genuine threats. This increased reliability can be seamlessly integrated with existing security infrastructure, upgrading the overall surveillance quality. -### Can I integrate Ultralytics YOLOv8 with my existing security infrastructure? 
+### Can I integrate Ultralytics YOLO11 with my existing security infrastructure? -Yes, Ultralytics YOLOv8 can be seamlessly integrated with your existing security infrastructure. The system supports various modes and provides flexibility for customization, allowing you to enhance your existing setup with advanced object detection capabilities. For detailed instructions on integrating YOLOv8 in your projects, visit the [integration section](https://docs.ultralytics.com/integrations/). +Yes, Ultralytics YOLO11 can be seamlessly integrated with your existing security infrastructure. The system supports various modes and provides flexibility for customization, allowing you to enhance your existing setup with advanced object detection capabilities. For detailed instructions on integrating YOLO11 in your projects, visit the [integration section](https://docs.ultralytics.com/integrations/). -### What are the storage requirements for running Ultralytics YOLOv8? +### What are the storage requirements for running Ultralytics YOLO11? -Running Ultralytics YOLOv8 on a standard setup typically requires around 5GB of free disk space. This includes space for storing the YOLOv8 model and any additional dependencies. For cloud-based solutions, Ultralytics HUB offers efficient project management and dataset handling, which can optimize storage needs. Learn more about the [Pro Plan](../hub/pro.md) for enhanced features including extended storage. +Running Ultralytics YOLO11 on a standard setup typically requires around 5GB of free disk space. This includes space for storing the YOLO11 model and any additional dependencies. For cloud-based solutions, Ultralytics HUB offers efficient project management and dataset handling, which can optimize storage needs. Learn more about the [Pro Plan](../hub/pro.md) for enhanced features including extended storage. -### What makes Ultralytics YOLOv8 different from other object detection models like Faster R-CNN or SSD? 
+### What makes Ultralytics YOLO11 different from other object detection models like Faster R-CNN or SSD? -Ultralytics YOLOv8 provides an edge over models like Faster R-CNN or SSD with its real-time detection capabilities and higher accuracy. Its unique architecture allows it to process images much faster without compromising on [precision](https://www.ultralytics.com/glossary/precision), making it ideal for time-sensitive applications like security alarm systems. For a comprehensive comparison of object detection models, you can explore our [guide](https://docs.ultralytics.com/models/). +Ultralytics YOLO11 provides an edge over models like Faster R-CNN or SSD with its real-time detection capabilities and higher accuracy. Its unique architecture allows it to process images much faster without compromising on [precision](https://www.ultralytics.com/glossary/precision), making it ideal for time-sensitive applications like security alarm systems. For a comprehensive comparison of object detection models, you can explore our [guide](https://docs.ultralytics.com/models/). -### How can I reduce the frequency of false positives in my security system using Ultralytics YOLOv8? +### How can I reduce the frequency of false positives in my security system using Ultralytics YOLO11? -To reduce false positives, ensure your Ultralytics YOLOv8 model is adequately trained with a diverse and well-annotated dataset. Fine-tuning hyperparameters and regularly updating the model with new data can significantly improve detection accuracy. Detailed [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) techniques can be found in our [hyperparameter tuning guide](../guides/hyperparameter-tuning.md). +To reduce false positives, ensure your Ultralytics YOLO11 model is adequately trained with a diverse and well-annotated dataset. Fine-tuning hyperparameters and regularly updating the model with new data can significantly improve detection accuracy. 
Detailed [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) techniques can be found in our [hyperparameter tuning guide](../guides/hyperparameter-tuning.md). diff --git a/docs/en/guides/speed-estimation.md b/docs/en/guides/speed-estimation.md index 6f3726c9219..16a0f710b9d 100644 --- a/docs/en/guides/speed-estimation.md +++ b/docs/en/guides/speed-estimation.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to estimate object speed using Ultralytics YOLOv8 for applications in traffic control, autonomous navigation, and surveillance. -keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision, traffic control, autonomous navigation, surveillance, security +description: Learn how to estimate object speed using Ultralytics YOLO11 for applications in traffic control, autonomous navigation, and surveillance. +keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision, traffic control, autonomous navigation, surveillance, security --- -# Speed Estimation using Ultralytics YOLOv8 🚀 +# Speed Estimation using Ultralytics YOLO11 🚀 ## What is Speed Estimation? -[Speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) is the process of calculating the rate of movement of an object within a given context, often employed in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. Using [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) you can now calculate the speed of object using [object tracking](../modes/track.md) alongside distance and time data, crucial for tasks like traffic and surveillance. The accuracy of speed estimation directly influences the efficiency and reliability of various applications, making it a key component in the advancement of intelligent systems and real-time decision-making processes.
+[Speed estimation](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) is the process of calculating the rate of movement of an object within a given context, often employed in [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. Using [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) you can now calculate the speed of objects using [object tracking](../modes/track.md) alongside distance and time data, crucial for tasks like traffic and surveillance. The accuracy of speed estimation directly influences the efficiency and reliability of various applications, making it a key component in the advancement of intelligent systems and real-time decision-making processes.


@@ -18,12 +18,12 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision allowfullscreen>
- Watch: Speed Estimation using Ultralytics YOLOv8 + Watch: Speed Estimation using Ultralytics YOLO11

!!! tip "Check Out Our Blog" - For deeper insights into speed estimation, check out our blog post: [Ultralytics YOLOv8 for Speed Estimation in Computer Vision Projects](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) + For deeper insights into speed estimation, check out our blog post: [Ultralytics YOLO11 for Speed Estimation in Computer Vision Projects](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects) ## Advantages of Speed Estimation? @@ -35,46 +35,56 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision | Transportation | Transportation | | :------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![Speed Estimation on Road using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/speed-estimation-on-road-using-ultralytics-yolov8.avif) | ![Speed Estimation on Bridge using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/speed-estimation-on-bridge-using-ultralytics-yolov8.avif) | -| Speed Estimation on Road using Ultralytics YOLOv8 | Speed Estimation on Bridge using Ultralytics YOLOv8 | +| ![Speed Estimation on Road using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/speed-estimation-on-road-using-ultralytics-yolov8.avif) | ![Speed Estimation on Bridge using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/speed-estimation-on-bridge-using-ultralytics-yolov8.avif) | +| Speed Estimation on Road using Ultralytics YOLO11 | Speed Estimation on Bridge using Ultralytics YOLO11 | -!!! example "Speed Estimation using YOLOv8 Example" +!!! 
example "Speed Estimation using YOLO11 Example" - === "Speed Estimation" + === "CLI" + + ```bash + # Run a speed example + yolo solutions speed show=True + + # Pass a source video + yolo solutions speed source="path/to/video/file.mp4" + + # Pass region coordinates + yolo solutions speed region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] + ``` + + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n.pt") - names = model.model.names - - cap = cv2.VideoCapture("path/to/video/file.mp4") + cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) # Video writer - video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + video_writer = cv2.VideoWriter("speed_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - line_pts = [(0, 360), (1280, 360)] + # Define speed region points + speed_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)] - # Init speed-estimation obj - speed_obj = solutions.SpeedEstimator( - reg_pts=line_pts, - names=names, - view_img=True, + speed = solutions.SpeedEstimator( + show=True, # Display the output + model="yolo11n.pt", # Path to the YOLO11 model file. + region=speed_region, # Pass region points + # classes=[0, 2], # If you want to estimate speed of specific classes. 
+ # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - - tracks = model.track(im0, persist=True) - - im0 = speed_obj.estimate_speed(im0, tracks) + im0 = speed.estimate_speed(im0) video_writer.write(im0) cap.release() @@ -88,13 +98,12 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision ### Arguments `SpeedEstimator` -| Name | Type | Default | Description | -| ------------------ | ------ | -------------------------- | ---------------------------------------------------- | -| `names` | `dict` | `None` | Dictionary of class names. | -| `reg_pts` | `list` | `[(20, 400), (1260, 400)]` | List of region points for speed estimation. | -| `view_img` | `bool` | `False` | Whether to display the image with annotations. | -| `line_thickness` | `int` | `2` | Thickness of the lines for drawing boxes and tracks. | -| `spdl_dist_thresh` | `int` | `10` | Distance threshold for speed calculation. | +| Name | Type | Default | Description | +| ------------ | ------ | -------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the speed estimation region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | ### Arguments `model.track` @@ -102,19 +111,16 @@ keywords: Ultralytics YOLOv8, speed estimation, object tracking, computer vision ## FAQ -### How do I estimate object speed using Ultralytics YOLOv8? +### How do I estimate object speed using Ultralytics YOLO11?
-Estimating object speed with Ultralytics YOLOv8 involves combining [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking techniques. First, you need to detect objects in each frame using the YOLOv8 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed. +Estimating object speed with Ultralytics YOLO11 involves combining [object detection](https://www.ultralytics.com/glossary/object-detection) and tracking techniques. First, you need to detect objects in each frame using the YOLO11 model. Then, track these objects across frames to calculate their movement over time. Finally, use the distance traveled by the object between frames and the frame rate to estimate its speed. **Example**: ```python import cv2 -from ultralytics import YOLO, solutions - -model = YOLO("yolov8n.pt") -names = model.model.names +from ultralytics import solutions cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -122,17 +128,16 @@ video_writer = cv2.VideoWriter("speed_estimation.avi", cv2.VideoWriter_fourcc(*" # Initialize SpeedEstimator speed_obj = solutions.SpeedEstimator( - reg_pts=[(0, 360), (1280, 360)], - names=names, - view_img=True, + region=[(0, 360), (1280, 360)], + model="yolo11n.pt", + show=True, ) while cap.isOpened(): success, im0 = cap.read() if not success: break - tracks = model.track(im0, persist=True, show=False) - im0 = speed_obj.estimate_speed(im0, tracks) + im0 = speed_obj.estimate_speed(im0) video_writer.write(im0) cap.release() @@ -142,43 +147,43 @@ cv2.destroyAllWindows() For more details, refer to our [official blog post](https://www.ultralytics.com/blog/ultralytics-yolov8-for-speed-estimation-in-computer-vision-projects). 
-### What are the benefits of using Ultralytics YOLOv8 for speed estimation in traffic management? +### What are the benefits of using Ultralytics YOLO11 for speed estimation in traffic management? -Using Ultralytics YOLOv8 for speed estimation offers significant advantages in traffic management: +Using Ultralytics YOLO11 for speed estimation offers significant advantages in traffic management: - **Enhanced Safety**: Accurately estimate vehicle speeds to detect over-speeding and improve road safety. -- **Real-Time Monitoring**: Benefit from YOLOv8's real-time object detection capability to monitor traffic flow and congestion effectively. +- **Real-Time Monitoring**: Benefit from YOLO11's real-time object detection capability to monitor traffic flow and congestion effectively. - **Scalability**: Deploy the model on various hardware setups, from edge devices to servers, ensuring flexible and scalable solutions for large-scale implementations. For more applications, see [advantages of speed estimation](#advantages-of-speed-estimation). -### Can YOLOv8 be integrated with other AI frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or [PyTorch](https://www.ultralytics.com/glossary/pytorch)? +### Can YOLO11 be integrated with other AI frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) or [PyTorch](https://www.ultralytics.com/glossary/pytorch)? -Yes, YOLOv8 can be integrated with other AI frameworks like TensorFlow and PyTorch. Ultralytics provides support for exporting YOLOv8 models to various formats like ONNX, TensorRT, and CoreML, ensuring smooth interoperability with other ML frameworks. +Yes, YOLO11 can be integrated with other AI frameworks like TensorFlow and PyTorch. Ultralytics provides support for exporting YOLO11 models to various formats like ONNX, TensorRT, and CoreML, ensuring smooth interoperability with other ML frameworks. 
-To export a YOLOv8 model to ONNX format: +To export a YOLO11 model to ONNX format: ```bash -yolo export --weights yolov8n.pt --include onnx +yolo export model=yolo11n.pt format=onnx ``` Learn more about exporting models in our [guide on export](../modes/export.md). -### How accurate is the speed estimation using Ultralytics YOLOv8? +### How accurate is the speed estimation using Ultralytics YOLO11? -The [accuracy](https://www.ultralytics.com/glossary/accuracy) of speed estimation using Ultralytics YOLOv8 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion. +The [accuracy](https://www.ultralytics.com/glossary/accuracy) of speed estimation using Ultralytics YOLO11 depends on several factors, including the quality of the object tracking, the resolution and frame rate of the video, and environmental variables. While the speed estimator provides reliable estimates, it may not be 100% accurate due to variances in frame processing speed and object occlusion. **Note**: Always consider margin of error and validate the estimates with ground truth data when possible. For further accuracy improvement tips, check the [Arguments `SpeedEstimator` section](#arguments-speedestimator). -### Why choose Ultralytics YOLOv8 over other object detection models like TensorFlow Object Detection API? +### Why choose Ultralytics YOLO11 over other object detection models like TensorFlow Object Detection API?
-Ultralytics YOLOv8 offers several advantages over other object detection models, such as the TensorFlow Object Detection API: +Ultralytics YOLO11 offers several advantages over other object detection models, such as the TensorFlow Object Detection API: -- **Real-Time Performance**: YOLOv8 is optimized for real-time detection, providing high speed and accuracy. -- **Ease of Use**: Designed with a user-friendly interface, YOLOv8 simplifies model training and deployment. +- **Real-Time Performance**: YOLO11 is optimized for real-time detection, providing high speed and accuracy. +- **Ease of Use**: Designed with a user-friendly interface, YOLO11 simplifies model training and deployment. - **Versatility**: Supports multiple tasks, including object detection, segmentation, and pose estimation. -- **Community and Support**: YOLOv8 is backed by an active community and extensive documentation, ensuring developers have the resources they need. +- **Community and Support**: YOLO11 is backed by an active community and extensive documentation, ensuring developers have the resources they need. -For more information on the benefits of YOLOv8, explore our detailed [model page](../models/yolov8.md). +For more information on the benefits of YOLO11, explore our detailed [model page](../models/yolo11.md). diff --git a/docs/en/guides/steps-of-a-cv-project.md b/docs/en/guides/steps-of-a-cv-project.md index 7f50440a37c..72676d72a54 100644 --- a/docs/en/guides/steps-of-a-cv-project.md +++ b/docs/en/guides/steps-of-a-cv-project.md @@ -18,15 +18,11 @@ Computer vision is a subfield of [artificial intelligence](https://www.ultralyti allowfullscreen>
- Watch: How to Do [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) Projects | A Step-by-Step Guide + Watch: How to Do Computer Vision Projects | A Step-by-Step Guide

Computer vision techniques like [object detection](../tasks/detect.md), [image classification](../tasks/classify.md), and [instance segmentation](../tasks/segment.md) can be applied across various industries, from [autonomous driving](https://www.ultralytics.com/solutions/ai-in-self-driving) to [medical imaging](https://www.ultralytics.com/solutions/ai-in-healthcare) to gain valuable insights. -

- Overview of computer vision techniques -

- Working on your own computer vision projects is a great way to understand and learn more about computer vision. However, a computer vision project can consist of many steps, and it might seem confusing at first. By the end of this guide, you'll be familiar with the steps involved in a computer vision project. We'll walk through everything from the beginning to the end of a project, explaining why each part is important. Let's get started and make your computer vision project a success! ## An Overview of a Computer Vision Project @@ -147,7 +143,7 @@ It's important to keep in mind that proper dataset management is vital for effic It's important to assess your model's performance using various metrics and refine it to improve [accuracy](https://www.ultralytics.com/glossary/accuracy). [Evaluating](../modes/val.md) helps identify areas where the model excels and where it may need improvement. Fine-tuning ensures the model is optimized for the best possible performance. -- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), recall, and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions. +- **[Performance Metrics](./yolo-performance-metrics.md):** Use metrics like accuracy, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and F1-score to evaluate your model's performance. These metrics provide insights into how well your model is making predictions. - **[Hyperparameter Tuning](./hyperparameter-tuning.md):** Adjust hyperparameters to optimize model performance. Techniques like grid search or random search can help find the best hyperparameter values. - Fine-Tuning: Make small adjustments to the model architecture or training process to enhance performance. 
This might involve tweaking [learning rates](https://www.ultralytics.com/glossary/learning-rate), [batch sizes](https://www.ultralytics.com/glossary/batch-size), or other model parameters. @@ -166,7 +162,7 @@ Once your model has been thoroughly tested, it's time to deploy it. Deployment i - Setting Up the Environment: Configure the necessary infrastructure for your chosen deployment option, whether it's cloud-based (AWS, Google Cloud, Azure) or edge-based (local devices, IoT). -- **[Exporting the Model](../modes/export.md):** Export your model to the appropriate format (e.g., ONNX, TensorRT, CoreML for YOLOv8) to ensure compatibility with your deployment platform. +- **[Exporting the Model](../modes/export.md):** Export your model to the appropriate format (e.g., ONNX, TensorRT, CoreML for YOLO11) to ensure compatibility with your deployment platform. - **Deploying the Model:** Deploy the model by setting up APIs or endpoints and integrating it with your application. - **Ensuring Scalability**: Implement load balancers, auto-scaling groups, and monitoring tools to manage resources and handle increasing data and user requests. @@ -188,12 +184,12 @@ Connecting with a community of computer vision enthusiasts can help you tackle a ### Community Resources -- **GitHub Issues:** Check out the [YOLOv8 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The active community and maintainers are there to help with specific issues. +- **GitHub Issues:** Check out the [YOLO11 GitHub repository](https://github.com/ultralytics/ultralytics/issues) and use the Issues tab to ask questions, report bugs, and suggest new features. The active community and maintainers are there to help with specific issues. 
- **Ultralytics Discord Server:** Join the [Ultralytics Discord server](https://discord.com/invite/ultralytics) to interact with other users and developers, get support, and share insights. ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Explore the [official YOLOv8 documentation](./index.md) for detailed guides with helpful tips on different computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Explore the [official YOLO11 documentation](./index.md) for detailed guides with helpful tips on different computer vision tasks and projects. Using these resources will help you overcome challenges and stay updated with the latest trends and best practices in the computer vision community. @@ -229,7 +225,7 @@ After splitting, apply data augmentation techniques like rotation, scaling, and ### How can I export my trained computer vision model for deployment? -Exporting your model ensures compatibility with different deployment platforms. Ultralytics provides multiple formats, including ONNX, TensorRT, and CoreML. To export your YOLOv8 model, follow this guide: +Exporting your model ensures compatibility with different deployment platforms. Ultralytics provides multiple formats, including ONNX, TensorRT, and CoreML. To export your YOLO11 model, follow this guide: - Use the `export` function with the desired format parameter. - Ensure the exported model fits the specifications of your deployment environment (e.g., edge devices, cloud). diff --git a/docs/en/guides/streamlit-live-inference.md b/docs/en/guides/streamlit-live-inference.md index e8fb5c9165f..68fbe925a22 100644 --- a/docs/en/guides/streamlit-live-inference.md +++ b/docs/en/guides/streamlit-live-inference.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to set up a real-time object detection application using Streamlit and Ultralytics YOLOv8. Follow this step-by-step guide to implement webcam-based object detection. 
-keywords: Streamlit, YOLOv8, Real-time Object Detection, Streamlit Application, YOLOv8 Streamlit Tutorial, Webcam Object Detection +description: Learn how to set up a real-time object detection application using Streamlit and Ultralytics YOLO11. Follow this step-by-step guide to implement webcam-based object detection. +keywords: Streamlit, YOLO11, Real-time Object Detection, Streamlit Application, YOLO11 Streamlit Tutorial, Webcam Object Detection --- -# Live Inference with Streamlit Application using Ultralytics YOLOv8 +# Live Inference with Streamlit Application using Ultralytics YOLO11 ## Introduction -Streamlit makes it simple to build and deploy interactive web applications. Combining this with Ultralytics YOLOv8 allows for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and analysis directly in your browser. YOLOv8 high accuracy and speed ensure seamless performance for live video streams, making it ideal for applications in security, retail, and beyond. +Streamlit makes it simple to build and deploy interactive web applications. Combining this with Ultralytics YOLO11 allows for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and analysis directly in your browser. YOLO11's high accuracy and speed ensure seamless performance for live video streams, making it ideal for applications in security, retail, and beyond.


@@ -18,19 +18,19 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb allowfullscreen>
- Watch: How to Use Streamlit with Ultralytics for Real-Time [Computer Vision](https://www.ultralytics.com/glossary/computer-vision-cv) in Your Browser + Watch: How to Use Streamlit with Ultralytics for Real-Time Computer Vision in Your Browser

| Aquaculture | Animals husbandry | | :----------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | -| ![Fish Detection using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/fish-detection-ultralytics-yolov8.avif) | ![Animals Detection using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/animals-detection-yolov8.avif) | -| Fish Detection using Ultralytics YOLOv8 | Animals Detection using Ultralytics YOLOv8 | +| ![Fish Detection using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/fish-detection-ultralytics-yolov8.avif) | ![Animals Detection using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/animals-detection-yolov8.avif) | +| Fish Detection using Ultralytics YOLO11 | Animals Detection using Ultralytics YOLO11 | ## Advantages of Live Inference -- **Seamless Real-Time Object Detection**: Streamlit combined with YOLOv8 enables real-time object detection directly from your webcam feed. This allows for immediate analysis and insights, making it ideal for applications requiring instant feedback. +- **Seamless Real-Time Object Detection**: Streamlit combined with YOLO11 enables real-time object detection directly from your webcam feed. This allows for immediate analysis and insights, making it ideal for applications requiring instant feedback. - **User-Friendly Deployment**: Streamlit's interactive interface makes it easy to deploy and use the application without extensive technical knowledge. Users can start live inference with a simple click, enhancing accessibility and usability. -- **Efficient Resource Utilization**: YOLOv8 optimized algorithm ensure high-speed processing with minimal computational resources. 
This efficiency allows for smooth and reliable webcam inference even on standard hardware, making advanced computer vision accessible to a wider audience. +- **Efficient Resource Utilization**: YOLO11's optimized algorithms ensure high-speed processing with minimal computational resources. This efficiency allows for smooth and reliable webcam inference even on standard hardware, making advanced computer vision accessible to a wider audience. ## Streamlit Application Code @@ -40,23 +40,29 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb !!! example "Streamlit Application" + === "CLI" + + ```bash + yolo solutions inference + + yolo solutions inference model="path/to/model.pt" + ``` + === "Python" ```python from ultralytics import solutions - solutions.inference() - - ### Make sure to run the file using command `streamlit run ` - ``` + inf = solutions.Inference( + model="yolo11n.pt", # You can use any model that Ultralytics supports, e.g. YOLO11 or a custom-trained model + ) - === "CLI" + inf.inference() - ```bash - yolo streamlit-predict + ### Make sure to run the file using command `streamlit run ` ``` -This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLOv8 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection. +This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLO11 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection.
You can optionally supply a specific model in Python: @@ -67,15 +73,18 @@ You can optionally supply a specific model in Python: ```python from ultralytics import solutions - # Pass a model as an argument - solutions.inference(model="path/to/model.pt") + inf = solutions.Inference( + model="yolo11n.pt", # You can use any model that Ultralytics support, i.e. YOLO11, YOLOv10 + ) + + inf.inference() ### Make sure to run the file using command `streamlit run ` ``` ## Conclusion -By following this guide, you have successfully created a real-time object detection application using Streamlit and Ultralytics YOLOv8. This application allows you to experience the power of YOLOv8 in detecting objects through your webcam, with a user-friendly interface and the ability to stop the video stream at any time. +By following this guide, you have successfully created a real-time object detection application using Streamlit and Ultralytics YOLO11. This application allows you to experience the power of YOLO11 in detecting objects through your webcam, with a user-friendly interface and the ability to stop the video stream at any time. For further enhancements, you can explore adding more features such as recording the video stream, saving the annotated frames, or integrating with other computer vision libraries. @@ -90,13 +99,13 @@ Engage with the community to learn more, troubleshoot issues, and share your pro ### Official Documentation -- **Ultralytics YOLOv8 Documentation:** Refer to the [official YOLOv8 documentation](https://docs.ultralytics.com/) for comprehensive guides and insights on various computer vision tasks and projects. +- **Ultralytics YOLO11 Documentation:** Refer to the [official YOLO11 documentation](https://docs.ultralytics.com/) for comprehensive guides and insights on various computer vision tasks and projects. ## FAQ -### How can I set up a real-time object detection application using Streamlit and Ultralytics YOLOv8? 
+### How can I set up a real-time object detection application using Streamlit and Ultralytics YOLO11? -Setting up a real-time object detection application with Streamlit and Ultralytics YOLOv8 is straightforward. First, ensure you have the Ultralytics Python package installed using: +Setting up a real-time object detection application with Streamlit and Ultralytics YOLO11 is straightforward. First, ensure you have the Ultralytics Python package installed using: ```bash pip install ultralytics @@ -111,7 +120,11 @@ Then, you can create a basic Streamlit application to run live inference: ```python from ultralytics import solutions - solutions.inference() + inf = solutions.Inference( + model="yolo11n.pt", # You can use any model that Ultralytics support, i.e. YOLO11, YOLOv10 + ) + + inf.inference() ### Make sure to run the file using command `streamlit run ` ``` @@ -119,34 +132,34 @@ Then, you can create a basic Streamlit application to run live inference: === "CLI" ```bash - yolo streamlit-predict + yolo solutions inference ``` For more details on the practical setup, refer to the [Streamlit Application Code section](#streamlit-application-code) of the documentation. -### What are the main advantages of using Ultralytics YOLOv8 with Streamlit for real-time object detection? +### What are the main advantages of using Ultralytics YOLO11 with Streamlit for real-time object detection? -Using Ultralytics YOLOv8 with Streamlit for real-time object detection offers several advantages: +Using Ultralytics YOLO11 with Streamlit for real-time object detection offers several advantages: - **Seamless Real-Time Detection**: Achieve high-[accuracy](https://www.ultralytics.com/glossary/accuracy), real-time object detection directly from webcam feeds. - **User-Friendly Interface**: Streamlit's intuitive interface allows easy use and deployment without extensive technical knowledge. 
-- **Resource Efficiency**: YOLOv8's optimized algorithms ensure high-speed processing with minimal computational resources. +- **Resource Efficiency**: YOLO11's optimized algorithms ensure high-speed processing with minimal computational resources. Discover more about these advantages [here](#advantages-of-live-inference). ### How do I deploy a Streamlit object detection application in my web browser? -After coding your Streamlit application integrating Ultralytics YOLOv8, you can deploy it by running: +After coding your Streamlit application integrating Ultralytics YOLO11, you can deploy it by running: ```bash streamlit run ``` -This command will launch the application in your default web browser, enabling you to select YOLOv8 models, set confidence, and NMS thresholds, and start real-time object detection with a simple click. For a detailed guide, refer to the [Streamlit Application Code](#streamlit-application-code) section. +This command will launch the application in your default web browser, enabling you to select YOLO11 models, set confidence, and NMS thresholds, and start real-time object detection with a simple click. For a detailed guide, refer to the [Streamlit Application Code](#streamlit-application-code) section. -### What are some use cases for real-time object detection using Streamlit and Ultralytics YOLOv8? +### What are some use cases for real-time object detection using Streamlit and Ultralytics YOLO11? -Real-time object detection using Streamlit and Ultralytics YOLOv8 can be applied in various sectors: +Real-time object detection using Streamlit and Ultralytics YOLO11 can be applied in various sectors: - **Security**: Real-time monitoring for unauthorized access. - **Retail**: Customer counting, shelf management, and more. @@ -154,12 +167,12 @@ Real-time object detection using Streamlit and Ultralytics YOLOv8 can be applied For more in-depth use cases and examples, explore [Ultralytics Solutions](https://docs.ultralytics.com/solutions/). 
-### How does Ultralytics YOLOv8 compare to other object detection models like YOLOv5 and RCNNs? +### How does Ultralytics YOLO11 compare to other object detection models like YOLOv5 and RCNNs? -Ultralytics YOLOv8 provides several enhancements over prior models like YOLOv5 and RCNNs: +Ultralytics YOLO11 provides several enhancements over prior models like YOLOv5 and RCNNs: - **Higher Speed and Accuracy**: Improved performance for real-time applications. - **Ease of Use**: Simplified interfaces and deployment. - **Resource Efficiency**: Optimized for better speed with minimal computational requirements. -For a comprehensive comparison, check [Ultralytics YOLOv8 Documentation](https://docs.ultralytics.com/models/yolov8/) and related blog posts discussing model performance. +For a comprehensive comparison, check [Ultralytics YOLO11 Documentation](https://docs.ultralytics.com/models/yolo11/) and related blog posts discussing model performance. diff --git a/docs/en/guides/trackzone.md b/docs/en/guides/trackzone.md new file mode 100644 index 00000000000..09bd0c416a9 --- /dev/null +++ b/docs/en/guides/trackzone.md @@ -0,0 +1,173 @@ +--- +comments: true +description: Discover how TrackZone leverages Ultralytics YOLO11 to precisely track objects within specific zones, enabling real-time insights for crowd analysis, surveillance, and targeted monitoring. +keywords: TrackZone, object tracking, YOLO11, Ultralytics, real-time object detection, AI, deep learning, crowd analysis, surveillance, zone-based tracking, resource optimization +--- + +# TrackZone using Ultralytics YOLO11 + +Open TrackZone In Colab + +## What is TrackZone? + +TrackZone specializes in monitoring objects within designated areas of a frame instead of the whole frame. Built on [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/), it integrates object detection and tracking specifically within zones for videos and live camera feeds.
YOLO11's advanced algorithms and [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) technologies make it a perfect choice for real-time use cases, offering precise and efficient object tracking in applications like crowd monitoring and surveillance. + +

+
+ +
+ Watch: How to Track Objects in Region using Ultralytics YOLO11 | TrackZone 🚀

+ +## Advantages of Object Tracking in Zones (TrackZone) + +- **Targeted Analysis:** Tracking objects within specific zones allows for more focused insights, enabling precise monitoring and analysis of areas of interest, such as entry points or restricted zones. +- **Improved Efficiency:** By narrowing the tracking scope to defined zones, TrackZone reduces computational overhead, ensuring faster processing and optimal performance. +- **Enhanced Security:** Zonal tracking improves surveillance by monitoring critical areas, aiding in the early detection of unusual activity or security breaches. +- **Scalable Solutions:** The ability to focus on specific zones makes TrackZone adaptable to various scenarios, from retail spaces to industrial settings, ensuring seamless integration and scalability. + +## Real World Applications + +| Agriculture | Transportation | +| :-----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| ![Plants Tracking in Field Using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/plants-tracking-in-zone-using-ultralytics-yolo11.avif) | ![Vehicles Tracking on Road using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/vehicle-tracking-in-zone-using-ultralytics-yolo11.avif) | +| Plants Tracking in Field Using Ultralytics YOLO11 | Vehicles Tracking on Road using Ultralytics YOLO11 | + +!!! 
example "TrackZone using YOLO11 Example" + + === "CLI" + + ```bash + # Run a trackzone example + yolo solutions trackzone show=True + + # Pass a source video + yolo solutions trackzone show=True source="path/to/video/file.mp4" + + # Pass region coordinates + yolo solutions trackzone show=True region=[(150, 150), (1130, 150), (1130, 570), (150, 570)] + ``` + + === "Python" + + ```python + import cv2 + + from ultralytics import solutions + + cap = cv2.VideoCapture("path/to/video/file.mp4") + assert cap.isOpened(), "Error reading video file" + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + + # Define region points + region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)] + + # Video writer + video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + + # Init TrackZone (Object Tracking in Zones, not complete frame) + trackzone = solutions.TrackZone( + show=True, # Display the output + region=region_points, # Pass region points + model="yolo11n.pt", # You can use any model that Ultralytics support, i.e. YOLOv9, YOLOv10 + # line_width=2, # Adjust the line width for bounding boxes and text display + # classes=[0, 2], # If you want to count specific classes i.e. person and car with COCO pretrained model. 
+ ) + + # Process video + while cap.isOpened(): + success, im0 = cap.read() + if not success: + print("Video frame is empty or video processing has been successfully completed.") + break + im0 = trackzone.trackzone(im0) + video_writer.write(im0) + + cap.release() + video_writer.release() + cv2.destroyAllWindows() + ``` + +### Argument `TrackZone` + +Here's a table with the `TrackZone` arguments: + +| Name | Type | Default | Description | +| ------------ | ------ | ---------------------------------------------------- | ---------------------------------------------------- | +| `model` | `str` | `None` | Path to Ultralytics YOLO Model File | +| `region` | `list` | `[(150, 150), (1130, 150), (1130, 570), (150, 570)]` | List of points defining the object tracking region. | +| `line_width` | `int` | `2` | Line thickness for bounding boxes. | +| `show` | `bool` | `False` | Flag to control whether to display the video stream. | + +### Arguments `model.track` + +{% include "macros/track-args.md" %} + +## FAQ + +### How do I track objects in a specific area or zone of a video frame using Ultralytics YOLO11? + +Tracking objects in a defined area or zone of a video frame is straightforward with Ultralytics YOLO11. Simply use the command provided below to initiate tracking. This approach ensures efficient analysis and accurate results, making it ideal for applications like surveillance, crowd management, or any scenario requiring zonal tracking. + +```bash +yolo solutions trackzone source="path/to/video/file.mp4" show=True +``` + +### How can I use TrackZone in Python with Ultralytics YOLO11? + +With just a few lines of code, you can set up object tracking in specific zones, making it easy to integrate into your projects. 
+ +```python +import cv2 + +from ultralytics import solutions + +cap = cv2.VideoCapture("path/to/video/file.mp4") +assert cap.isOpened(), "Error reading video file" +w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + +# Define region points +region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)] + +# Video writer +video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + +# Init TrackZone (Object Tracking in Zones, not complete frame) +trackzone = solutions.TrackZone( + show=True, # Display the output + region=region_points, # Pass region points + model="yolo11n.pt", +) + +# Process video +while cap.isOpened(): + success, im0 = cap.read() + if not success: + print("Video frame is empty or video processing has been successfully completed.") + break + im0 = trackzone.trackzone(im0) + video_writer.write(im0) + +cap.release() +video_writer.release() +cv2.destroyAllWindows() +``` + +### How do I configure the zone points for video processing using Ultralytics TrackZone? + +Configuring zone points for video processing with Ultralytics TrackZone is simple and customizable. You can directly define and adjust the zones through a Python script, allowing precise control over the areas you want to monitor. 
+ +```python +# Define region points +region_points = [(150, 150), (1130, 150), (1130, 570), (150, 570)] + +# Init TrackZone (Object Tracking in Zones, not complete frame) +trackzone = solutions.TrackZone( + show=True, # Display the output + region=region_points, # Pass region points +) +``` diff --git a/docs/en/guides/triton-inference-server.md b/docs/en/guides/triton-inference-server.md index d1c7921f68b..71b1eb1c137 100644 --- a/docs/en/guides/triton-inference-server.md +++ b/docs/en/guides/triton-inference-server.md @@ -1,12 +1,12 @@ --- comments: true -description: Learn how to integrate Ultralytics YOLOv8 with NVIDIA Triton Inference Server for scalable, high-performance AI model deployment. -keywords: Triton Inference Server, YOLOv8, Ultralytics, NVIDIA, deep learning, AI model deployment, ONNX, scalable inference +description: Learn how to integrate Ultralytics YOLO11 with NVIDIA Triton Inference Server for scalable, high-performance AI model deployment. +keywords: Triton Inference Server, YOLO11, Ultralytics, NVIDIA, deep learning, AI model deployment, ONNX, scalable inference --- -# Triton Inference Server with Ultralytics YOLOv8 +# Triton Inference Server with Ultralytics YOLO11 -The [Triton Inference Server](https://developer.nvidia.com/triton-inference-server) (formerly known as TensorRT Inference Server) is an open-source software solution developed by NVIDIA. It provides a cloud inference solution optimized for NVIDIA GPUs. Triton simplifies the deployment of AI models at scale in production. Integrating Ultralytics YOLOv8 with Triton Inference Server allows you to deploy scalable, high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference workloads. This guide provides steps to set up and test the integration. +The [Triton Inference Server](https://developer.nvidia.com/triton-inference-server) (formerly known as TensorRT Inference Server) is an open-source software solution developed by NVIDIA. 
It provides a cloud inference solution optimized for NVIDIA GPUs. Triton simplifies the deployment of AI models at scale in production. Integrating Ultralytics YOLO11 with Triton Inference Server allows you to deploy scalable, high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference workloads. This guide provides steps to set up and test the integration.


@@ -38,7 +38,7 @@ Ensure you have the following prerequisites before proceeding: pip install tritonclient[all] ``` -## Exporting YOLOv8 to ONNX Format +## Exporting YOLO11 to ONNX Format Before deploying the model on Triton, it must be exported to the ONNX format. ONNX (Open Neural Network Exchange) is a format that allows models to be transferred between different deep learning frameworks. Use the `export` function from the `YOLO` class: @@ -46,7 +46,17 @@ Before deploying the model on Triton, it must be exported to the ONNX format. ON from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") # load an official model +model = YOLO("yolo11n.pt") # load an official model + +# Retrieve metadata during export +metadata = [] + + +def export_cb(exporter): + metadata.append(exporter.metadata) + + +model.add_callback("on_export_end", export_cb) # Export the model onnx_file = model.export(format="onnx", dynamic=True) @@ -80,6 +90,43 @@ The Triton Model Repository is a storage location where Triton can access and lo # Create config file (triton_model_path / "config.pbtxt").touch() + + # (Optional) Enable TensorRT for GPU inference + # First run will be slow due to TensorRT engine conversion + data = """ + optimization { + execution_accelerators { + gpu_execution_accelerator { + name: "tensorrt" + parameters { + key: "precision_mode" + value: "FP16" + } + parameters { + key: "max_workspace_size_bytes" + value: "3221225472" + } + parameters { + key: "trt_engine_cache_enable" + value: "1" + } + parameters { + key: "trt_engine_cache_path" + value: "/models/yolo/1" + } + } + } + } + parameters { + key: "metadata" + value: { + string_value: "%s" + } + } + """ % metadata[0] + + with open(triton_model_path / "config.pbtxt", "w") as f: + f.write(data) ``` ## Running Triton Inference Server @@ -94,7 +141,7 @@ import time from tritonclient.http import InferenceServerClient # Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver -tag = 
"nvcr.io/nvidia/tritonserver:23.09-py3" # 6.4 GB +tag = "nvcr.io/nvidia/tritonserver:24.09-py3" # 8.57 GB # Pull the image subprocess.call(f"docker pull {tag}", shell=True) @@ -102,7 +149,7 @@ subprocess.call(f"docker pull {tag}", shell=True) # Run the Triton server and capture the container ID container_id = ( subprocess.check_output( - f"docker run -d --rm -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models", + f"docker run -d --rm --gpus 0 -v {triton_repo_path}:/models -p 8000:8000 {tag} tritonserver --model-repository=/models", shell=True, ) .decode("utf-8") @@ -141,21 +188,21 @@ subprocess.call(f"docker kill {container_id}", shell=True) --- -By following the above steps, you can deploy and run Ultralytics YOLOv8 models efficiently on Triton Inference Server, providing a scalable and high-performance solution for deep learning inference tasks. If you face any issues or have further queries, refer to the [official Triton documentation](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html) or reach out to the Ultralytics community for support. +By following the above steps, you can deploy and run Ultralytics YOLO11 models efficiently on Triton Inference Server, providing a scalable and high-performance solution for deep learning inference tasks. If you face any issues or have further queries, refer to the [official Triton documentation](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/index.html) or reach out to the Ultralytics community for support. ## FAQ -### How do I set up Ultralytics YOLOv8 with NVIDIA Triton Inference Server? +### How do I set up Ultralytics YOLO11 with NVIDIA Triton Inference Server? 
-Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps: +Setting up [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolov8/) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) involves a few key steps: -1. **Export YOLOv8 to ONNX format**: +1. **Export YOLO11 to ONNX format**: ```python from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model # Export the model to ONNX format onnx_file = model.export(format="onnx", dynamic=True) @@ -187,13 +234,13 @@ Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) wit from tritonclient.http import InferenceServerClient # Define image https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver - tag = "nvcr.io/nvidia/tritonserver:23.09-py3" + tag = "nvcr.io/nvidia/tritonserver:24.09-py3" subprocess.call(f"docker pull {tag}", shell=True) container_id = ( subprocess.check_output( - f"docker run -d --rm -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models", + f"docker run -d --rm --gpus 0 -v {triton_repo_path}/models -p 8000:8000 {tag} tritonserver --model-repository=/models", shell=True, ) .decode("utf-8") @@ -209,21 +256,21 @@ Setting up [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) wit time.sleep(1) ``` -This setup can help you efficiently deploy YOLOv8 models at scale on Triton Inference Server for high-performance AI model inference. +This setup can help you efficiently deploy YOLO11 models at scale on Triton Inference Server for high-performance AI model inference. -### What benefits does using Ultralytics YOLOv8 with NVIDIA Triton Inference Server offer? +### What benefits does using Ultralytics YOLO11 with NVIDIA Triton Inference Server offer? 
-Integrating [Ultralytics YOLOv8](../models/yolov8.md) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) provides several advantages: +Integrating [Ultralytics YOLO11](../models/yolov8.md) with [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) provides several advantages: - **Scalable AI Inference**: Triton allows serving multiple models from a single server instance, supporting dynamic model loading and unloading, making it highly scalable for diverse AI workloads. - **High Performance**: Optimized for NVIDIA GPUs, Triton Inference Server ensures high-speed inference operations, perfect for real-time applications such as [object detection](https://www.ultralytics.com/glossary/object-detection). - **Ensemble and Model Versioning**: Triton's ensemble mode enables combining multiple models to improve results, and its model versioning supports A/B testing and rolling updates. -For detailed instructions on setting up and running YOLOv8 with Triton, you can refer to the [setup guide](#setting-up-triton-model-repository). +For detailed instructions on setting up and running YOLO11 with Triton, you can refer to the [setup guide](#setting-up-triton-model-repository). -### Why should I export my YOLOv8 model to ONNX format before using Triton Inference Server? +### Why should I export my YOLO11 model to ONNX format before using Triton Inference Server? 
-Using ONNX (Open Neural Network Exchange) format for your [Ultralytics YOLOv8](../models/yolov8.md) model before deploying it on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) offers several key benefits: +Using ONNX (Open Neural Network Exchange) format for your [Ultralytics YOLO11](../models/yolov8.md) model before deploying it on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server) offers several key benefits: - **Interoperability**: ONNX format supports transfer between different deep learning frameworks (such as PyTorch, TensorFlow), ensuring broader compatibility. - **Optimization**: Many deployment environments, including Triton, optimize for ONNX, enabling faster inference and better performance. @@ -234,15 +281,15 @@ To export your model, use: ```python from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") onnx_file = model.export(format="onnx", dynamic=True) ``` You can follow the steps in the [exporting guide](../modes/export.md) to complete the process. -### Can I run inference using the Ultralytics YOLOv8 model on Triton Inference Server? +### Can I run inference using the Ultralytics YOLO11 model on Triton Inference Server? -Yes, you can run inference using the [Ultralytics YOLOv8](../models/yolov8.md) model on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server). Once your model is set up in the Triton Model Repository and the server is running, you can load and run inference on your model as follows: +Yes, you can run inference using the [Ultralytics YOLO11](../models/yolov8.md) model on [NVIDIA Triton Inference Server](https://developer.nvidia.com/triton-inference-server). 
Once your model is set up in the Triton Model Repository and the server is running, you can load and run inference on your model as follows: ```python from ultralytics import YOLO @@ -254,14 +301,14 @@ model = YOLO("http://localhost:8000/yolo", task="detect") results = model("path/to/image.jpg") ``` -For an in-depth guide on setting up and running Triton Server with YOLOv8, refer to the [running triton inference server](#running-triton-inference-server) section. +For an in-depth guide on setting up and running Triton Server with YOLO11, refer to the [running triton inference server](#running-triton-inference-server) section. -### How does Ultralytics YOLOv8 compare to [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch models for deployment? +### How does Ultralytics YOLO11 compare to [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and PyTorch models for deployment? -[Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) offers several unique advantages compared to TensorFlow and PyTorch models for deployment: +[Ultralytics YOLO11](https://docs.ultralytics.com/models/yolov8/) offers several unique advantages compared to TensorFlow and PyTorch models for deployment: -- **Real-time Performance**: Optimized for real-time object detection tasks, YOLOv8 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, making it ideal for applications requiring live video analytics. -- **Ease of Use**: YOLOv8 integrates seamlessly with Triton Inference Server and supports diverse export formats (ONNX, TensorRT, CoreML), making it flexible for various deployment scenarios. -- **Advanced Features**: YOLOv8 includes features like dynamic model loading, model versioning, and ensemble inference, which are crucial for scalable and reliable AI deployments. 
+- **Real-time Performance**: Optimized for real-time object detection tasks, YOLO11 provides state-of-the-art [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed, making it ideal for applications requiring live video analytics. +- **Ease of Use**: YOLO11 integrates seamlessly with Triton Inference Server and supports diverse export formats (ONNX, TensorRT, CoreML), making it flexible for various deployment scenarios. +- **Advanced Features**: YOLO11 includes features like dynamic model loading, model versioning, and ensemble inference, which are crucial for scalable and reliable AI deployments. For more details, compare the deployment options in the [model deployment guide](../modes/export.md). diff --git a/docs/en/guides/view-results-in-terminal.md b/docs/en/guides/view-results-in-terminal.md index b159e1a7f60..95d9850d8d4 100644 --- a/docs/en/guides/view-results-in-terminal.md +++ b/docs/en/guides/view-results-in-terminal.md @@ -58,7 +58,7 @@ The VSCode compatible protocols for viewing images using the integrated terminal from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Run inference on an image results = model.predict(source="ultralytics/assets/bus.jpg") @@ -116,7 +116,7 @@ from sixel import SixelWriter from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Run inference on an image results = model.predict(source="ultralytics/assets/bus.jpg") @@ -169,7 +169,7 @@ To view YOLO inference results in a VSCode terminal on macOS or Linux, follow th ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.predict(source="path_to_image") plot = results[0].plot() ``` diff --git a/docs/en/guides/vision-eye.md b/docs/en/guides/vision-eye.md index 9dfc036cd35..db449c64807 100644 --- a/docs/en/guides/vision-eye.md +++ b/docs/en/guides/vision-eye.md @@ -1,23 +1,23 @@ --- comments: true -description: 
Discover VisionEye's object mapping and tracking powered by Ultralytics YOLOv8. Simulate human eye precision, track objects, and calculate distances effortlessly. -keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, distance calculation, computer vision, AI, machine learning, Python, tutorial +description: Discover VisionEye's object mapping and tracking powered by Ultralytics YOLO11. Simulate human eye precision, track objects, and calculate distances effortlessly. +keywords: VisionEye, YOLO11, Ultralytics, object mapping, object tracking, distance calculation, computer vision, AI, machine learning, Python, tutorial --- -# VisionEye View Object Mapping using Ultralytics YOLOv8 🚀 +# VisionEye View Object Mapping using Ultralytics YOLO11 🚀 ## What is VisionEye Object Mapping? -[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational [precision](https://www.ultralytics.com/glossary/precision) of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint. +[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) VisionEye offers the capability for computers to identify and pinpoint objects, simulating the observational [precision](https://www.ultralytics.com/glossary/precision) of the human eye. This functionality enables computers to discern and focus on specific objects, much like the way the human eye observes details from a particular viewpoint. 
## Samples | VisionEye View | VisionEye View With Object Tracking | VisionEye View With Distance Calculation | | :----------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| ![VisionEye View Object Mapping using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/visioneye-view-object-mapping-yolov8.avif) | ![VisionEye View Object Mapping with Object Tracking using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/visioneye-object-mapping-with-tracking.avif) | ![VisionEye View with Distance Calculation using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/visioneye-distance-calculation-yolov8.avif) | -| VisionEye View Object Mapping using Ultralytics YOLOv8 | VisionEye View Object Mapping with Object Tracking using Ultralytics YOLOv8 | VisionEye View with Distance Calculation using Ultralytics YOLOv8 | +| ![VisionEye View Object Mapping using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/visioneye-view-object-mapping-yolov8.avif) | ![VisionEye View Object Mapping with Object Tracking using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/visioneye-object-mapping-with-tracking.avif) | ![VisionEye View with Distance Calculation using Ultralytics YOLO11](https://github.com/ultralytics/docs/releases/download/0/visioneye-distance-calculation-yolov8.avif) | +| VisionEye View Object Mapping using Ultralytics YOLO11 | VisionEye View Object Mapping with Object Tracking using Ultralytics YOLO11 | 
VisionEye View with Distance Calculation using Ultralytics YOLO11 | -!!! example "VisionEye Object Mapping using YOLOv8" +!!! example "VisionEye Object Mapping using YOLO11" === "VisionEye Object Mapping" @@ -27,7 +27,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") names = model.model.names cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -71,7 +71,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -118,7 +118,7 @@ keywords: VisionEye, YOLOv8, Ultralytics, object mapping, object tracking, dista from ultralytics import YOLO from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolov8s.pt") + model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("Path/to/video/file.mp4") w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) @@ -180,16 +180,16 @@ For any inquiries, feel free to post your questions in the [Ultralytics Issue Se ## FAQ -### How do I start using VisionEye Object Mapping with Ultralytics YOLOv8? +### How do I start using VisionEye Object Mapping with Ultralytics YOLO11? -To start using VisionEye Object Mapping with Ultralytics YOLOv8, first, you'll need to install the Ultralytics YOLO package via pip. 
Then, you can use the sample code provided in the documentation to set up [object detection](https://www.ultralytics.com/glossary/object-detection) with VisionEye. Here's a simple example to get you started: +To start using VisionEye Object Mapping with Ultralytics YOLO11, first, you'll need to install the Ultralytics YOLO package via pip. Then, you can use the sample code provided in the documentation to set up [object detection](https://www.ultralytics.com/glossary/object-detection) with VisionEye. Here's a simple example to get you started: ```python import cv2 from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") while True: @@ -210,12 +210,12 @@ cap.release() cv2.destroyAllWindows() ``` -### What are the key features of VisionEye's object tracking capability using Ultralytics YOLOv8? +### What are the key features of VisionEye's object tracking capability using Ultralytics YOLO11? -VisionEye's object tracking with Ultralytics YOLOv8 allows users to follow the movement of objects within a video frame. Key features include: +VisionEye's object tracking with Ultralytics YOLO11 allows users to follow the movement of objects within a video frame. Key features include: 1. **Real-Time Object Tracking**: Keeps up with objects as they move. -2. **Object Identification**: Utilizes YOLOv8's powerful detection algorithms. +2. **Object Identification**: Utilizes YOLO11's powerful detection algorithms. 3. **Distance Calculation**: Calculates distances between objects and specified points. 4. **Annotation and Visualization**: Provides visual markers for tracked objects. @@ -226,7 +226,7 @@ import cv2 from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") while True: @@ -249,9 +249,9 @@ cv2.destroyAllWindows() For a comprehensive guide, visit the [VisionEye Object Mapping with Object Tracking](#samples). 
-### How can I calculate distances with VisionEye's YOLOv8 model? +### How can I calculate distances with VisionEye's YOLO11 model? -Distance calculation with VisionEye and Ultralytics YOLOv8 involves determining the distance of detected objects from a specified point in the frame. It enhances spatial analysis capabilities, useful in applications such as autonomous driving and surveillance. +Distance calculation with VisionEye and Ultralytics YOLO11 involves determining the distance of detected objects from a specified point in the frame. It enhances spatial analysis capabilities, useful in applications such as autonomous driving and surveillance. Here's a simplified example: @@ -262,7 +262,7 @@ import cv2 from ultralytics import YOLO -model = YOLO("yolov8s.pt") +model = YOLO("yolo11n.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") center_point = (0, 480) # Example center point pixel_per_meter = 10 @@ -290,19 +290,19 @@ cv2.destroyAllWindows() For detailed instructions, refer to the [VisionEye with Distance Calculation](#samples). -### Why should I use Ultralytics YOLOv8 for object mapping and tracking? +### Why should I use Ultralytics YOLO11 for object mapping and tracking? -Ultralytics YOLOv8 is renowned for its speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of integration, making it a top choice for object mapping and tracking. Key advantages include: +Ultralytics YOLO11 is renowned for its speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and ease of integration, making it a top choice for object mapping and tracking. Key advantages include: 1. **State-of-the-art Performance**: Delivers high accuracy in real-time object detection. 2. **Flexibility**: Supports various tasks such as detection, tracking, and distance calculation. 3. **Community and Support**: Extensive documentation and active GitHub community for troubleshooting and enhancements. 4. 
**Ease of Use**: Intuitive API simplifies complex tasks, allowing for rapid deployment and iteration. -For more information on applications and benefits, check out the [Ultralytics YOLOv8 documentation](https://docs.ultralytics.com/models/yolov8/). +For more information on applications and benefits, check out the [Ultralytics YOLO11 documentation](https://docs.ultralytics.com/models/yolo11/). ### How can I integrate VisionEye with other [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) tools like Comet or ClearML? -Ultralytics YOLOv8 can integrate seamlessly with various machine learning tools like Comet and ClearML, enhancing experiment tracking, collaboration, and reproducibility. Follow the detailed guides on [how to use YOLOv5 with Comet](https://www.ultralytics.com/blog/how-to-use-yolov5-with-comet) and [integrate YOLOv8 with ClearML](https://docs.ultralytics.com/integrations/clearml/) to get started. +Ultralytics YOLO11 can integrate seamlessly with various machine learning tools like Comet and ClearML, enhancing experiment tracking, collaboration, and reproducibility. Follow the detailed guides on [how to use YOLOv5 with Comet](https://www.ultralytics.com/blog/how-to-use-yolov5-with-comet) and [integrate YOLO11 with ClearML](https://docs.ultralytics.com/integrations/clearml/) to get started. For further exploration and integration examples, check our [Ultralytics Integrations Guide](https://docs.ultralytics.com/integrations/). diff --git a/docs/en/guides/workouts-monitoring.md b/docs/en/guides/workouts-monitoring.md index 45856316833..3919a708e70 100644 --- a/docs/en/guides/workouts-monitoring.md +++ b/docs/en/guides/workouts-monitoring.md @@ -1,12 +1,14 @@ --- comments: true -description: Optimize your fitness routine with real-time workouts monitoring using Ultralytics YOLOv8. Track and improve your exercise form and performance. 
-keywords: workouts monitoring, Ultralytics YOLOv8, pose estimation, fitness tracking, exercise assessment, real-time feedback, exercise form, performance metrics +description: Optimize your fitness routine with real-time workouts monitoring using Ultralytics YOLO11. Track and improve your exercise form and performance. +keywords: workouts monitoring, Ultralytics YOLO11, pose estimation, fitness tracking, exercise assessment, real-time feedback, exercise form, performance metrics --- -# Workouts Monitoring using Ultralytics YOLOv8 +# Workouts Monitoring using Ultralytics YOLO11 -Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) enhances exercise assessment by accurately tracking key body landmarks and joints in real-time. This technology provides instant feedback on exercise form, tracks workout routines, and measures performance metrics, optimizing training sessions for users and trainers alike. +Open Workouts Monitoring In Colab + +Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) enhances exercise assessment by accurately tracking key body landmarks and joints in real-time. This technology provides instant feedback on exercise form, tracks workout routines, and measures performance metrics, optimizing training sessions for users and trainers alike.


@@ -16,7 +18,7 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi allowfullscreen>
- Watch: Workouts Monitoring using Ultralytics YOLOv8 | Pushups, Pullups, Ab Workouts + Watch: Workouts Monitoring using Ultralytics YOLO11 | Push-ups, Pull-ups, Ab Workouts

## Advantages of Workouts Monitoring? @@ -36,90 +38,68 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi !!! example "Workouts Monitoring Example" - === "Workouts Monitoring" - - ```python - import cv2 - - from ultralytics import YOLO, solutions + === "CLI" - model = YOLO("yolov8n-pose.pt") - cap = cv2.VideoCapture("path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], - ) + ```bash + # Run a workout example + yolo solutions workout show=True - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - results = model.track(im0, verbose=False) # Tracking recommended - # results = model.predict(im0) # Prediction also supported - im0 = gym_object.start_counting(im0, results) + # Pass a source video + yolo solutions workout source="path/to/video/file.mp4" - cv2.destroyAllWindows() + # Use keypoints for pushups + yolo solutions workout kpts=[6, 8, 10] ``` - === "Workouts Monitoring with Save Output" + === "Python" ```python import cv2 - from ultralytics import YOLO, solutions + from ultralytics import solutions - model = YOLO("yolov8n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + # Video writer video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], + # Init AIGym + gym = solutions.AIGym( + show=True, # Display the frame + kpts=[6, 8, 10], # keypoints 
index of person for monitoring specific exercise, by default it's for pushup + model="yolo11n-pose.pt", # Path to the YOLO11 pose estimation model file + # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) # Tracking recommended - # results = model.predict(im0) # Prediction also supported - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) video_writer.write(im0) cv2.destroyAllWindows() video_writer.release() ``` -???+ tip "Support" - - "pushup", "pullup" and "abworkout" supported - ### KeyPoints Map -![keyPoints Order Ultralytics YOLOv8 Pose](https://github.com/ultralytics/docs/releases/download/0/keypoints-order-ultralytics-yolov8-pose.avif) +![keyPoints Order Ultralytics YOLO11 Pose](https://github.com/ultralytics/docs/releases/download/0/keypoints-order-ultralytics-yolov8-pose.avif) ### Arguments `AIGym` -| Name | Type | Default | Description | -| ----------------- | ------- | -------- | -------------------------------------------------------------------------------------- | -| `kpts_to_check` | `list` | `None` | List of three keypoints index, for counting specific workout, followed by keypoint Map | -| `line_thickness` | `int` | `2` | Thickness of the lines drawn. | -| `view_img` | `bool` | `False` | Flag to display the image. | -| `pose_up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. | -| `pose_down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. | -| `pose_type` | `str` | `pullup` | Type of pose to detect (`'pullup`', `pushup`, `abworkout`, `squat`). 
| +| Name | Type | Default | Description | +| ------------ | ------- | ------- | -------------------------------------------------------------------------------------- | +| `kpts` | `list` | `None` | List of three keypoints index, for counting specific workout, followed by keypoint Map | +| `line_width` | `int` | `2` | Thickness of the lines drawn. | +| `show` | `bool` | `False` | Flag to display the image. | +| `up_angle` | `float` | `145.0` | Angle threshold for the 'up' pose. | +| `down_angle` | `float` | `90.0` | Angle threshold for the 'down' pose. | +| `model` | `str` | `None` | Path to Ultralytics YOLO Pose Model File | ### Arguments `model.predict` @@ -131,25 +111,23 @@ Monitoring workouts through pose estimation with [Ultralytics YOLOv8](https://gi ## FAQ -### How do I monitor my workouts using Ultralytics YOLOv8? +### How do I monitor my workouts using Ultralytics YOLO11? -To monitor your workouts using Ultralytics YOLOv8, you can utilize the pose estimation capabilities to track and analyze key body landmarks and joints in real-time. This allows you to receive instant feedback on your exercise form, count repetitions, and measure performance metrics. You can start by using the provided example code for pushups, pullups, or ab workouts as shown: +To monitor your workouts using Ultralytics YOLO11, you can utilize the pose estimation capabilities to track and analyze key body landmarks and joints in real-time. This allows you to receive instant feedback on your exercise form, count repetitions, and measure performance metrics. 
You can start by using the provided example code for push-ups, pull-ups, or ab workouts as shown: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -157,17 +135,16 @@ while cap.isOpened(): if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) cv2.destroyAllWindows() ``` For further customization and settings, you can refer to the [AIGym](#arguments-aigym) section in the documentation. -### What are the benefits of using Ultralytics YOLOv8 for workout monitoring? +### What are the benefits of using Ultralytics YOLO11 for workout monitoring? -Using Ultralytics YOLOv8 for workout monitoring provides several key benefits: +Using Ultralytics YOLO11 for workout monitoring provides several key benefits: - **Optimized Performance:** By tailoring workouts based on monitoring data, you can achieve better results. - **Goal Achievement:** Easily track and adjust fitness goals for measurable progress. @@ -177,48 +154,45 @@ Using Ultralytics YOLOv8 for workout monitoring provides several key benefits: You can watch a [YouTube video demonstration](https://www.youtube.com/watch?v=LGGxqLZtvuw) to see these benefits in action. -### How accurate is Ultralytics YOLOv8 in detecting and tracking exercises? +### How accurate is Ultralytics YOLO11 in detecting and tracking exercises? 
-Ultralytics YOLOv8 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high [precision](https://www.ultralytics.com/glossary/precision) and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases pushups and pullups counting. +Ultralytics YOLO11 is highly accurate in detecting and tracking exercises due to its state-of-the-art pose estimation capabilities. It can accurately track key body landmarks and joints, providing real-time feedback on exercise form and performance metrics. The model's pretrained weights and robust architecture ensure high [precision](https://www.ultralytics.com/glossary/precision) and reliability. For real-world examples, check out the [real-world applications](#real-world-applications) section in the documentation, which showcases push-ups and pull-ups counting. -### Can I use Ultralytics YOLOv8 for custom workout routines? +### Can I use Ultralytics YOLO11 for custom workout routines? -Yes, Ultralytics YOLOv8 can be adapted for custom workout routines. The `AIGym` class supports different pose types such as "pushup", "pullup", and "abworkout." You can specify keypoints and angles to detect specific exercises. Here is an example setup: +Yes, Ultralytics YOLO11 can be adapted for custom workout routines. The `AIGym` class supports different pose types such as `pushup`, `pullup`, and `abworkout`. You can specify keypoints and angles to detect specific exercises. 
Here is an example setup: ```python from ultralytics import solutions -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="squat", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) ``` For more details on setting arguments, refer to the [Arguments `AIGym`](#arguments-aigym) section. This flexibility allows you to monitor various exercises and customize routines based on your needs. -### How can I save the workout monitoring output using Ultralytics YOLOv8? +### How can I save the workout monitoring output using Ultralytics YOLO11? To save the workout monitoring output, you can modify the code to include a video writer that saves the processed frames. Here's an example: ```python import cv2 -from ultralytics import YOLO, solutions +from ultralytics import solutions -model = YOLO("yolov8n-pose.pt") cap = cv2.VideoCapture("path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) -gym_object = solutions.AIGym( - line_thickness=2, - view_img=True, - pose_type="pushup", - kpts_to_check=[6, 8, 10], +gym = solutions.AIGym( + line_width=2, + show=True, + kpts=[6, 8, 10], ) while cap.isOpened(): @@ -226,12 +200,11 @@ while cap.isOpened(): if not success: print("Video frame is empty or video processing has been successfully completed.") break - results = model.track(im0, verbose=False) - im0 = gym_object.start_counting(im0, results) + im0 = gym.monitor(im0) video_writer.write(im0) cv2.destroyAllWindows() video_writer.release() ``` -This setup writes the monitored video to an output file. For more details, refer to the [Workouts Monitoring with Save Output](#workouts-monitoring-using-ultralytics-yolov8) section. +This setup writes the monitored video to an output file. 
For more details, refer to the [Workouts Monitoring with Save Output](#workouts-monitoring-using-ultralytics-yolo11) section. diff --git a/docs/en/guides/yolo-common-issues.md b/docs/en/guides/yolo-common-issues.md index 6da5d164e9e..19a5eb421b1 100644 --- a/docs/en/guides/yolo-common-issues.md +++ b/docs/en/guides/yolo-common-issues.md @@ -1,7 +1,7 @@ --- comments: true -description: Comprehensive guide to troubleshoot common YOLOv8 issues, from installation errors to model training challenges. Enhance your Ultralytics projects with our expert tips. -keywords: YOLO, YOLOv8, troubleshooting, installation errors, model training, GPU issues, Ultralytics, AI, computer vision, deep learning, Python, CUDA, PyTorch, debugging +description: Comprehensive guide to troubleshoot common YOLO11 issues, from installation errors to model training challenges. Enhance your Ultralytics projects with our expert tips. +keywords: YOLO, YOLO11, troubleshooting, installation errors, model training, GPU issues, Ultralytics, AI, computer vision, deep learning, Python, CUDA, PyTorch, debugging --- # Troubleshooting Common YOLO Issues @@ -12,7 +12,7 @@ keywords: YOLO, YOLOv8, troubleshooting, installation errors, model training, GP ## Introduction -This guide serves as a comprehensive aid for troubleshooting common issues encountered while working with YOLOv8 on your Ultralytics projects. Navigating through these issues can be a breeze with the right guidance, ensuring your projects remain on track without unnecessary delays. +This guide serves as a comprehensive aid for troubleshooting common issues encountered while working with YOLO11 on your Ultralytics projects. Navigating through these issues can be a breeze with the right guidance, ensuring your projects remain on track without unnecessary delays.


@@ -22,7 +22,7 @@ This guide serves as a comprehensive aid for troubleshooting common issues encou allowfullscreen>
- Watch: Ultralytics YOLOv8 Common Issues | Installation Errors, Model Training Issues + Watch: Ultralytics YOLO11 Common Issues | Installation Errors, Model Training Issues

## Common Issues @@ -41,7 +41,7 @@ Installation errors can arise due to various reasons, such as incompatible versi Additionally, here are some common installation issues users have encountered, along with their respective solutions: -- Import Errors or Dependency Issues - If you're getting errors during the import of YOLOv8, or you're having issues related to dependencies, consider the following troubleshooting steps: +- Import Errors or Dependency Issues - If you're getting errors during the import of YOLO11, or you're having issues related to dependencies, consider the following troubleshooting steps: - **Fresh Installation**: Sometimes, starting with a fresh installation can resolve unexpected issues. Especially with libraries like Ultralytics, where updates might introduce changes to the file tree structure or functionalities. @@ -53,7 +53,7 @@ Additionally, here are some common installation issues users have encountered, a - Remember, keeping your libraries and dependencies up-to-date is crucial for a smooth and error-free experience. -- Running YOLOv8 on GPU - If you're having trouble running YOLOv8 on GPU, consider the following troubleshooting steps: +- Running YOLO11 on GPU - If you're having trouble running YOLO11 on GPU, consider the following troubleshooting steps: - **Verify CUDA Compatibility and Installation**: Ensure your GPU is CUDA compatible and that CUDA is correctly installed. Use the `nvidia-smi` command to check the status of your NVIDIA GPU and CUDA version. @@ -63,7 +63,7 @@ Additionally, here are some common installation issues users have encountered, a - **Update Your Packages**: Outdated packages might not be compatible with your GPU. Keep them updated. - - **Program Configuration**: Check if the program or code specifies GPU usage. In YOLOv8, this might be in the settings or configuration. + - **Program Configuration**: Check if the program or code specifies GPU usage. In YOLO11, this might be in the settings or configuration. 
### Model Training Issues @@ -119,7 +119,7 @@ You can access these metrics from the training logs or by using tools like Tenso **Solution**: To track and visualize training progress, you can consider using the following tools: -- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, [accuracy](https://www.ultralytics.com/glossary/accuracy), and more. You can integrate it with your YOLOv8 training process. +- [TensorBoard](https://www.tensorflow.org/tensorboard): TensorBoard is a popular choice for visualizing training metrics, including loss, [accuracy](https://www.ultralytics.com/glossary/accuracy), and more. You can integrate it with your YOLO11 training process. - [Comet](https://bit.ly/yolov8-readme-comet): Comet provides an extensive toolkit for experiment tracking and comparison. It allows you to track metrics, hyperparameters, and even model weights. Integration with YOLO models is also straightforward, providing you with a complete overview of your experiment cycle. - [Ultralytics HUB](https://hub.ultralytics.com/): Ultralytics HUB offers a specialized environment for tracking YOLO models, giving you a one-stop platform to manage metrics, datasets, and even collaborate with your team. Given its tailored focus on YOLO, it offers more customized tracking options. @@ -177,13 +177,13 @@ Here are some things to keep in mind, if you are facing issues related to model This section will address common issues faced during model prediction. -#### Getting Bounding Box Predictions With Your YOLOv8 Custom Model +#### Getting Bounding Box Predictions With Your YOLO11 Custom Model -**Issue**: When running predictions with a custom YOLOv8 model, there are challenges with the format and visualization of the bounding box coordinates. +**Issue**: When running predictions with a custom YOLO11 model, there are challenges with the format and visualization of the bounding box coordinates. 
**Solution**: -- Coordinate Format: YOLOv8 provides bounding box coordinates in absolute pixel values. To convert these to relative coordinates (ranging from 0 to 1), you need to divide by the image dimensions. For example, let's say your image size is 640x640. Then you would do the following: +- Coordinate Format: YOLO11 provides bounding box coordinates in absolute pixel values. To convert these to relative coordinates (ranging from 0 to 1), you need to divide by the image dimensions. For example, let's say your image size is 640x640. Then you would do the following: ```python # Convert absolute coordinates to relative coordinates @@ -195,33 +195,33 @@ y2 = y2 / 640 - File Name: To obtain the file name of the image you're predicting on, access the image file path directly from the result object within your prediction loop. -#### Filtering Objects in YOLOv8 Predictions +#### Filtering Objects in YOLO11 Predictions -**Issue**: Facing issues with how to filter and display only specific objects in the prediction results when running YOLOv8 using the Ultralytics library. +**Issue**: Facing issues with how to filter and display only specific objects in the prediction results when running YOLO11 using the Ultralytics library. **Solution**: To detect specific classes use the classes argument to specify the classes you want to include in the output. For instance, to detect only cars (assuming 'cars' have class index 2): ```shell -yolo task=detect mode=segment model=yolov8n-seg.pt source='path/to/car.mp4' show=True classes=2 +yolo task=segment mode=predict model=yolo11n-seg.pt source='path/to/car.mp4' show=True classes=2 ``` -#### Understanding Precision Metrics in YOLOv8 -**Issue**: Confusion regarding the difference between box precision, mask precision, and [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) precision in YOLOv8.
+**Issue**: Confusion regarding the difference between box precision, mask precision, and [confusion matrix](https://www.ultralytics.com/glossary/confusion-matrix) precision in YOLO11. **Solution**: Box precision measures the accuracy of predicted bounding boxes compared to the actual ground truth boxes using IoU (Intersection over Union) as the metric. Mask precision assesses the agreement between predicted segmentation masks and ground truth masks in pixel-wise object classification. Confusion matrix precision, on the other hand, focuses on overall classification accuracy across all classes and does not consider the geometric accuracy of predictions. It's important to note that a [bounding box](https://www.ultralytics.com/glossary/bounding-box) can be geometrically accurate (true positive) even if the class prediction is wrong, leading to differences between box precision and confusion matrix precision. These metrics evaluate distinct aspects of a model's performance, reflecting the need for different evaluation metrics in various tasks. -#### Extracting Object Dimensions in YOLOv8 +#### Extracting Object Dimensions in YOLO11 -**Issue**: Difficulty in retrieving the length and height of detected objects in YOLOv8, especially when multiple objects are detected in an image. +**Issue**: Difficulty in retrieving the length and height of detected objects in YOLO11, especially when multiple objects are detected in an image. -**Solution**: To retrieve the bounding box dimensions, first use the Ultralytics YOLOv8 model to predict objects in an image. Then, extract the width and height information of bounding boxes from the prediction results. +**Solution**: To retrieve the bounding box dimensions, first use the Ultralytics YOLO11 model to predict objects in an image. Then, extract the width and height information of bounding boxes from the prediction results. 
```python from ultralytics import YOLO -# Load a pre-trained YOLOv8 model -model = YOLO("yolov8n.pt") +# Load a pre-trained YOLO11 model +model = YOLO("yolo11n.pt") # Specify the source image source = "https://ultralytics.com/images/bus.jpg" @@ -264,23 +264,23 @@ for box in boxes: ## Community and Support -Engaging with a community of like-minded individuals can significantly enhance your experience and success in working with YOLOv8. Below are some channels and resources you may find helpful. +Engaging with a community of like-minded individuals can significantly enhance your experience and success in working with YOLO11. Below are some channels and resources you may find helpful. ### Forums and Channels for Getting Help -**GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems. +**GitHub Issues:** The YOLO11 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems. **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and the developers. ### Official Documentation and Resources -**Ultralytics YOLOv8 Docs**: The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. +**Ultralytics YOLO11 Docs**: The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting. 
-These resources should provide a solid foundation for troubleshooting and improving your YOLOv8 projects, as well as connecting with others in the YOLOv8 community. +These resources should provide a solid foundation for troubleshooting and improving your YOLO11 projects, as well as connecting with others in the YOLO11 community. ## Conclusion -Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLOv8 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. +Troubleshooting is an integral part of any development process, and being equipped with the right knowledge can significantly reduce the time and effort spent in resolving issues. This guide aimed to address the most common challenges faced by users of the YOLO11 model within the Ultralytics ecosystem. By understanding and addressing these common issues, you can ensure smoother project progress and achieve better results with your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. Remember, the Ultralytics community is a valuable resource. Engaging with fellow developers and experts can provide additional insights and solutions that might not be covered in standard documentation. Always keep learning, experimenting, and sharing your experiences to contribute to the collective knowledge of the community. @@ -288,11 +288,11 @@ Happy troubleshooting! ## FAQ -### How do I resolve installation errors with YOLOv8? +### How do I resolve installation errors with YOLO11? Installation errors can often be due to compatibility issues or missing dependencies. 
Ensure you use Python 3.8 or later and have PyTorch 1.8 or later installed. It's beneficial to use virtual environments to avoid conflicts. For a step-by-step installation guide, follow our [official installation guide](../quickstart.md). If you encounter import errors, try a fresh installation or update the library to the latest version. -### Why is my YOLOv8 model training slow on a single GPU? +### Why is my YOLO11 model training slow on a single GPU? Training on a single GPU might be slow due to large batch sizes or insufficient memory. To speed up training, use multiple GPUs. Ensure your system has multiple GPUs available and adjust your `.yaml` configuration file to specify the number of GPUs, e.g., `gpus: 4`. Increase the batch size accordingly to fully utilize the GPUs without exceeding memory limits. Example command: @@ -300,7 +300,7 @@ Training on a single GPU might be slow due to large batch sizes or insufficient model.train(data="/path/to/your/data.yaml", batch=32, multi_scale=True) ``` -### How can I ensure my YOLOv8 model is training on the GPU? +### How can I ensure my YOLO11 model is training on the GPU? If the 'device' value shows 'null' in the training logs, it generally means the training process is set to automatically use an available GPU. To explicitly assign a specific GPU, set the 'device' value in your `.yaml` configuration file. For instance: @@ -310,10 +310,10 @@ device: 0 This sets the training process to the first GPU. Consult the `nvidia-smi` command to confirm your CUDA setup. -### How can I monitor and track my YOLOv8 model training progress? +### How can I monitor and track my YOLO11 model training progress? Tracking and visualizing training progress can be efficiently managed through tools like [TensorBoard](https://www.tensorflow.org/tensorboard), [Comet](https://bit.ly/yolov8-readme-comet), and [Ultralytics HUB](https://hub.ultralytics.com/). 
These tools allow you to log and visualize metrics such as loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP. Implementing [early stopping](#continuous-monitoring-parameters) based on these metrics can also help achieve better training outcomes. -### What should I do if YOLOv8 is not recognizing my dataset format? +### What should I do if YOLO11 is not recognizing my dataset format? Ensure your dataset and labels conform to the expected format. Verify that annotations are accurate and of high quality. If you face any issues, refer to the [Data Collection and Annotation](https://docs.ultralytics.com/guides/data-collection-and-annotation/) guide for best practices. For more dataset-specific guidance, check the [Datasets](https://docs.ultralytics.com/datasets/) section in the documentation. diff --git a/docs/en/guides/yolo-performance-metrics.md b/docs/en/guides/yolo-performance-metrics.md index aeed82355d1..27ad142dfa9 100644 --- a/docs/en/guides/yolo-performance-metrics.md +++ b/docs/en/guides/yolo-performance-metrics.md @@ -1,14 +1,14 @@ --- comments: true -description: Explore essential YOLOv8 performance metrics like mAP, IoU, F1 Score, Precision, and Recall. Learn how to calculate and interpret them for model evaluation. -keywords: YOLOv8 performance metrics, mAP, IoU, F1 Score, Precision, Recall, object detection, Ultralytics +description: Explore essential YOLO11 performance metrics like mAP, IoU, F1 Score, Precision, and Recall. Learn how to calculate and interpret them for model evaluation. +keywords: YOLO11 performance metrics, mAP, IoU, F1 Score, Precision, Recall, object detection, Ultralytics --- # Performance Metrics Deep Dive ## Introduction -Performance metrics are key tools to evaluate the [accuracy](https://www.ultralytics.com/glossary/accuracy) and efficiency of [object detection](https://www.ultralytics.com/glossary/object-detection) models. 
They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLOv8, their significance, and how to interpret them. +Performance metrics are key tools to evaluate the [accuracy](https://www.ultralytics.com/glossary/accuracy) and efficiency of [object detection](https://www.ultralytics.com/glossary/object-detection) models. They shed light on how effectively a model can identify and localize objects within images. Additionally, they help in understanding the model's handling of false positives and false negatives. These insights are crucial for evaluating and enhancing the model's performance. In this guide, we will explore various performance metrics associated with YOLO11, their significance, and how to interpret them.


@@ -18,12 +18,12 @@ Performance metrics are key tools to evaluate the [accuracy](https://www.ultraly allowfullscreen>
- Watch: Ultralytics YOLOv8 Performance Metrics | MAP, F1 Score, [Precision](https://www.ultralytics.com/glossary/precision), IoU & Accuracy + Watch: Ultralytics YOLO11 Performance Metrics | MAP, F1 Score, Precision, IoU & Accuracy

## Object Detection Metrics -Let's start by discussing some metrics that are not only important to YOLOv8 but are broadly applicable across different object detection models. +Let's start by discussing some metrics that are not only important to YOLO11 but are broadly applicable across different object detection models. - **[Intersection over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU):** IoU is a measure that quantifies the overlap between a predicted [bounding box](https://www.ultralytics.com/glossary/bounding-box) and a ground truth bounding box. It plays a fundamental role in evaluating the accuracy of object localization. @@ -35,9 +35,9 @@ Let's start by discussing some metrics that are not only important to YOLOv8 but - **F1 Score:** The F1 Score is the harmonic mean of precision and recall, providing a balanced assessment of a model's performance while considering both false positives and false negatives. -## How to Calculate Metrics for YOLOv8 Model +## How to Calculate Metrics for YOLO11 Model -Now, we can explore [YOLOv8's Validation mode](../modes/val.md) that can be used to compute the above discussed evaluation metrics. +Now, we can explore [YOLO11's Validation mode](../modes/val.md) that can be used to compute the above discussed evaluation metrics. Using the validation mode is simple. Once you have a trained model, you can invoke the model.val() function. This function will then process the validation dataset and return a variety of performance metrics. But what do these metrics mean? And how should you interpret them? @@ -91,7 +91,7 @@ The model.val() function, apart from producing numeric metrics, also yields visu - **Validation Batch Labels (`val_batchX_labels.jpg`)**: These images depict the ground truth labels for distinct batches from the validation dataset. They provide a clear picture of what the objects are and their respective locations as per the dataset. 
-- **Validation Batch Predictions (`val_batchX_pred.jpg`)**: Contrasting the label images, these visuals display the predictions made by the YOLOv8 model for the respective batches. By comparing these to the label images, you can easily assess how well the model detects and classifies objects visually. +- **Validation Batch Predictions (`val_batchX_pred.jpg`)**: Contrasting the label images, these visuals display the predictions made by the YOLO11 model for the respective batches. By comparing these to the label images, you can easily assess how well the model detects and classifies objects visually. #### Results Storage @@ -153,56 +153,56 @@ Real-world examples can help clarify how these metrics work in practice. ## Connect and Collaborate -Tapping into a community of enthusiasts and experts can amplify your journey with YOLOv8. Here are some avenues that can facilitate learning, troubleshooting, and networking. +Tapping into a community of enthusiasts and experts can amplify your journey with YOLO11. Here are some avenues that can facilitate learning, troubleshooting, and networking. ### Engage with the Broader Community -- **GitHub Issues:** The YOLOv8 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems. +- **GitHub Issues:** The YOLO11 repository on GitHub has an [Issues tab](https://github.com/ultralytics/ultralytics/issues) where you can ask questions, report bugs, and suggest new features. The community and maintainers are active here, and it's a great place to get help with specific problems. - **Ultralytics Discord Server:** Ultralytics has a [Discord server](https://discord.com/invite/ultralytics) where you can interact with other users and the developers. 
### Official Documentation and Resources: -- **Ultralytics YOLOv8 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLOv8, along with guides on installation, usage, and troubleshooting. +- **Ultralytics YOLO11 Docs:** The [official documentation](../index.md) provides a comprehensive overview of YOLO11, along with guides on installation, usage, and troubleshooting. -Using these resources will not only guide you through any challenges but also keep you updated with the latest trends and best practices in the YOLOv8 community. +Using these resources will not only guide you through any challenges but also keep you updated with the latest trends and best practices in the YOLO11 community. ## Conclusion -In this guide, we've taken a close look at the essential performance metrics for YOLOv8. These metrics are key to understanding how well a model is performing and are vital for anyone aiming to fine-tune their models. They offer the necessary insights for improvements and to make sure the model works effectively in real-life situations. +In this guide, we've taken a close look at the essential performance metrics for YOLO11. These metrics are key to understanding how well a model is performing and are vital for anyone aiming to fine-tune their models. They offer the necessary insights for improvements and to make sure the model works effectively in real-life situations. -Remember, the YOLOv8 and Ultralytics community is an invaluable asset. Engaging with fellow developers and experts can open doors to insights and solutions not found in standard documentation. As you journey through object detection, keep the spirit of learning alive, experiment with new strategies, and share your findings. By doing so, you contribute to the community's collective wisdom and ensure its growth. +Remember, the YOLO11 and Ultralytics community is an invaluable asset. 
Engaging with fellow developers and experts can open doors to insights and solutions not found in standard documentation. As you journey through object detection, keep the spirit of learning alive, experiment with new strategies, and share your findings. By doing so, you contribute to the community's collective wisdom and ensure its growth. Happy object detecting! ## FAQ -### What is the significance of [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) in evaluating YOLOv8 model performance? +### What is the significance of [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP) in evaluating YOLO11 model performance? -Mean Average Precision (mAP) is crucial for evaluating YOLOv8 models as it provides a single metric encapsulating precision and recall across multiple classes. mAP@0.50 measures precision at an IoU threshold of 0.50, focusing on the model's ability to detect objects correctly. mAP@0.50:0.95 averages precision across a range of IoU thresholds, offering a comprehensive assessment of detection performance. High mAP scores indicate that the model effectively balances precision and recall, essential for applications like autonomous driving and surveillance. +Mean Average Precision (mAP) is crucial for evaluating YOLO11 models as it provides a single metric encapsulating precision and recall across multiple classes. mAP@0.50 measures precision at an IoU threshold of 0.50, focusing on the model's ability to detect objects correctly. mAP@0.50:0.95 averages precision across a range of IoU thresholds, offering a comprehensive assessment of detection performance. High mAP scores indicate that the model effectively balances precision and recall, essential for applications like autonomous driving and surveillance. -### How do I interpret the Intersection over Union (IoU) value for YOLOv8 object detection? 
+### How do I interpret the Intersection over Union (IoU) value for YOLO11 object detection? Intersection over Union (IoU) measures the overlap between the predicted and ground truth bounding boxes. IoU values range from 0 to 1, where higher values indicate better localization accuracy. An IoU of 1.0 means perfect alignment. Typically, an IoU threshold of 0.50 is used to define true positives in metrics like mAP. Lower IoU values suggest that the model struggles with precise object localization, which can be improved by refining bounding box regression or increasing annotation accuracy. -### Why is the F1 Score important for evaluating YOLOv8 models in object detection? +### Why is the F1 Score important for evaluating YOLO11 models in object detection? -The F1 Score is important for evaluating YOLOv8 models because it provides a harmonic mean of precision and recall, balancing both false positives and false negatives. It is particularly valuable when dealing with imbalanced datasets or applications where either precision or recall alone is insufficient. A high F1 Score indicates that the model effectively detects objects while minimizing both missed detections and false alarms, making it suitable for critical applications like security systems and medical imaging. +The F1 Score is important for evaluating YOLO11 models because it provides a harmonic mean of precision and recall, balancing both false positives and false negatives. It is particularly valuable when dealing with imbalanced datasets or applications where either precision or recall alone is insufficient. A high F1 Score indicates that the model effectively detects objects while minimizing both missed detections and false alarms, making it suitable for critical applications like security systems and medical imaging. -### What are the key advantages of using Ultralytics YOLOv8 for real-time object detection? +### What are the key advantages of using Ultralytics YOLO11 for real-time object detection? 
-Ultralytics YOLOv8 offers multiple advantages for real-time object detection: +Ultralytics YOLO11 offers multiple advantages for real-time object detection: - **Speed and Efficiency**: Optimized for high-speed inference, suitable for applications requiring low latency. - **High Accuracy**: Advanced algorithm ensures high mAP and IoU scores, balancing precision and recall. - **Flexibility**: Supports various tasks including object detection, segmentation, and classification. - **Ease of Use**: User-friendly interfaces, extensive documentation, and seamless integration with platforms like Ultralytics HUB ([HUB Quickstart](../hub/quickstart.md)). -This makes YOLOv8 ideal for diverse applications from autonomous vehicles to smart city solutions. +This makes YOLO11 ideal for diverse applications from autonomous vehicles to smart city solutions. -### How can validation metrics from YOLOv8 help improve model performance? +### How can validation metrics from YOLO11 help improve model performance? -Validation metrics from YOLOv8 like precision, recall, mAP, and IoU help diagnose and improve model performance by providing insights into different aspects of detection: +Validation metrics from YOLO11 like precision, recall, mAP, and IoU help diagnose and improve model performance by providing insights into different aspects of detection: - **Precision**: Helps identify and minimize false positives. - **Recall**: Ensures all relevant objects are detected. 
diff --git a/docs/en/guides/yolo-thread-safe-inference.md b/docs/en/guides/yolo-thread-safe-inference.md index c086685152b..b66af30ab07 100644 --- a/docs/en/guides/yolo-thread-safe-inference.md +++ b/docs/en/guides/yolo-thread-safe-inference.md @@ -33,7 +33,7 @@ from threading import Thread from ultralytics import YOLO # Instantiate the model outside the thread -shared_model = YOLO("yolov8n.pt") +shared_model = YOLO("yolo11n.pt") def predict(image_path): @@ -60,8 +60,8 @@ from threading import Thread from ultralytics import YOLO # Instantiate multiple models outside the thread -shared_model_1 = YOLO("yolov8n_1.pt") -shared_model_2 = YOLO("yolov8n_2.pt") +shared_model_1 = YOLO("yolo11n_1.pt") +shared_model_2 = YOLO("yolo11n_2.pt") def predict(model, image_path): @@ -94,7 +94,7 @@ from ultralytics import YOLO def thread_safe_predict(image_path): """Predict on an image using a new YOLO model instance in a thread-safe manner; takes image path as input.""" - local_model = YOLO("yolov8n.pt") + local_model = YOLO("yolo11n.pt") results = local_model.predict(image_path) # Process results @@ -128,7 +128,7 @@ from ultralytics import YOLO def thread_safe_predict(image_path): """Predict on an image in a thread-safe manner.""" - local_model = YOLO("yolov8n.pt") + local_model = YOLO("yolo11n.pt") results = local_model.predict(image_path) # Process results @@ -157,7 +157,7 @@ from ultralytics import YOLO def thread_safe_predict(image_path): """Runs inference in a thread-safe manner with a new YOLO model instance.""" - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.predict(image_path) # Process results diff --git a/docs/en/help/CI.md b/docs/en/help/CI.md index 93b1ad32228..140303859cd 100644 --- a/docs/en/help/CI.md +++ b/docs/en/help/CI.md @@ -12,8 +12,8 @@ Continuous Integration (CI) is an essential aspect of software development which Here's a brief description of our CI actions: -- 
**[CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml):** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository. -- **[Docker Deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml):** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly. +- **[CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml):** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository. +- **[Docker Deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml):** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly. - **[Broken Links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml):** This test scans the codebase for any broken or dead links in our markdown or HTML files. - **[CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml):** CodeQL is a tool from GitHub that performs semantic analysis on our code, helping to find potential security vulnerabilities and maintain high-quality code. - **[PyPI Publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml):** This test checks if the project can be packaged and published to PyPi without any errors. 
@@ -22,13 +22,18 @@ Here's a brief description of our CI actions: Below is the table showing the status of these CI tests for our main repositories: -| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPI and Docs Publishing | -| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | 
[![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | | -| [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | | -| [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [![Publish Docker Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [![Check Broken links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [![Publish to PyPI and Deploy Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) | -| [hub](https://github.com/ultralytics/hub) | [![HUB 
CI](https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | | -| [docs](https://github.com/ultralytics/docs) | | | [![Check Broken links](https://github.com/ultralytics/docs/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/links.yml)[![Check Domains](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) | +| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPI and Docs Publishing | +| --------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/github-code-scanning/codeql) | | +| [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/github-code-scanning/codeql) | | +| [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml) | [![Publish Docker 
Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI and Deploy Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) | +| [hub-sdk](https://github.com/ultralytics/hub-sdk) | [![HUB-SDK CI](https://github.com/ultralytics/hub-sdk/actions/workflows/ci.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/ci.yml) | | [![Check Broken links](https://github.com/ultralytics/hub-sdk/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/hub-sdk/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/hub-sdk/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/hub-sdk/actions/workflows/publish.yml) | +| [hub](https://github.com/ultralytics/hub) | [![HUB CI](https://github.com/ultralytics/hub/actions/workflows/ci.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | | +| [mkdocs](https://github.com/ultralytics/mkdocs) | [![Ultralytics 
Actions](https://github.com/ultralytics/mkdocs/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/mkdocs/actions/workflows/format.yml) | | | [![CodeQL](https://github.com/ultralytics/mkdocs/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/mkdocs/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/mkdocs/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/mkdocs/actions/workflows/publish.yml) | +| [thop](https://github.com/ultralytics/thop) | [![Ultralytics Actions](https://github.com/ultralytics/thop/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/format.yml) | | | [![CodeQL](https://github.com/ultralytics/thop/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/thop/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/thop/actions/workflows/publish.yml) | +| [actions](https://github.com/ultralytics/actions) | [![Ultralytics Actions](https://github.com/ultralytics/actions/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/format.yml) | | | [![CodeQL](https://github.com/ultralytics/actions/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/github-code-scanning/codeql) | [![Publish to PyPI](https://github.com/ultralytics/actions/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/actions/actions/workflows/publish.yml) | +| [docs](https://github.com/ultralytics/docs) | [![Ultralytics Actions](https://github.com/ultralytics/docs/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/format.yml) | | [![Check Broken 
links](https://github.com/ultralytics/docs/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/links.yml)[![Check Domains](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/check_domains.yml) | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) | +| [handbook](https://github.com/ultralytics/handbook) | [![Ultralytics Actions](https://github.com/ultralytics/handbook/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/handbook/actions/workflows/format.yml) | | [![Check Broken links](https://github.com/ultralytics/handbook/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/handbook/actions/workflows/links.yml) | | [![pages-build-deployment](https://github.com/ultralytics/handbook/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/handbook/actions/workflows/pages/pages-build-deployment) | Each badge shows the status of the last run of the corresponding CI test on the `main` branch of the respective repository. If a test fails, the badge will display a "failing" status, and if it passes, it will display a "passing" status. @@ -56,7 +61,7 @@ To quickly get a glimpse of the code coverage status of the `ultralytics` python In the sunburst graphic below, the innermost circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively. - + Ultralytics Codecov Image @@ -64,7 +69,7 @@ In the sunburst graphic below, the innermost circle is the entire project, movin ### What is Continuous Integration (CI) in Ultralytics? 
-Continuous Integration (CI) in Ultralytics involves automatically integrating and testing code changes to ensure high-quality standards. Our CI setup includes running [unit tests, linting checks, and comprehensive tests](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml). Additionally, we perform [Docker deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml), [broken link checks](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml), [CodeQL analysis](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) for security vulnerabilities, and [PyPI publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) to package and distribute our software. +Continuous Integration (CI) in Ultralytics involves automatically integrating and testing code changes to ensure high-quality standards. Our CI setup includes running [unit tests, linting checks, and comprehensive tests](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yml). Additionally, we perform [Docker deployment](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml), [broken link checks](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml), [CodeQL analysis](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql) for security vulnerabilities, and [PyPI publishing](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) to package and distribute our software. ### How does Ultralytics check for broken links in documentation and code? @@ -76,7 +81,7 @@ Ultralytics uses a specific CI action to [check for broken links](https://github ### How does Ultralytics utilize Docker for deployment? -Ultralytics employs Docker to validate the deployment of our projects through a dedicated CI action. 
This process ensures that our [Dockerfile and associated scripts](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) are functioning correctly, allowing for consistent and reproducible deployment environments which are critical for scalable and reliable AI solutions. +Ultralytics employs Docker to validate the deployment of our projects through a dedicated CI action. This process ensures that our [Dockerfile and associated scripts](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yml) are functioning correctly, allowing for consistent and reproducible deployment environments which are critical for scalable and reliable AI solutions. ### What is the role of automated PyPI publishing in Ultralytics? diff --git a/docs/en/help/CLA.md b/docs/en/help/CLA.md index ebee3c4dc0d..58765c2f2e4 100644 --- a/docs/en/help/CLA.md +++ b/docs/en/help/CLA.md @@ -5,46 +5,126 @@ keywords: Ultralytics, Contributor License Agreement, open source, contributions # Ultralytics Individual Contributor License Agreement -Thank you for your interest in contributing to open source software projects (“Projects”) made available by Ultralytics Inc. (“Ultralytics”). This Individual Contributor License Agreement (“Agreement”) sets out the terms governing any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that you submit or have submitted, in any form and in any manner, to Ultralytics in respect of any Projects (collectively “Contributions”). If you have any questions respecting this Agreement, please contact hello@ultralytics.com. +Thank you for your interest in contributing to software projects managed by Ultralytics Inc. ("**Ultralytics**", "**We**" or "**Us**"). 
This Contributor License Agreement ("**Agreement**") sets out the rights granted by contributors ("**You**" or "**Your**") to Us and the terms governing any contributions as defined in Section 1. This license is for your protection as a Contributor as well as the protection of Ultralytics; it does not change your rights to use your own Contributions for any other purpose. -You agree that the following terms apply to all of your past, present and future Contributions. Except for the licenses granted in this Agreement, you retain all of your right, title and interest in and to your Contributions. +By accepting and agreeing to these terms and conditions You accept and agree to the following terms and conditions for Your past, present and future Contributions submitted to Ultralytics. Except for the license granted herein to Ultralytics and recipients of software distributed by Ultralytics, You reserve all right, title, and interest in and to Your Contributions. -**Copyright License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. +If you have any questions respecting this Agreement, please contact hello@ultralytics.com. 
-**Patent License.** You hereby grant, and agree to grant, to Ultralytics a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer your Contributions, where such license applies only to those patent claims licensable by you that are necessarily infringed by your Contributions alone or by combination of your Contributions with the Project to which such Contributions were submitted, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. +## 1. Definitions -**Moral Rights.** To the fullest extent permitted under applicable law, you hereby waive, and agree not to assert, all of your “moral rights” in or relating to your Contributions for the benefit of Ultralytics, its assigns, and their respective direct and indirect sublicensees. +### 1.1 "You" or "Your" -**Third Party Content/Rights.** If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), then you agree to include with the submission of your Contribution full details respecting such Third Party Content and Third Party Rights, including, without limitation, identification of which aspects of your Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable third party license terms or restrictions respecting the Third Party Content and Third Party Rights. 
For greater certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. +Shall mean the individual who submits a Contribution to Ultralytics or the legal entity authorized by the copyright owner that is making this Agreement with Ultralytics. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. -**Representations.** You represent that, other than the Third Party Content and Third Party Rights identified by you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were created in the course of your employment with your past or present employer(s), you represent that such employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer(s) has waived all of their right, title or interest in or to your Contributions. +### 1.2 "Contribution" -**Disclaimer.** To the fullest extent permitted under applicable law, your Contributions are provided on an "asis" basis, without any warranties or conditions, express or implied, including, without limitation, any implied warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not required to provide support for your Contributions, except to the extent you desire to provide support. 
+Shall mean any original work of authorship, including but not limited to source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information, or any other works of authorship, that is intentionally submitted by You to Ultralytics, in any form and in any manner, for inclusion in, or documentation of, any of the projects managed by Ultralytics (the "**Work**"). This includes any modifications or additions to existing works that are submitted for the purpose of contributing to a Project and improving the Work. -**No Obligation.** You acknowledge that Ultralytics is under no obligation to use or incorporate your Contributions into any of the Projects. The decision to use or incorporate your Contributions into any of the Projects will be made at the sole discretion of Ultralytics or its authorized delegates. +### 1.3 "Copyright" -**Disputes.** This Agreement shall be governed by and construed in accordance with the laws of the State of New York, United States of America, without giving effect to its principles or rules regarding conflicts of laws, other than such principles directing application of New York law. The parties hereby submit to venue in, and jurisdiction of the courts located in New York, New York for purposes relating to this Agreement. In the event that any of the provisions of this Agreement shall be held by a court or other tribunal of competent jurisdiction to be unenforceable, the remaining portions hereof shall remain in full force and effect. +Means all rights protecting works of authorship owned or controlled by You, including copyright, moral and neighboring rights, as appropriate, for the full term of their existence including any extensions by You. -**Assignment.** You agree that Ultralytics may assign this Agreement, and all of its rights, obligations and licenses hereunder. 
+### 1.4 "Submit" or "Submission" or "Submitted" + +Or any derivatives shall mean any form of electronic, verbal, or written communication sent to Ultralytics or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, Ultralytics for the purpose of discussing and improving the Project, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." + +### 1.5 "Project" + +Shall mean any of the software projects owned, managed, or maintained by Ultralytics, including but not limited to open-source projects made available by Ultralytics to which Contributions may be submitted. + +## 2. Grant of Rights + +### 2.1 Copyright License + +To the maximum extent permitted by the relevant law, and subject to the terms and conditions of this Agreement, You hereby grant to Ultralytics and to recipients of software distributed by Ultralytics a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. + +### 2.2 Patent License + +To the maximum extent permitted by the relevant law, and subject to the terms and conditions of this Agreement, You hereby grant to Ultralytics and to recipients of software distributed by Ultralytics a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. 
If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. + +### 2.3 Outbound License + +Based on the grant of rights in Sections 2.1 and 2.2, if We include Your Contribution in a Material, We may license the Contribution under any license, including copyleft, permissive, commercial, or proprietary licenses. + +### 2.4 Moral Rights + +To the fullest extent permitted by law, You hereby waive, and agree not to assert, all of Your "moral rights" in or relating to Your Contributions for the benefit of Ultralytics, its assigns, and their respective direct and indirect sublicensees. + +## 3. Representations and Warranties + +You represent that: + +(a) You have the legal authority to enter into this Agreement. + +(b) You own the Copyright and patent claims covering the Contribution which are required to grant the rights under Section 2. + +(c) The grant of rights under Section 2 does not violate any grant of rights which You have made to third parties, including Your employer. If Your Contributions were created in the course of Your employment with Your past or present employer(s), You represent that such employer(s) has authorized You to make Contributions on behalf of such employer(s) or such employer(s) has waived all of their right, title, or interest in or to Your Contributions. + +(d) You have followed the instructions provided by Ultralytics if You do not own the Copyright in the entire work of authorship submitted. 
+ +(e) Should You wish to submit work that is not Your original creation, You may submit it to Ultralytics separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which You are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]." + +(f) You agree to notify Ultralytics of any facts or circumstances of which You become aware that would make these representations inaccurate in any respect. + +## 4. Disclaimer of Warranties + +EXCEPT FOR THE EXPRESS WARRANTIES IN SECTION 3, THE CONTRIBUTION IS PROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIES INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED BY YOU TO US. TO THE EXTENT THAT ANY SUCH WARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATION TO THE MINIMUM PERIOD PERMITTED BY LAW. + +## 5. Miscellaneous + +### 5.1 Governing Law and Jurisdiction + +This Agreement will be governed by and construed in accordance with the laws of the State of New York, United States of America, excluding its conflicts of law provisions. The parties submit to venue in, and jurisdiction of, the courts located in New York, New York, for purposes relating to this Agreement. You waive all defenses of lack of personal jurisdiction and forum non-conveniens. + +### 5.2 Entire Agreement + +This Agreement sets out the entire agreement between You and Ultralytics for Your Contributions and overrides all other agreements or understandings. + +### 5.3 Assignment + +Ultralytics may assign this Agreement, and all of its rights, obligations, and licenses hereunder, without Your prior consent. 
+ +### 5.4 Waiver of Performance + +The failure of either party to require performance by the other party of any provision of this Agreement in one situation shall not affect the right of a party to require such performance at any time in the future. A waiver of performance under a provision in one situation shall not be considered a waiver of the performance of the provision in the future or a waiver of the provision in its entirety. + +### 5.5 Severability + +If any provision of this Agreement is found void and unenforceable, such provision will be replaced to the extent possible with a provision that comes closest to the meaning of the original provision and which is enforceable. The terms and conditions set forth in this Agreement shall apply notwithstanding any failure of essential purpose of this Agreement or any limited remedy to the maximum extent possible under law. + +### 5.6 No Obligation + +You acknowledge that Ultralytics is under no obligation to use or incorporate your Contributions into any of the Work. The decision to use or incorporate your Contributions into any of the Work will be made at the sole discretion of Ultralytics or its authorized delegates. + +### 5.7 Effective Date + +The Effective Date of this Agreement shall be the date You execute this Agreement or the date You first Submit a Contribution to Ultralytics, whichever is earlier. ## FAQ -### What is the purpose of the Ultralytics Individual Contributor License Agreement? +### What is the purpose of the Ultralytics Contributor License Agreement (CLA)? -The Ultralytics Individual Contributor License Agreement (ICLA) governs the terms under which you contribute to Ultralytics' open-source projects. It sets out the rights and obligations related to your contributions, including granting copyright and patent licenses, waiving moral rights, and disclosing any third-party content. +The Ultralytics CLA defines the terms under which you contribute to Ultralytics' software projects. 
It outlines the rights and obligations related to your contributions, including granting copyright and patent licenses, and addressing the handling of third-party content. -### Why do I need to agree to the Copyright License in the ICLA? +### Why do I need to agree to the Copyright License in the CLA? -Agreeing to the Copyright License allows Ultralytics to use and distribute your contributions, including making derivative works. This ensures that your contributions can be integrated into Ultralytics projects and shared with the community, fostering collaboration and software development. +Agreeing to the Copyright License allows Ultralytics and its users to use, modify, distribute, and create derivative works from your contributions. This ensures that your contributions can be integrated into Ultralytics projects and shared with the community, fostering collaboration and software development. ### How does the Patent License benefit both contributors and Ultralytics? -The Patent License grants Ultralytics the rights to use, make, and sell contributions covered by your patents, which is crucial for product development and commercialization. In return, it allows your patented innovations to be more widely used and recognized, promoting innovation within the community. +The Patent License grants Ultralytics the rights to use, make, and sell contributions covered by your patents. This is essential for product development and commercialization. In return, your patented innovations gain wider use and recognition, promoting innovation within the community. + +### What should I do if my contribution includes third-party content? + +If your contribution includes third-party content, you must clearly mark it and provide comprehensive details about its source and any applicable licenses or restrictions. This ensures proper attribution and legal compliance within Ultralytics projects, maintaining transparency and respecting the rights of original content creators. 
-### What should I do if my contribution contains third-party content? +### What happens if Ultralytics decides not to use my contribution? -If your contribution includes third-party content or you are aware of any third-party intellectual property rights, you must provide full details of such content and rights when submitting your contribution. This includes identifying the third-party content, its author, and the applicable license terms. For more information on third-party content, refer to the Third Party Content/Rights section of the Agreement. +Ultralytics is not obligated to use or incorporate your contributions into any projects. The decision to use your contributions is entirely at Ultralytics' discretion, meaning that while your contributions are valuable, they may not always align with the project's current needs or directions. + +--- -### What happens if Ultralytics does not use my contributions? +**Need More Help?** -Ultralytics is not obligated to use or incorporate your contributions into any projects. The decision to use or integrate contributions is at Ultralytics' sole discretion. This means that while your contributions are valuable, they may not always align with the project's current needs or directions. For further details, see the No Obligation section. +If you have any further questions or need clarification regarding the Contributor License Agreement, please contact us at hello@ultralytics.com. diff --git a/docs/en/help/FAQ.md b/docs/en/help/FAQ.md index 234fb9e82fd..0272d8b71b0 100644 --- a/docs/en/help/FAQ.md +++ b/docs/en/help/FAQ.md @@ -14,7 +14,7 @@ This FAQ section addresses common questions and issues users might encounter whi Ultralytics is a [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) AI company specializing in state-of-the-art object detection and [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) models, with a focus on the YOLO (You Only Look Once) family. 
Their offerings include: -- Open-source implementations of [YOLOv5](https://docs.ultralytics.com/models/yolov5/) and [YOLOv8](https://docs.ultralytics.com/models/yolov8/) +- Open-source implementations of [YOLOv8](https://docs.ultralytics.com/models/yolov8/) and [YOLO11](https://docs.ultralytics.com/models/yolo11/) - A wide range of [pre-trained models](https://docs.ultralytics.com/models/) for various computer vision tasks - A comprehensive [Python package](https://docs.ultralytics.com/usage/python/) for seamless integration of YOLO models into projects - Versatile [tools](https://docs.ultralytics.com/modes/) for training, testing, and deploying models @@ -54,9 +54,9 @@ Recommended setup: For troubleshooting common issues, visit the [YOLO Common Issues](https://docs.ultralytics.com/guides/yolo-common-issues/) page. -### How can I train a custom YOLOv8 model on my own dataset? +### How can I train a custom YOLO11 model on my own dataset? -To train a custom YOLOv8 model: +To train a custom YOLO11 model: 1. Prepare your dataset in YOLO format (images and corresponding label txt files). 2. Create a YAML file describing your dataset structure and classes. @@ -77,11 +77,11 @@ For a more in-depth guide, including data preparation and advanced training opti ### What pretrained models are available in Ultralytics? 
-Ultralytics offers a diverse range of pretrained YOLOv8 models for various tasks: +Ultralytics offers a diverse range of pretrained YOLO11 models for various tasks: -- Object Detection: YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x -- [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation): YOLOv8n-seg, YOLOv8s-seg, YOLOv8m-seg, YOLOv8l-seg, YOLOv8x-seg -- Classification: YOLOv8n-cls, YOLOv8s-cls, YOLOv8m-cls, YOLOv8l-cls, YOLOv8x-cls +- Object Detection: YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x +- [Instance Segmentation](https://www.ultralytics.com/glossary/instance-segmentation): YOLO11n-seg, YOLO11s-seg, YOLO11m-seg, YOLO11l-seg, YOLO11x-seg +- Classification: YOLO11n-cls, YOLO11s-cls, YOLO11m-cls, YOLO11l-cls, YOLO11x-cls These models vary in size and complexity, offering different trade-offs between speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Explore the full range of [pretrained models](https://docs.ultralytics.com/models/yolov8/) to find the best fit for your project. @@ -118,17 +118,17 @@ Absolutely! Ultralytics models are designed for versatile deployment across vari Ultralytics provides export functions to convert models to various formats for deployment. Explore the wide range of [deployment options](https://docs.ultralytics.com/guides/model-deployment-options/) to find the best solution for your use case. -### What's the difference between YOLOv5 and YOLOv8? +### What's the difference between YOLOv8 and YOLO11? Key distinctions include: -- Architecture: YOLOv8 features an improved backbone and head design for enhanced performance. -- Performance: YOLOv8 generally offers superior accuracy and speed compared to YOLOv5. -- Tasks: YOLOv8 natively supports [object detection](https://www.ultralytics.com/glossary/object-detection), instance segmentation, and classification in a unified framework. 
-- Codebase: YOLOv8 is implemented with a more modular and extensible architecture, facilitating easier customization and extension. -- Training: YOLOv8 incorporates advanced training techniques like multi-dataset training and hyperparameter evolution for improved results. +- Architecture: YOLO11 features an improved backbone and head design for enhanced performance. +- Performance: YOLO11 generally offers superior accuracy and speed compared to YOLOv8. +- Tasks: YOLO11 natively supports [object detection](https://www.ultralytics.com/glossary/object-detection), instance segmentation, and classification in a unified framework. +- Codebase: YOLO11 is implemented with a more modular and extensible architecture, facilitating easier customization and extension. +- Training: YOLO11 incorporates advanced training techniques like multi-dataset training and hyperparameter evolution for improved results. -For an in-depth comparison of features and performance metrics, visit the [YOLOv5 vs YOLOv8](https://www.ultralytics.com/yolo) comparison page. +For an in-depth comparison of features and performance metrics, visit the [YOLO](https://www.ultralytics.com/yolo) comparison page. ### How can I contribute to the Ultralytics open-source project? @@ -176,7 +176,7 @@ Enhancing your YOLO model's performance can be achieved through several techniqu 1. [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning): Experiment with different hyperparameters using the [Hyperparameter Tuning Guide](https://docs.ultralytics.com/guides/hyperparameter-tuning/) to optimize model performance. 2. [Data Augmentation](https://www.ultralytics.com/glossary/data-augmentation): Implement techniques like flip, scale, rotate, and color adjustments to enhance your training dataset and improve model generalization. -3. 
[Transfer Learning](https://www.ultralytics.com/glossary/transfer-learning): Leverage pre-trained models and fine-tune them on your specific dataset using the [Train YOLOv8](https://docs.ultralytics.com/modes/train/) guide. +3. [Transfer Learning](https://www.ultralytics.com/glossary/transfer-learning): Leverage pre-trained models and fine-tune them on your specific dataset using the [Train YOLO11](https://docs.ultralytics.com/modes/train/) guide. 4. Export to Efficient Formats: Convert your model to optimized formats like TensorRT or ONNX for faster inference using the [Export guide](../modes/export.md). 5. Benchmarking: Utilize the [Benchmark Mode](https://docs.ultralytics.com/modes/benchmark/) to measure and improve inference speed and accuracy systematically. @@ -195,22 +195,22 @@ Performing inference with a trained Ultralytics YOLO model is straightforward: 1. Load the Model: -```python -from ultralytics import YOLO + ```python + from ultralytics import YOLO -model = YOLO("path/to/your/model.pt") -``` + model = YOLO("path/to/your/model.pt") + ``` 2. Run Inference: -```python -results = model("path/to/image.jpg") + ```python + results = model("path/to/image.jpg") -for r in results: - print(r.boxes) # print bounding box predictions - print(r.masks) # print mask predictions - print(r.probs) # print class probabilities -``` + for r in results: + print(r.boxes) # print bounding box predictions + print(r.masks) # print mask predictions + print(r.probs) # print class probabilities + ``` For advanced inference techniques, including batch processing, video inference, and custom preprocessing, refer to the detailed [prediction guide](https://docs.ultralytics.com/modes/predict/). 
diff --git a/docs/en/help/code_of_conduct.md b/docs/en/help/code-of-conduct.md similarity index 100% rename from docs/en/help/code_of_conduct.md rename to docs/en/help/code-of-conduct.md diff --git a/docs/en/help/contributing.md b/docs/en/help/contributing.md index 1dad4f53141..29dd28e14db 100644 --- a/docs/en/help/contributing.md +++ b/docs/en/help/contributing.md @@ -11,21 +11,20 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics Ultralytics open-source contributors -## Table of Contents - -1. [Code of Conduct](#code-of-conduct) -2. [Contributing via Pull Requests](#contributing-via-pull-requests) - - [CLA Signing](#cla-signing) - - [Google-Style Docstrings](#google-style-docstrings) - - [GitHub Actions CI Tests](#github-actions-ci-tests) -3. [Reporting Bugs](#reporting-bugs) -4. [License](#license) -5. [Conclusion](#conclusion) -6. [FAQ](#faq) +

+
+ +
+ Watch: How to Contribute to Ultralytics Repository | Ultralytics Models, Datasets and Documentation 🚀 +

## Code of Conduct -To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community. +To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code-of-conduct/). Respect, kindness, and professionalism are at the heart of our community. ## Contributing via Pull Requests @@ -123,7 +122,7 @@ All pull requests must pass the GitHub Actions [Continuous Integration](https:// ## Reporting Bugs -We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. +We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem. ## License @@ -131,6 +130,118 @@ Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https:/ We encourage all contributors to familiarize themselves with the terms of the AGPL-3.0 license to contribute effectively and ethically to the Ultralytics open-source community. +## Open-Sourcing Your Projects with YOLO and AGPL-3.0 Compliance + +If you're planning to develop and release your own project using YOLO models, the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.gnu.org/licenses/agpl-3.0.html) ensures that all derivative works remain open and accessible. 
This section provides guidance, including steps, best practices, and requirements, to help you open-source your project while complying with AGPL-3.0. + +### Options for Starting Your Project + +You can kick-start your project using one of these approaches: + +1. **Fork the Ultralytics YOLO Repository** + Fork the official Ultralytics YOLO repository directly from [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics). + + - Use this option if you plan to build directly on the latest YOLO implementation. + - Modify the forked code as needed while ensuring compliance with AGPL-3.0. + +2. **Start from the Ultralytics Template Repository** + Use the Ultralytics template repository available at [https://github.com/ultralytics/template](https://github.com/ultralytics/template). + - Ideal for starting a clean, modular project with pre-configured best practices. + - This option provides a lightweight starting point for projects that integrate or extend YOLO models. + +### What You Need to Open-Source + +To comply with AGPL-3.0, you must make the following components of your project openly available: + +1. **Your Entire Project Source Code**: + + - Include all code for the larger project containing your YOLO models, scripts, and utilities. + +2. **Model Weights** (if modified): + + - Share any fine-tuned or modified model weights as part of the open-source project. + +3. **Configuration Files**: + + - Provide configuration files such as `.yaml` or `.json` that define the training setup, hyperparameters, or deployment configurations. + +4. **Training Data (if redistributable)**: + + - If you include preprocessed or generated data that is redistributable, ensure it is part of the repository or clearly linked. + +5. **Web Application Components**: + + - Include all backend and frontend source code if your project is a web application, especially server-side components. + +6. 
**Documentation**: + + - Include clear documentation on how to use, build, and extend your project. + +7. **Build and Deployment Scripts**: + + - Share scripts for setting up the environment, building the application, and deploying it, such as `Dockerfiles`, `requirements.txt`, or `Makefiles`. + +8. **Testing Framework**: + + - Open-source your test cases, such as unit and integration tests, to ensure reproducibility and reliability. + +9. **Third-Party Modifications**: + - Provide source code for any third-party libraries you've modified. + +### Steps to Open-Source Your Project + +1. **Choose Your Starting Point**: + + - Fork the Ultralytics YOLO repository or start from the Ultralytics template repository. + +2. **Set Your License**: + + - Add a `LICENSE` file containing the AGPL-3.0 text. + +3. **Credit Upstream Contributions**: + + - Include attribution to Ultralytics YOLO in your README. For example: + ``` + This project builds on [Ultralytics YOLO](https://github.com/ultralytics/ultralytics), licensed under AGPL-3.0. + ``` + +4. **Make Your Code Public**: + + - Push your entire project (including the components listed above) to a public GitHub repository. + +5. **Document Your Project**: + + - Write a clear `README.md` with instructions for setup, usage, and contributions. + +6. **Enable Contributions**: + - Set up an issue tracker and contribution guidelines to foster collaboration. + +By following these steps and ensuring you include all necessary components, you'll comply with AGPL-3.0 and contribute meaningfully to the open-source community. Let's continue fostering collaboration and innovation in computer vision together! 🚀 + +### Example Repository Structure + +Below is an example structure for an AGPL-3.0 project. See [https://github.com/ultralytics/template](https://github.com/ultralytics/template) for details. 
+ +``` +my-yolo-project/ +│ +├── LICENSE # AGPL-3.0 license text +├── README.md # Project overview and license information +├── src/ # Source code for the project +│ ├── model.py # YOLO-based model implementation +│ ├── utils.py # Utility scripts +│ └── ... +├── pyproject.toml # Python dependencies +├── tests/ # Unit and integration tests +├── .github/ # GitHub Actions for CI +│ └── workflows/ +│ └── ci.yml # Continuous integration configuration +└── docs/ # Project documentation + └── index.md +``` + +By following this guide, you can ensure your project remains compliant with AGPL-3.0 while contributing to the open-source community. Your adherence strengthens the ethos of collaboration, transparency, and accessibility that drives the success of projects like YOLO. + ## Conclusion Thank you for your interest in contributing to [Ultralytics](https://www.ultralytics.com/) [open-source](https://github.com/ultralytics) YOLO projects. Your participation is essential in shaping the future of our software and building a vibrant community of innovation and collaboration. Whether you're enhancing code, reporting bugs, or suggesting new features, your contributions are invaluable. @@ -165,4 +276,4 @@ the project's quality standards. Review the CI output and fix any issues. For de ### How do I report a bug in Ultralytics YOLO repositories? -To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. 
+To report a bug, provide a clear and concise [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum-reproducible-example/) along with your bug report. This helps developers quickly identify and fix the issue. Ensure your example is minimal yet sufficient to replicate the problem. For more detailed steps on reporting bugs, refer to the [Reporting Bugs](#reporting-bugs) section. diff --git a/docs/en/help/index.md b/docs/en/help/index.md index e8f2eecd7ab..cc9b6c60211 100644 --- a/docs/en/help/index.md +++ b/docs/en/help/index.md @@ -10,8 +10,8 @@ Welcome to the Ultralytics Help page! We are dedicated to providing you with det - [Contributing Guide](contributing.md): Discover the protocols for making contributions, including how to submit pull requests, report bugs, and more. - [Continuous Integration (CI) Guide](CI.md): Gain insights into the CI processes we employ, complete with status reports for each Ultralytics repository. - [Contributor License Agreement (CLA)](CLA.md): Review the CLA to understand the rights and responsibilities associated with contributing to Ultralytics projects. -- [Minimum Reproducible Example (MRE) Guide](minimum_reproducible_example.md): Learn the process for creating an MRE, which is crucial for the timely and effective resolution of bug reports. -- [Code of Conduct](code_of_conduct.md): Our community guidelines support a respectful and open atmosphere for all collaborators. +- [Minimum Reproducible Example (MRE) Guide](minimum-reproducible-example.md): Learn the process for creating an MRE, which is crucial for the timely and effective resolution of bug reports. +- [Code of Conduct](code-of-conduct.md): Our community guidelines support a respectful and open atmosphere for all collaborators. - [Environmental, Health and Safety (EHS) Policy](environmental-health-safety.md): Delve into our commitment to sustainability and the well-being of all our stakeholders. 
- [Security Policy](security.md): Familiarize yourself with our security protocols and the procedure for reporting vulnerabilities. - [Privacy Policy](privacy.md): Read our privacy policy to understand how we protect your data and respect your privacy in all our services and operations. @@ -22,17 +22,17 @@ We encourage you to review these resources for a seamless and productive experie ### What is Ultralytics YOLO and how does it benefit my [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) projects? -Ultralytics YOLO (You Only Look Once) is a state-of-the-art, real-time [object detection](https://www.ultralytics.com/glossary/object-detection) model. Its latest version, YOLOv8, enhances speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and versatility, making it ideal for a wide range of applications, from real-time video analytics to advanced machine learning research. YOLO's efficiency in detecting objects in images and videos has made it the go-to solution for businesses and researchers looking to integrate robust [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities into their projects. +Ultralytics YOLO (You Only Look Once) is a state-of-the-art, real-time [object detection](https://www.ultralytics.com/glossary/object-detection) model. Its latest version, YOLO11, enhances speed, [accuracy](https://www.ultralytics.com/glossary/accuracy), and versatility, making it ideal for a wide range of applications, from real-time video analytics to advanced machine learning research. YOLO's efficiency in detecting objects in images and videos has made it the go-to solution for businesses and researchers looking to integrate robust [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) capabilities into their projects. -For more details on YOLOv8, visit the [YOLOv8 documentation](../tasks/detect.md). 
+For more details on YOLO11, visit the [YOLO11 documentation](../tasks/detect.md). ### How do I contribute to Ultralytics YOLO repositories? -Contributing to Ultralytics YOLO repositories is straightforward. Start by reviewing the [Contributing Guide](../help/contributing.md) to understand the protocols for submitting pull requests, reporting bugs, and more. You'll also need to sign the [Contributor License Agreement (CLA)](../help/CLA.md) to ensure your contributions are legally recognized. For effective bug reporting, refer to the [Minimum Reproducible Example (MRE) Guide](../help/minimum_reproducible_example.md). +Contributing to Ultralytics YOLO repositories is straightforward. Start by reviewing the [Contributing Guide](../help/contributing.md) to understand the protocols for submitting pull requests, reporting bugs, and more. You'll also need to sign the [Contributor License Agreement (CLA)](../help/CLA.md) to ensure your contributions are legally recognized. For effective bug reporting, refer to the [Minimum Reproducible Example (MRE) Guide](../help/minimum-reproducible-example.md). ### Why should I use Ultralytics HUB for my machine learning projects? -Ultralytics HUB offers a seamless, no-code solution for managing your machine learning projects. It enables you to generate, train, and deploy AI models like YOLOv8 effortlessly. Unique features include cloud training, real-time tracking, and intuitive dataset management. Ultralytics HUB simplifies the entire workflow, from data processing to [model deployment](https://www.ultralytics.com/glossary/model-deployment), making it an indispensable tool for both beginners and advanced users. +Ultralytics HUB offers a seamless, no-code solution for managing your machine learning projects. It enables you to generate, train, and deploy AI models like YOLO11 effortlessly. Unique features include cloud training, real-time tracking, and intuitive dataset management. 
Ultralytics HUB simplifies the entire workflow, from data processing to [model deployment](https://www.ultralytics.com/glossary/model-deployment), making it an indispensable tool for both beginners and advanced users. To get started, visit [Ultralytics HUB Quickstart](../hub/quickstart.md). diff --git a/docs/en/help/minimum_reproducible_example.md b/docs/en/help/minimum-reproducible-example.md similarity index 100% rename from docs/en/help/minimum_reproducible_example.md rename to docs/en/help/minimum-reproducible-example.md diff --git a/docs/en/help/privacy.md b/docs/en/help/privacy.md index 567a72aea55..fc669286d92 100644 --- a/docs/en/help/privacy.md +++ b/docs/en/help/privacy.md @@ -153,7 +153,8 @@ Ultralytics collects three primary types of data using Google Analytics: - **Usage Metrics**: These include how often and in what ways the YOLO Python package is used, preferred features, and typical command-line arguments. - **System Information**: General non-identifiable information about the computing environments where the package is run. - **Performance Data**: Metrics related to the performance of models during training, validation, and inference. - This data helps us enhance user experience and optimize software performance. Learn more in the [Anonymized Google Analytics](#anonymized-google-analytics) section. + +This data helps us enhance user experience and optimize software performance. Learn more in the [Anonymized Google Analytics](#anonymized-google-analytics) section. ### How can I disable data collection in the Ultralytics YOLO package? 
diff --git a/docs/en/help/security.md b/docs/en/help/security.md index 39fe3829ff9..73d5e99c4d4 100644 --- a/docs/en/help/security.md +++ b/docs/en/help/security.md @@ -17,7 +17,7 @@ We utilize [Snyk](https://snyk.io/advisor/python/ultralytics) to conduct compreh Our security strategy includes GitHub's [CodeQL](https://docs.github.com/en/code-security/code-scanning/introduction-to-code-scanning/about-code-scanning-with-codeql) scanning. CodeQL delves deep into our codebase, identifying complex vulnerabilities like SQL injection and XSS by analyzing the code's semantic structure. This advanced level of analysis ensures early detection and resolution of potential security risks. -[![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) +[![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/github-code-scanning/codeql) ## GitHub Dependabot Alerts diff --git a/docs/en/hub/app/android.md b/docs/en/hub/app/android.md index bca298fa9d5..d5d19ef91d4 100644 --- a/docs/en/hub/app/android.md +++ b/docs/en/hub/app/android.md @@ -6,7 +6,7 @@ keywords: Ultralytics, Android app, real-time object detection, YOLO models, Ten # Ultralytics Android App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics, Android app, real-time object detection, YOLO models, Ten space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/app/index.md b/docs/en/hub/app/index.md index e812d686788..c044aa05532 100644 --- a/docs/en/hub/app/index.md +++ b/docs/en/hub/app/index.md @@ -6,7 +6,7 @@ keywords: Ultralytics HUB, YOLO models, mobile app, iOS, Android, hardware accel # Ultralytics HUB App - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics HUB, YOLO models, mobile app, iOS, Android, hardware accel space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/app/ios.md b/docs/en/hub/app/ios.md index be896fe80b3..061267b59f4 100644 --- a/docs/en/hub/app/ios.md +++ b/docs/en/hub/app/ios.md @@ -6,7 +6,7 @@ keywords: Ultralytics, iOS App, YOLO models, real-time object detection, Apple N # Ultralytics iOS App: Real-time [Object Detection](https://www.ultralytics.com/glossary/object-detection) with YOLO Models - + Ultralytics HUB preview image
@@ -22,7 +22,7 @@ keywords: Ultralytics, iOS App, YOLO models, real-time object detection, Apple N space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord

diff --git a/docs/en/hub/cloud-training.md b/docs/en/hub/cloud-training.md index f0303673113..5abaf36a111 100644 --- a/docs/en/hub/cloud-training.md +++ b/docs/en/hub/cloud-training.md @@ -34,7 +34,7 @@ Follow the [Train Model](./models.md#train-model) instructions from the [Models] ![Ultralytics HUB screenshot of the Model page with an arrow pointing to the Start Training card](https://github.com/ultralytics/docs/releases/download/0/hub-cloud-training-model-page-start-training.avif) -Most of the times, you will use the Epochs training. The number of epochs can be adjusted on this step (if the training didn't start yet) and represents the number of times your dataset needs to go through the cycle of train, label, and test. The exact pricing based on the number of epochs is hard to determine, reason why we only allow the [Account Balance](./pro.md#account-balance) payment method. +Most of the time, you will use the Epochs training. The number of epochs can be adjusted on this step (if the training hasn't started yet) and represents the number of times your dataset needs to go through the cycle of train, label, and test. The exact pricing based on the number of epochs is hard to determine, which is why we only allow the [Account Balance](./pro.md#account-balance) payment method. !!! note diff --git a/docs/en/hub/datasets.md b/docs/en/hub/datasets.md index 5e6f3c4c877..4ebf9eb4498 100644 --- a/docs/en/hub/datasets.md +++ b/docs/en/hub/datasets.md @@ -17,7 +17,7 @@ Once uploaded, datasets can be immediately utilized for model training. This int allowfullscreen>
- Watch: Watch: Upload Datasets to Ultralytics HUB | Complete Walkthrough of Dataset Upload Feature + Watch: Upload Datasets to Ultralytics HUB | Complete Walkthrough of Dataset Upload Feature

## Upload Dataset @@ -48,7 +48,7 @@ The dataset YAML is the same standard YOLOv5 and YOLOv8 YAML format. After zipping your dataset, you should [validate it](https://docs.ultralytics.com/reference/hub/__init__/#ultralytics.hub.check_dataset) before uploading it to [Ultralytics HUB](https://www.ultralytics.com/hub). [Ultralytics HUB](https://www.ultralytics.com/hub) conducts the dataset validation check post-upload, so by ensuring your dataset is correctly formatted and error-free ahead of time, you can forestall any setbacks due to dataset rejection. -```py +```python from ultralytics.hub import check_dataset check_dataset("path/to/dataset.zip", task="detect") diff --git a/docs/en/hub/index.md b/docs/en/hub/index.md index 24dbdd3f577..d48485de14e 100644 --- a/docs/en/hub/index.md +++ b/docs/en/hub/index.md @@ -7,7 +7,7 @@ keywords: Ultralytics HUB, YOLO models, train YOLO, YOLOv5, YOLOv8, object detec # Ultralytics HUB
- +Ultralytics HUB banner 中文 | 한국어 | 日本語 | @@ -22,13 +22,13 @@ keywords: Ultralytics HUB, YOLO models, train YOLO, YOLOv5, YOLOv8, object detec

-CI CPU Open In Colab Discord Ultralytics Forums Ultralytics Reddit +CI CPU Open In Colab Discord Ultralytics Forums Ultralytics Reddit
👋 Hello from the [Ultralytics](https://www.ultralytics.com/) Team! We've been working hard these last few months to launch [Ultralytics HUB](https://www.ultralytics.com/hub), a new web tool for training and deploying all your YOLOv5 and YOLOv8 🚀 models from one spot! -We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! +We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!

@@ -44,7 +44,7 @@ We hope that the resources here will help you get the most out of HUB. Please br space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Introduction @@ -52,16 +52,16 @@ We hope that the resources here will help you get the most out of HUB. Please br [Ultralytics HUB](https://www.ultralytics.com/hub) is designed to be user-friendly and intuitive, allowing users to quickly upload their datasets and train new YOLO models. It also offers a range of pre-trained models to choose from, making it extremely easy for users to get started. Once a model is trained, it can be effortlessly previewed in the [Ultralytics HUB App](app/index.md) before being deployed for real-time classification, [object detection](https://www.ultralytics.com/glossary/object-detection), and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) tasks.

-
- Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB + Watch: How to train Ultralytics YOLO11 on Custom Dataset using Ultralytics HUB | HUB Datasets 🚀

-We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! +We hope that the resources here will help you get the most out of HUB. Please browse the HUB Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions! - [**Quickstart**](quickstart.md): Start training and deploying models in seconds. - [**Datasets**](datasets.md): Learn how to prepare and upload your datasets. diff --git a/docs/en/hub/inference-api.md b/docs/en/hub/inference-api.md index b532e8150ce..fce59c8b21c 100644 --- a/docs/en/hub/inference-api.md +++ b/docs/en/hub/inference-api.md @@ -49,15 +49,9 @@ To shut down the dedicated endpoint, click on the **Stop Endpoint** button. To use the [Ultralytics HUB](https://www.ultralytics.com/hub) Shared Inference API, follow the guides below. -Free users have the following usage limits: +The [Ultralytics HUB](https://www.ultralytics.com/hub) Shared Inference API has the following usage limits: - 100 calls / hour -- 1000 calls / month - -[Pro](./pro.md) users have the following usage limits: - -- 1000 calls / hour -- 10000 calls / month ## Python diff --git a/docs/en/hub/models.md b/docs/en/hub/models.md index db098669ac3..c123f25b6e8 100644 --- a/docs/en/hub/models.md +++ b/docs/en/hub/models.md @@ -1,7 +1,7 @@ --- comments: true -description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLOv8. Start training today!. -keywords: Ultralytics HUB, YOLOv8, custom AI models, model training, model deployment, model analysis, vision AI +description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLO11. Start training today!. 
+keywords: Ultralytics HUB, YOLO11, custom AI models, model training, model deployment, model analysis, vision AI --- # Ultralytics HUB Models @@ -66,7 +66,7 @@ In this step, you have to choose the project in which you want to create your mo !!! info - You can read more about the available [YOLOv8](https://docs.ultralytics.com/models/yolov8/) (and [YOLOv5](https://docs.ultralytics.com/models/yolov5/)) architectures in our documentation. + You can read more about the available [YOLO models](https://docs.ultralytics.com/models/) and architectures in our documentation. By default, your model will use a pre-trained model (trained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset) to reduce training time. You can change this behavior and tweak your model's configuration by opening the **Advanced Model Configuration** accordion. @@ -221,6 +221,16 @@ Furthermore, you can preview your model in real-time directly on your [iOS](http After you [train a model](#train-model), you can export it to 13 different formats, including ONNX, OpenVINO, CoreML, [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), Paddle and many others. +

+ +
+ Watch: How to Export the Ultralytics YOLO11 to ONNX, OpenVINO and Other Formats using Ultralytics HUB 🚀 +

+ ![Ultralytics HUB screenshot of the Deploy tab inside the Model page with an arrow pointing to the Export card and all formats exported](https://github.com/ultralytics/docs/releases/download/0/ultralytics-hub-deploy-export-formats.avif) ??? tip diff --git a/docs/en/hub/quickstart.md b/docs/en/hub/quickstart.md index 3fbcf23af14..6e377d27be7 100644 --- a/docs/en/hub/quickstart.md +++ b/docs/en/hub/quickstart.md @@ -9,13 +9,13 @@ keywords: Ultralytics HUB, Quickstart, YOLO models, dataset upload, project mana [Ultralytics HUB](https://www.ultralytics.com/hub) is designed to be user-friendly and intuitive, allowing users to quickly upload their datasets and train new YOLO models. It also offers a range of pre-trained models to choose from, making it extremely easy for users to get started. Once a model is trained, it can be effortlessly previewed in the [Ultralytics HUB App](app/index.md) before being deployed for real-time classification, [object detection](https://www.ultralytics.com/glossary/object-detection), and [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) tasks.

-
- Watch: Train Your Custom YOLO Models In A Few Clicks with Ultralytics HUB + Watch: How to train Ultralytics YOLO11 on Custom Dataset using Ultralytics HUB | HUB Datasets 🚀

## Get Started @@ -98,4 +98,4 @@ You can report a bug, request a feature, or ask a question on Discord community for questions and discussions! + You can join our Discord community for questions and discussions! diff --git a/docs/en/index.md b/docs/en/index.md index 71abe212e65..91b8f87b913 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -6,32 +6,33 @@ keywords: Ultralytics, YOLO, YOLO11, object detection, image segmentation, deep -Introducing [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics), the latest version of the acclaimed real-time object detection and image segmentation model. YOLO11 is built on cutting-edge advancements in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), offering unparalleled performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Its streamlined design makes it suitable for various applications and easily adaptable to different hardware platforms, from edge devices to cloud APIs. +Introducing [Ultralytics](https://www.ultralytics.com/) [YOLO11](https://github.com/ultralytics/ultralytics), the latest version of the acclaimed real-time object detection and image segmentation model. YOLO11 is built on cutting-edge advancements in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) and [computer vision](https://www.ultralytics.com/blog/everything-you-need-to-know-about-computer-vision-in-2025), offering unparalleled performance in terms of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy). Its streamlined design makes it suitable for various applications and easily adaptable to different hardware platforms, from edge devices to cloud APIs. Explore the Ultralytics Docs, a comprehensive resource designed to help you understand and utilize its features and capabilities. 
Whether you are a seasoned [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) practitioner or new to the field, this hub aims to maximize YOLO's potential in your projects @@ -49,16 +50,74 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Where to Start -- **Install** `ultralytics` with pip and get up and running in minutes   [:material-clock-fast: Get Started](quickstart.md){ .md-button } -- **Predict** new images and videos with YOLO   [:octicons-image-16: Predict on Images](modes/predict.md){ .md-button } -- **Train** a new YOLO model on your own custom dataset   [:fontawesome-solid-brain: Train a Model](modes/train.md){ .md-button } -- **Tasks** YOLO tasks like segment, classify, pose and track   [:material-magnify-expand: Explore Tasks](tasks/index.md){ .md-button } -- **[YOLO11](models/yolo11.md) NEW ๐Ÿš€**: Ultralytics' latest SOTA models   [:material-magnify-expand: Explore a Dataset](models/yolo11.md){ .md-button } +
+ +- :material-clock-fast:{ .lg .middle }   **Getting Started** + + *** + + Install `ultralytics` with pip and get up and running in minutes to train a YOLO model + + *** + + [:octicons-arrow-right-24: Quickstart](quickstart.md) + +- :material-image:{ .lg .middle }   **Predict** + + *** + + Predict on new images, videos and streams with YOLO
  + + *** + + [:octicons-arrow-right-24: Learn more](modes/predict.md) + +- :fontawesome-solid-brain:{ .lg .middle }   **Train a Model** + + *** + + Train a new YOLO model on your own custom dataset from scratch or load and train on a pretrained model + + *** + + [:octicons-arrow-right-24: Learn more](modes/train.md) + +- :material-magnify-expand:{ .lg .middle }   **Explore computer vision tasks** + + *** + + Discover YOLO tasks like detect, segment, classify, pose, OBB and track
  + + *** + + [:octicons-arrow-right-24: Explore Tasks](tasks/index.md) + +- :rocket:{ .lg .middle }   **Explore YOLO11 NEW** + + *** + + Discover Ultralytics' latest state-of-the-art YOLO11 models and their capabilities
  + + *** + + [:octicons-arrow-right-24: YOLO11 Models ๐Ÿš€ NEW](models/yolo11.md) + +- :material-scale-balance:{ .lg .middle }   **Open Source, AGPL-3.0** + + *** + + Ultralytics offers two YOLO licenses: AGPL-3.0 and Enterprise. Explore YOLO on [GitHub](https://github.com/ultralytics/ultralytics). + + *** + + [:octicons-arrow-right-24: YOLO License](https://www.ultralytics.com/license) + +


@@ -79,12 +138,12 @@ Explore the Ultralytics Docs, a comprehensive resource designed to help you unde - [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf), launched in 2018, further enhanced the model's performance using a more efficient backbone network, multiple anchors and spatial pyramid pooling. - [YOLOv4](https://arxiv.org/abs/2004.10934) was released in 2020, introducing innovations like Mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation), a new anchor-free detection head, and a new [loss function](https://www.ultralytics.com/glossary/loss-function). - [YOLOv5](https://github.com/ultralytics/yolov5) further improved the model's performance and added new features such as hyperparameter optimization, integrated experiment tracking and automatic export to popular export formats. -- [YOLOv6](https://github.com/meituan/YOLOv6) was open-sourced by [Meituan](https://about.meituan.com/) in 2022 and is in use in many of the company's autonomous delivery robots. +- [YOLOv6](https://github.com/meituan/YOLOv6) was open-sourced by [Meituan](https://www.meituan.com/) in 2022 and is in use in many of the company's autonomous delivery robots. - [YOLOv7](https://github.com/WongKinYiu/yolov7) added additional tasks such as pose estimation on the COCO keypoints dataset. -- [YOLOv8](https://github.com/ultralytics/ultralytics) is the latest version of YOLO by Ultralytics. As a cutting-edge, state-of-the-art (SOTA) model, YOLOv8 builds on the success of previous versions, introducing new features and improvements for enhanced performance, flexibility, and efficiency. YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains. 
+- [YOLOv8](https://github.com/ultralytics/ultralytics) was released in 2023 by Ultralytics. YOLOv8 introduced new features and improvements for enhanced performance, flexibility, and efficiency, supporting a full range of vision AI tasks. - [YOLOv9](models/yolov9.md) introduces innovative methods like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN). - [YOLOv10](models/yolov10.md) is created by researchers from [Tsinghua University](https://www.tsinghua.edu.cn/en/) using the [Ultralytics](https://www.ultralytics.com/) [Python package](https://pypi.org/project/ultralytics/). This version provides real-time [object detection](tasks/detect.md) advancements by introducing an End-to-End head that eliminates Non-Maximum Suppression (NMS) requirements. -- **[YOLO11](models/yolo11.md) NEW 🚀**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks. +- **[YOLO11](models/yolo11.md) 🚀 NEW**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks, including [object detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md), allowing users to leverage these capabilities across diverse AI applications and domains. ## YOLO Licenses: How is Ultralytics YOLO licensed? @@ -103,11 +162,15 @@ Ultralytics YOLO is the latest advancement in the acclaimed YOLO (You Only Look ### How can I get started with YOLO installation and setup? -Getting started with YOLO is quick and straightforward. You can install the Ultralytics package using pip and get up and running in minutes. Here's a basic installation command: +Getting started with YOLO is quick and straightforward. You can install the Ultralytics package using [pip](https://pypi.org/project/ultralytics/) and get up and running in minutes. 
Here's a basic installation command: -```bash -pip install ultralytics -``` +!!! example "Installation using pip" + + === "CLI" + + ```bash + pip install ultralytics + ``` For a comprehensive step-by-step guide, visit our [quickstart guide](quickstart.md). This resource will help you with installation instructions, initial setup, and running your first model. @@ -117,13 +180,30 @@ Training a custom YOLO model on your dataset involves a few detailed steps: 1. Prepare your annotated dataset. 2. Configure the training parameters in a YAML file. -3. Use the `yolo train` command to start training. +3. Use the `yolo TASK train` command to start training. (Each `TASK` has its own argument) + +Here's example code for the Object Detection Task: -Here's an example command: +!!! example "Train Example for Object Detection Task" -```bash -yolo train model=yolo11n.pt data=coco128.yaml epochs=100 imgsz=640 -``` + === "Python" + + ```python + from ultralytics import YOLO + + # Load a pre-trained YOLO model (you can choose n, s, m, l, or x versions) + model = YOLO("yolo11n.pt") + + # Start training on your custom dataset + model.train(data="path/to/dataset.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Train a YOLO model from the command line + yolo detect train data=path/to/dataset.yaml epochs=100 imgsz=640 + ``` For a detailed walkthrough, check out our [Train a Model](modes/train.md) guide, which includes examples and tips for optimizing your training process. @@ -140,8 +220,27 @@ For more details, visit our [Licensing](https://www.ultralytics.com/license) pag Ultralytics YOLO supports efficient and customizable multi-object tracking. To utilize tracking capabilities, you can use the `yolo track` command as shown below: -```bash -yolo track model=yolo11n.pt source=video.mp4 -``` +!!! 
example "Example for Object Tracking on a Video" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a pre-trained YOLO model + model = YOLO("yolo11n.pt") + + # Start tracking objects in a video + # You can also use live video streams or webcam input + model.track(source="path/to/video.mp4") + ``` + + === "CLI" + + ```bash + # Perform object tracking on a video from the command line + # You can specify different sources like webcam (0) or RTSP streams + yolo track source=path/to/video.mp4 + ``` For a detailed guide on setting up and running object tracking, check our [tracking mode](modes/track.md) documentation, which explains the configuration and practical applications in real-time scenarios. diff --git a/docs/en/integrations/albumentations.md b/docs/en/integrations/albumentations.md new file mode 100644 index 00000000000..1302d039bf7 --- /dev/null +++ b/docs/en/integrations/albumentations.md @@ -0,0 +1,199 @@ +--- +comments: true +description: Learn how to use Albumentations with YOLO11 to enhance data augmentation, improve model performance, and streamline your computer vision projects. +keywords: Albumentations, YOLO11, data augmentation, Ultralytics, computer vision, object detection, model training, image transformations, machine learning +--- + +# Enhance Your Dataset to Train YOLO11 Using Albumentations + +When you are building [computer vision models](../models/index.md), the quality and variety of your [training data](../datasets/index.md) can play a big role in how well your model performs. Albumentations offers a fast, flexible, and efficient way to apply a wide range of image transformations that can improve your model's ability to adapt to real-world scenarios. It easily integrates with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) and can help you create robust datasets for [object detection](../tasks/detect.md), [segmentation](../tasks/segment.md), and [classification](../tasks/classify.md) tasks. 
+ +By using Albumentations, you can boost your YOLO11 training data with techniques like geometric transformations and color adjustments. In this article, we'll see how Albumentations can improve your [data augmentation](../guides/preprocessing_annotated_data.md) process and make your [YOLO11 projects](../solutions/index.md) even more impactful. Let's get started! + +## Albumentations for Image Augmentation + +[Albumentations](https://albumentations.ai/) is an open-source image augmentation library created in [June 2018](https://arxiv.org/pdf/1809.06839). It is designed to simplify and accelerate the image augmentation process in [computer vision](https://www.ultralytics.com/blog/exploring-image-processing-computer-vision-and-machine-vision). Created with [performance](https://www.ultralytics.com/blog/measuring-ai-performance-to-weigh-the-impact-of-your-innovations) and flexibility in mind, it supports many diverse augmentation techniques, ranging from simple transformations like rotations and flips to more complex adjustments like brightness and contrast changes. Albumentations helps developers generate rich, varied datasets for tasks like [image classification](https://www.youtube.com/watch?v=5BO0Il_YYAg), [object detection](https://www.youtube.com/watch?v=5ku7npMrW40&t=1s), and [segmentation](https://www.youtube.com/watch?v=o4Zd-IeMlSY). + +You can use Albumentations to easily apply augmentations to images, [segmentation masks](https://www.ultralytics.com/glossary/image-segmentation), [bounding boxes](https://www.ultralytics.com/glossary/bounding-box), and [key points](../datasets/pose/index.md), and make sure that all elements of your dataset are transformed together. It works seamlessly with popular deep learning frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md), making it accessible for a wide range of projects. 
+ +Also, Albumentations is a great option for augmentation whether you're handling small datasets or large-scale [computer vision tasks](../tasks/index.md). It ensures fast and efficient processing, cutting down the time spent on data preparation. At the same time, it helps improve [model performance](../guides/yolo-performance-metrics.md), making your models more effective in real-world applications. + +## Key Features of Albumentations + +Albumentations offers many useful features that simplify complex image augmentations for a wide range of [computer vision applications](https://www.ultralytics.com/blog/exploring-how-the-applications-of-computer-vision-work). Here are some of the key features: + +- **Wide Range of Transformations**: Albumentations offers over [70 different transformations](https://github.com/albumentations-team/albumentations?tab=readme-ov-file#list-of-augmentations), including geometric changes (e.g., rotation, flipping), color adjustments (e.g., brightness, contrast), and noise addition (e.g., Gaussian noise). Having multiple options enables the creation of highly diverse and robust training datasets. + +

+ Example of Image Augmentations +

+ +- **High Performance Optimization**: Built on OpenCV and NumPy, Albumentations uses advanced optimization techniques like SIMD (Single Instruction, Multiple Data), which processes multiple data points simultaneously to speed up processing. It handles large datasets quickly, making it one of the fastest options available for image augmentation. + +- **Three Levels of Augmentation**: Albumentations supports three levels of augmentation: pixel-level transformations, spatial-level transformations, and mixing-level transformation. Pixel-level transformations only affect the input images without altering masks, bounding boxes, or key points. Meanwhile, both the image and its elements, like masks and bounding boxes, are transformed using spatial-level transformations. Furthermore, mixing-level transformations are a unique way to augment data as it combines multiple images into one. + +![Overview of the Different Levels of Augmentations](https://github.com/ultralytics/docs/releases/download/0/levels-of-augmentation.avif) + +- **[Benchmarking Results](https://albumentations.ai/docs/benchmarking_results/)**: When it comes to benchmarking, Albumentations consistently outperforms other libraries, especially with large datasets. + +## Why Should You Use Albumentations for Your Vision AI Projects? + +With respect to image augmentation, Albumentations stands out as a reliable tool for computer vision tasks. Here are a few key reasons why you should consider using it for your Vision AI projects: + +- **Easy-to-Use API**: Albumentations provides a single, straightforward API for applying a wide range of augmentations to images, masks, bounding boxes, and keypoints. It's designed to adapt easily to different datasets, making [data preparation](../guides/data-collection-and-annotation.md) simpler and more efficient. + +- **Rigorous Bug Testing**: Bugs in the augmentation pipeline can silently corrupt input data, often going unnoticed but ultimately degrading model performance. 
Albumentations addresses this with a thorough test suite that helps catch bugs early in development. + +- **Extensibility**: Albumentations can be used to easily add new augmentations and use them in computer vision pipelines through a single interface along with built-in transformations. + +## How to Use Albumentations to Augment Data for YOLO11 Training + +Now that we've covered what Albumentations is and what it can do, let's look at how to use it to augment your data for YOLO11 model training. It's easy to set up because it integrates directly into [Ultralytics' training mode](../modes/train.md) and applies automatically if you have the Albumentations package installed. + +### Installation + +To use Albumentations with YOLO11, start by making sure you have the necessary packages installed. If Albumentations isn't installed, the augmentations won't be applied during training. Once set up, you'll be ready to create an augmented dataset for training, with Albumentations integrated to enhance your model automatically. + +!!! tip "Installation" + + === "CLI" + + ```bash + # Install the required packages + pip install albumentations ultralytics + ``` + +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. + +### Usage + +After installing the necessary packages, you're ready to start using Albumentations with YOLO11. When you train YOLO11, a set of augmentations is automatically applied through its integration with Albumentations, making it easy to enhance your model's performance. + +!!! 
example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a pre-trained model + model = YOLO("yolo11n.pt") + + # Train the model + results = model.train(data="coco8.yaml", epochs=100, imgsz=640) + ``` + +Next, let's take a closer look at the specific augmentations that are applied during training. + +### Blur + +The Blur transformation in Albumentations applies a simple blur effect to the image by averaging pixel values within a small square area, or kernel. This is done using OpenCV's `cv2.blur` function, which helps reduce noise in the image, though it also slightly reduces image details. + +Here are the parameters and values used in this integration: + +- **blur_limit**: This controls the size range of the blur effect. The default range is (3, 7), meaning the kernel size for the blur can vary between 3 and 7 pixels, with only odd numbers allowed to keep the blur centered. + +- **p**: The probability of applying the blur. In the integration, p=0.01, so there's a 1% chance that this blur will be applied to each image. The low probability allows for occasional blur effects, introducing a bit of variation to help the model generalize without over-blurring the images. + +An Example of the Blur Augmentation + +### Median Blur + +The MedianBlur transformation in Albumentations applies a median blur effect to the image, which is particularly useful for reducing noise while preserving edges. Unlike typical blurring methods, MedianBlur uses a median filter, which is especially effective at removing salt-and-pepper noise while maintaining sharpness around the edges. + +Here are the parameters and values used in this integration: + +- **blur_limit**: This parameter controls the maximum size of the blurring kernel. In this integration, it defaults to a range of (3, 7), meaning the kernel size for the blur is randomly chosen between 3 and 7 pixels, with only odd values allowed to ensure proper alignment. 
+ +- **p**: Sets the probability of applying the median blur. Here, p=0.01, so the transformation has a 1% chance of being applied to each image. This low probability ensures that the median blur is used sparingly, helping the model generalize by occasionally seeing images with reduced noise and preserved edges. + +The image below shows an example of this augmentation applied to an image. + +An Example of the MedianBlur Augmentation + +### Grayscale + +The ToGray transformation in Albumentations converts an image to grayscale, reducing it to a single-channel format and optionally replicating this channel to match a specified number of output channels. Different methods can be used to adjust how grayscale brightness is calculated, ranging from simple averaging to more advanced techniques for realistic perception of contrast and brightness. + +Here are the parameters and values used in this integration: + +- **num_output_channels**: Sets the number of channels in the output image. If this value is more than 1, the single grayscale channel will be replicated to create a multichannel grayscale image. By default, it's set to 3, giving a grayscale image with three identical channels. + +- **method**: Defines the grayscale conversion method. The default method, "weighted_average", applies a formula (0.299R + 0.587G + 0.114B) that closely aligns with human perception, providing a natural-looking grayscale effect. Other options, like "from_lab", "desaturation", "average", "max", and "pca", offer alternative ways to create grayscale images based on various needs for speed, brightness emphasis, or detail preservation. + +- **p**: Controls how often the grayscale transformation is applied. With p=0.01, there is a 1% chance of converting each image to grayscale, making it possible for a mix of color and grayscale images to help the model generalize better. + +The image below shows an example of this grayscale transformation applied. 
+ +An Example of the ToGray Augmentation + +### Contrast Limited Adaptive Histogram Equalization (CLAHE) + +The CLAHE transformation in Albumentations applies Contrast Limited Adaptive Histogram Equalization (CLAHE), a technique that enhances image contrast by equalizing the histogram in localized regions (tiles) instead of across the whole image. CLAHE produces a balanced enhancement effect, avoiding the overly amplified contrast that can result from standard histogram equalization, especially in areas with initially low contrast. + +Here are the parameters and values used in this integration: + +- **clip_limit**: Controls the contrast enhancement range. Set to a default range of (1, 4), it determines the maximum contrast allowed in each tile. Higher values are used for more contrast but may also introduce noise. + +- **tile_grid_size**: Defines the size of the grid of tiles, typically as (rows, columns). The default value is (8, 8), meaning the image is divided into an 8x8 grid. Smaller tile sizes provide more localized adjustments, while larger ones create effects closer to global equalization. + +- **p**: The probability of applying CLAHE. Here, p=0.01 introduces the enhancement effect only 1% of the time, ensuring that contrast adjustments are applied sparingly for occasional variation in training images. + +The image below shows an example of the CLAHE transformation applied. + +An Example of the CLAHE Augmentation + +## Keep Learning about Albumentations + +If you are interested in learning more about Albumentations, check out the following resources for more in-depth instructions and examples: + +- **[Albumentations Documentation](https://albumentations.ai/docs/)**: The official documentation provides a full range of supported transformations and advanced usage techniques. 
+ +- **[Ultralytics Albumentations Guide](https://docs.ultralytics.com/reference/data/augment/?h=albumentation#ultralytics.data.augment.Albumentations)**: Get a closer look at the details of the function that facilitate this integration. + +- **[Albumentations GitHub Repository](https://github.com/albumentations-team/albumentations/)**: The repository includes examples, benchmarks, and discussions to help you get started with customizing augmentations. + +## Key Takeaways + +In this guide, we explored the key aspects of Albumentations, a great Python library for image augmentation. We discussed its wide range of transformations, optimized performance, and how you can use it in your next YOLO11 project. + +Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find valuable resources and insights there. + +## FAQ + +### How can I integrate Albumentations with YOLO11 for improved data augmentation? + +Albumentations integrates seamlessly with YOLO11 and applies automatically during training if you have the package installed. Here's how to get started: + +```python +# Install required packages +# !pip install albumentations ultralytics +from ultralytics import YOLO + +# Load and train model with automatic augmentations +model = YOLO("yolo11n.pt") +model.train(data="coco8.yaml", epochs=100) +``` + +The integration includes optimized augmentations like blur, median blur, grayscale conversion, and CLAHE with carefully tuned probabilities to enhance model performance. + +### What are the key benefits of using Albumentations over other augmentation libraries? + +Albumentations stands out for several reasons: + +1. Performance: Built on OpenCV and NumPy with SIMD optimization for superior speed +2. Flexibility: Supports 70+ transformations across pixel-level, spatial-level, and mixing-level augmentations +3. 
Compatibility: Works seamlessly with popular frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md) +4. Reliability: Extensive test suite prevents silent data corruption +5. Ease of use: Single unified API for all augmentation types + +### What types of computer vision tasks can benefit from Albumentations augmentation? + +Albumentations enhances various [computer vision tasks](../tasks/index.md) including: + +- [Object Detection](../tasks/detect.md): Improves model robustness to lighting, scale, and orientation variations +- [Instance Segmentation](../tasks/segment.md): Enhances mask prediction accuracy through diverse transformations +- [Classification](../tasks/classify.md): Increases model generalization with color and geometric augmentations +- [Pose Estimation](../tasks/pose.md): Helps models adapt to different viewpoints and lighting conditions + +The library's diverse augmentation options make it valuable for any vision task requiring robust model performance. diff --git a/docs/en/integrations/amazon-sagemaker.md b/docs/en/integrations/amazon-sagemaker.md index 9a82037ac24..366a4f13884 100644 --- a/docs/en/integrations/amazon-sagemaker.md +++ b/docs/en/integrations/amazon-sagemaker.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn step-by-step how to deploy Ultralytics' YOLOv8 on Amazon SageMaker Endpoints, from setup to testing, for powerful real-time inference with AWS services. -keywords: YOLOv8, Amazon SageMaker, AWS, Ultralytics, machine learning, computer vision, model deployment, AWS CloudFormation, AWS CDK, real-time inference +description: Learn step-by-step how to deploy Ultralytics' YOLO11 on Amazon SageMaker Endpoints, from setup to testing, for powerful real-time inference with AWS services. 
+keywords: YOLO11, Amazon SageMaker, AWS, Ultralytics, machine learning, computer vision, model deployment, AWS CloudFormation, AWS CDK, real-time inference --- -# A Guide to Deploying YOLOv8 on Amazon SageMaker Endpoints +# A Guide to Deploying YOLO11 on Amazon SageMaker Endpoints -Deploying advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLOv8](https://github.com/ultralytics/ultralytics) on Amazon SageMaker Endpoints opens up a wide range of possibilities for various [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) applications. The key to effectively using these models lies in understanding their setup, configuration, and deployment processes. YOLOv8 becomes even more powerful when integrated seamlessly with Amazon SageMaker, a robust and scalable machine learning service by AWS. +Deploying advanced [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLO11](https://github.com/ultralytics/ultralytics) on Amazon SageMaker Endpoints opens up a wide range of possibilities for various [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) applications. The key to effectively using these models lies in understanding their setup, configuration, and deployment processes. YOLO11 becomes even more powerful when integrated seamlessly with Amazon SageMaker, a robust and scalable machine learning service by AWS. -This guide will take you through the process of deploying YOLOv8 [PyTorch](https://www.ultralytics.com/glossary/pytorch) models on Amazon SageMaker Endpoints step by step. You'll learn the essentials of preparing your AWS environment, configuring the model appropriately, and using tools like AWS CloudFormation and the AWS Cloud Development Kit (CDK) for deployment. 
+This guide will take you through the process of deploying YOLO11 [PyTorch](https://www.ultralytics.com/glossary/pytorch) models on Amazon SageMaker Endpoints step by step. You'll learn the essentials of preparing your AWS environment, configuring the model appropriately, and using tools like AWS CloudFormation and the AWS Cloud Development Kit (CDK) for deployment. ## Amazon SageMaker @@ -18,9 +18,9 @@ This guide will take you through the process of deploying YOLOv8 [PyTorch](https [Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a machine learning service from Amazon Web Services (AWS) that simplifies the process of building, training, and deploying machine learning models. It provides a broad range of tools for handling various aspects of machine learning workflows. This includes automated features for tuning models, options for training models at scale, and straightforward methods for deploying models into production. SageMaker supports popular machine learning frameworks, offering the flexibility needed for diverse projects. Its features also cover data labeling, workflow management, and performance analysis. -## Deploying YOLOv8 on Amazon SageMaker Endpoints +## Deploying YOLO11 on Amazon SageMaker Endpoints -Deploying YOLOv8 on Amazon SageMaker lets you use its managed environment for real-time inference and take advantage of features like autoscaling. Take a look at the AWS architecture below. +Deploying YOLO11 on Amazon SageMaker lets you use its managed environment for real-time inference and take advantage of features like autoscaling. Take a look at the AWS architecture below.

AWS Architecture @@ -40,9 +40,9 @@ First, ensure you have the following prerequisites in place: - Adequate Service Quota: Confirm that you have sufficient quotas for two separate resources in Amazon SageMaker: one for `ml.m5.4xlarge` for endpoint usage and another for `ml.m5.4xlarge` for notebook instance usage. Each of these requires a minimum of one quota value. If your current quotas are below this requirement, it's important to request an increase for each. You can request a quota increase by following the detailed instructions in the [AWS Service Quotas documentation](https://docs.aws.amazon.com/servicequotas/latest/userguide/request-quota-increase.html#quota-console-increase). -### Step 2: Clone the YOLOv8 SageMaker Repository +### Step 2: Clone the YOLO11 SageMaker Repository -The next step is to clone the specific AWS repository that contains the resources for deploying YOLOv8 on SageMaker. This repository, hosted on GitHub, includes the necessary CDK scripts and configuration files. +The next step is to clone the specific AWS repository that contains the resources for deploying YOLO11 on SageMaker. This repository, hosted on GitHub, includes the necessary CDK scripts and configuration files. - Clone the GitHub Repository: Execute the following command in your terminal to clone the host-yolov8-on-sagemaker-endpoint repository: @@ -104,11 +104,11 @@ cdk bootstrap cdk deploy ``` -### Step 5: Deploy the YOLOv8 Model +### Step 5: Deploy the YOLO Model -Before diving into the deployment instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the deployment instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. 
-After creating the AWS CloudFormation Stack, the next step is to deploy YOLOv8. +After creating the AWS CloudFormation Stack, the next step is to deploy YOLO11. - Open the Notebook Instance: Go to the AWS Console and navigate to the Amazon SageMaker service. Select "Notebook Instances" from the dashboard, then locate the notebook instance that was created by your CDK deployment script. Open the notebook instance to access the Jupyter environment. @@ -136,18 +136,18 @@ def output_fn(prediction_output): return json.dumps(infer) ``` -- Deploy the Endpoint Using 1_DeployEndpoint.ipynb: In the Jupyter environment, open the 1_DeployEndpoint.ipynb notebook located in the sm-notebook directory. Follow the instructions in the notebook and run the cells to download the YOLOv8 model, package it with the updated inference code, and upload it to an Amazon S3 bucket. The notebook will guide you through creating and deploying a SageMaker endpoint for the YOLOv8 model. +- Deploy the Endpoint Using 1_DeployEndpoint.ipynb: In the Jupyter environment, open the 1_DeployEndpoint.ipynb notebook located in the sm-notebook directory. Follow the instructions in the notebook and run the cells to download the YOLO11 model, package it with the updated inference code, and upload it to an Amazon S3 bucket. The notebook will guide you through creating and deploying a SageMaker endpoint for the YOLO11 model. ### Step 6: Testing Your Deployment -Now that your YOLOv8 model is deployed, it's important to test its performance and functionality. +Now that your YOLO11 model is deployed, it's important to test its performance and functionality. - Open the Test Notebook: In the same Jupyter environment, locate and open the 2_TestEndpoint.ipynb notebook, also in the sm-notebook directory. - Run the Test Notebook: Follow the instructions within the notebook to test the deployed SageMaker endpoint. This includes sending an image to the endpoint and running inferences. 
Then, you'll plot the output to visualize the model's performance and [accuracy](https://www.ultralytics.com/glossary/accuracy), as shown below.

- Testing Results YOLOv8 + Testing Results YOLO11

- Clean-Up Resources: The test notebook will also guide you through the process of cleaning up the endpoint and the hosted model. This is an important step to manage costs and resources effectively, especially if you do not plan to use the deployed model immediately. @@ -160,24 +160,24 @@ After testing, continuous monitoring and management of your deployed model are e - Manage the Endpoint: Use the SageMaker console for ongoing management of the endpoint. This includes scaling, updating, or redeploying the model as required. -By completing these steps, you will have successfully deployed and tested a YOLOv8 model on Amazon SageMaker Endpoints. This process not only equips you with practical experience in using AWS services for machine learning deployment but also lays the foundation for deploying other advanced models in the future. +By completing these steps, you will have successfully deployed and tested a YOLO11 model on Amazon SageMaker Endpoints. This process not only equips you with practical experience in using AWS services for machine learning deployment but also lays the foundation for deploying other advanced models in the future. ## Summary -This guide took you step by step through deploying YOLOv8 on Amazon SageMaker Endpoints using AWS CloudFormation and the AWS Cloud Development Kit (CDK). The process includes cloning the necessary GitHub repository, setting up the CDK environment, deploying the model using AWS services, and testing its performance on SageMaker. +This guide took you step by step through deploying YOLO11 on Amazon SageMaker Endpoints using AWS CloudFormation and the AWS Cloud Development Kit (CDK). The process includes cloning the necessary GitHub repository, setting up the CDK environment, deploying the model using AWS services, and testing its performance on SageMaker. 
For more technical details, refer to [this article](https://aws.amazon.com/blogs/machine-learning/hosting-yolov8-pytorch-model-on-amazon-sagemaker-endpoints/) on the AWS Machine Learning Blog. You can also check out the official [Amazon SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints.html) for more insights into various features and functionalities. -Are you interested in learning more about different YOLOv8 integrations? Visit the [Ultralytics integrations guide page](../integrations/index.md) to discover additional tools and capabilities that can enhance your machine-learning projects. +Are you interested in learning more about different YOLO11 integrations? Visit the [Ultralytics integrations guide page](../integrations/index.md) to discover additional tools and capabilities that can enhance your machine-learning projects. ## FAQ -### How do I deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints? +### How do I deploy the Ultralytics YOLO11 model on Amazon SageMaker Endpoints? -To deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints, follow these steps: +To deploy the Ultralytics YOLO11 model on Amazon SageMaker Endpoints, follow these steps: 1. **Set Up Your AWS Environment**: Ensure you have an AWS Account, IAM roles with necessary permissions, and the AWS CLI configured. Install AWS CDK if not already done (refer to the [AWS CDK instructions](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_install)). -2. **Clone the YOLOv8 SageMaker Repository**: +2. **Clone the YOLO11 SageMaker Repository**: ```bash git clone https://github.com/aws-samples/host-yolov8-on-sagemaker-endpoint.git cd host-yolov8-on-sagemaker-endpoint/yolov8-pytorch-cdk @@ -196,11 +196,11 @@ To deploy the Ultralytics YOLOv8 model on Amazon SageMaker Endpoints, follow the cdk deploy ``` -For further details, review the [documentation section](#step-5-deploy-the-yolov8-model). 
+For further details, review the [documentation section](#step-5-deploy-the-yolo-model). -### What are the prerequisites for deploying YOLOv8 on Amazon SageMaker? +### What are the prerequisites for deploying YOLO11 on Amazon SageMaker? -To deploy YOLOv8 on Amazon SageMaker, ensure you have the following prerequisites: +To deploy YOLO11 on Amazon SageMaker, ensure you have the following prerequisites: 1. **AWS Account**: Active AWS account ([sign up here](https://aws.amazon.com/)). 2. **IAM Roles**: Configured IAM roles with permissions for SageMaker, CloudFormation, and Amazon S3. @@ -210,9 +210,9 @@ To deploy YOLOv8 on Amazon SageMaker, ensure you have the following prerequisite For detailed setup, refer to [this section](#step-1-setup-your-aws-environment). -### Why should I use Ultralytics YOLOv8 on Amazon SageMaker? +### Why should I use Ultralytics YOLO11 on Amazon SageMaker? -Using Ultralytics YOLOv8 on Amazon SageMaker offers several advantages: +Using Ultralytics YOLO11 on Amazon SageMaker offers several advantages: 1. **Scalability and Management**: SageMaker provides a managed environment with features like autoscaling, which helps in real-time inference needs. 2. **Integration with AWS Services**: Seamlessly integrate with other AWS services, such as S3 for data storage, CloudFormation for infrastructure as code, and CloudWatch for monitoring. @@ -221,9 +221,9 @@ Using Ultralytics YOLOv8 on Amazon SageMaker offers several advantages: Explore more about the advantages of using SageMaker in the [introduction section](#amazon-sagemaker). -### Can I customize the inference logic for YOLOv8 on Amazon SageMaker? +### Can I customize the inference logic for YOLO11 on Amazon SageMaker? -Yes, you can customize the inference logic for YOLOv8 on Amazon SageMaker: +Yes, you can customize the inference logic for YOLO11 on Amazon SageMaker: 1. **Modify `inference.py`**: Locate and customize the `output_fn` function in the `inference.py` file to tailor output formats. 
@@ -243,11 +243,11 @@ Yes, you can customize the inference logic for YOLOv8 on Amazon SageMaker: 2. **Deploy Updated Model**: Ensure you redeploy the model using Jupyter notebooks provided (`1_DeployEndpoint.ipynb`) to include these changes. -Refer to the [detailed steps](#step-5-deploy-the-yolov8-model) for deploying the modified model. +Refer to the [detailed steps](#step-5-deploy-the-yolo-model) for deploying the modified model. -### How can I test the deployed YOLOv8 model on Amazon SageMaker? +### How can I test the deployed YOLO11 model on Amazon SageMaker? -To test the deployed YOLOv8 model on Amazon SageMaker: +To test the deployed YOLO11 model on Amazon SageMaker: 1. **Open the Test Notebook**: Locate the `2_TestEndpoint.ipynb` notebook in the SageMaker Jupyter environment. 2. **Run the Notebook**: Follow the notebook's instructions to send an image to the endpoint, perform inference, and display results. diff --git a/docs/en/integrations/clearml.md b/docs/en/integrations/clearml.md index 5245d887408..465f2fa8d15 100644 --- a/docs/en/integrations/clearml.md +++ b/docs/en/integrations/clearml.md @@ -1,14 +1,14 @@ --- comments: true -description: Discover how to integrate YOLOv8 with ClearML to streamline your MLOps workflow, automate experiments, and enhance model management effortlessly. -keywords: YOLOv8, ClearML, MLOps, Ultralytics, machine learning, object detection, model training, automation, experiment management +description: Discover how to integrate YOLO11 with ClearML to streamline your MLOps workflow, automate experiments, and enhance model management effortlessly. 
+keywords: YOLO11, ClearML, MLOps, Ultralytics, machine learning, object detection, model training, automation, experiment management --- -# Training YOLOv8 with ClearML: Streamlining Your MLOps Workflow +# Training YOLO11 with ClearML: Streamlining Your MLOps Workflow MLOps bridges the gap between creating and deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models in real-world settings. It focuses on efficient deployment, scalability, and ongoing management to ensure models perform well in practical applications. -[Ultralytics YOLOv8](https://www.ultralytics.com/) effortlessly integrates with ClearML, streamlining and enhancing your [object detection](https://www.ultralytics.com/glossary/object-detection) model's training and management. This guide will walk you through the integration process, detailing how to set up ClearML, manage experiments, automate model management, and collaborate effectively. +[Ultralytics YOLO11](https://www.ultralytics.com/) effortlessly integrates with ClearML, streamlining and enhancing your [object detection](https://www.ultralytics.com/glossary/object-detection) model's training and management. This guide will walk you through the integration process, detailing how to set up ClearML, manage experiments, automate model management, and collaborate effectively. ## ClearML @@ -18,9 +18,9 @@ MLOps bridges the gap between creating and deploying [machine learning](https:// [ClearML](https://clear.ml/) is an innovative open-source MLOps platform that is skillfully designed to automate, monitor, and orchestrate machine learning workflows. 
Its key features include automated logging of all training and inference data for full experiment reproducibility, an intuitive web UI for easy [data visualization](https://www.ultralytics.com/glossary/data-visualization) and analysis, advanced hyperparameter [optimization algorithms](https://www.ultralytics.com/glossary/optimization-algorithm), and robust model management for efficient deployment across various platforms. -## YOLOv8 Training with ClearML +## YOLO11 Training with ClearML -You can bring automation and efficiency to your machine learning workflow by improving your training process by integrating YOLOv8 with ClearML. +You can bring automation and efficiency to your machine learning workflow by improving your training process by integrating YOLO11 with ClearML. ## Installation @@ -31,11 +31,11 @@ To install the required packages, run: === "CLI" ```bash - # Install the required packages for YOLOv8 and ClearML + # Install the required packages for YOLO11 and ClearML pip install ultralytics clearml ``` -For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ## Configuring ClearML @@ -56,7 +56,7 @@ After executing this command, visit the [ClearML Settings page](https://app.clea ## Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). 
This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. !!! example "Usage" @@ -70,11 +70,11 @@ Before diving into the usage instructions, be sure to check out the range of [YO # Step 1: Creating a ClearML Task task = Task.init(project_name="my_project", task_name="my_yolov8_task") - # Step 2: Selecting the YOLOv8 Model - model_variant = "yolov8n" + # Step 2: Selecting the YOLO11 Model + model_variant = "yolo11n" task.set_parameter("model_variant", model_variant) - # Step 3: Loading the YOLOv8 Model + # Step 3: Loading the YOLO11 Model model = YOLO(f"{model_variant}.pt") # Step 4: Setting Up Training Arguments @@ -91,11 +91,11 @@ Let's understand the steps showcased in the usage code snippet above. **Step 1: Creating a ClearML Task**: A new task is initialized in ClearML, specifying your project and task names. This task will track and manage your model's training. -**Step 2: Selecting the YOLOv8 Model**: The `model_variant` variable is set to 'yolov8n', one of the YOLOv8 models. This variant is then logged in ClearML for tracking. +**Step 2: Selecting the YOLO11 Model**: The `model_variant` variable is set to 'yolo11n', one of the YOLO11 models. This variant is then logged in ClearML for tracking. -**Step 3: Loading the YOLOv8 Model**: The selected YOLOv8 model is loaded using Ultralytics' YOLO class, preparing it for training. +**Step 3: Loading the YOLO11 Model**: The selected YOLO11 model is loaded using Ultralytics' YOLO class, preparing it for training. -**Step 4: Setting Up Training Arguments**: Key training arguments like the dataset (`coco8.yaml`) and the number of [epochs](https://www.ultralytics.com/glossary/epoch) (`16`) are organized in a dictionary and connected to the ClearML task. 
This allows for tracking and potential modification via the ClearML UI. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). +**Step 4: Setting Up Training Arguments**: Key training arguments like the dataset (`coco8.yaml`) and the number of [epochs](https://www.ultralytics.com/glossary/epoch) (`16`) are organized in a dictionary and connected to the ClearML task. This allows for tracking and potential modification via the ClearML UI. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md). **Step 5: Initiating Model Training**: The model training is started with the specified arguments. The results of the training process are captured in the `results` variable. @@ -106,7 +106,7 @@ Upon running the usage code snippet above, you can expect the following output: - A confirmation message indicating the creation of a new ClearML task, along with its unique ID. - An informational message about the script code being stored, indicating that the code execution is being tracked by ClearML. - A URL link to the ClearML results page where you can monitor the training progress and view detailed logs. -- Download progress for the YOLOv8 model and the specified dataset, followed by a summary of the model architecture and training configuration. +- Download progress for the YOLO11 model and the specified dataset, followed by a summary of the model architecture and training configuration. - Initialization messages for various training components like TensorBoard, Automatic [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) (AMP), and dataset preparation. - Finally, the training process starts, with progress updates as the model trains on the specified dataset. 
For an in-depth understanding of the performance metrics used during training, read [our guide on performance metrics](../guides/yolo-performance-metrics.md). @@ -151,7 +151,7 @@ For a visual walkthrough of what the ClearML Results Page looks like, watch the allowfullscreen>
- Watch: YOLOv8 MLOps Integration using ClearML + Watch: YOLO11 MLOps Integration using ClearML

### Advanced Features in ClearML @@ -180,7 +180,7 @@ ClearML's user-friendly interface allows easy cloning, editing, and enqueuing of ## Summary -This guide has led you through the process of integrating ClearML with Ultralytics' YOLOv8. Covering everything from initial setup to advanced model management, you've discovered how to leverage ClearML for efficient training, experiment tracking, and workflow optimization in your machine learning projects. +This guide has led you through the process of integrating ClearML with Ultralytics' YOLO11. Covering everything from initial setup to advanced model management, you've discovered how to leverage ClearML for efficient training, experiment tracking, and workflow optimization in your machine learning projects. For further details on usage, visit [ClearML's official documentation](https://clear.ml/docs/latest/docs/integrations/yolov8/). @@ -188,9 +188,9 @@ Additionally, explore more integrations and capabilities of Ultralytics by visit ## FAQ -### What is the process for integrating Ultralytics YOLOv8 with ClearML? +### What is the process for integrating Ultralytics YOLO11 with ClearML? -Integrating Ultralytics YOLOv8 with ClearML involves a series of steps to streamline your MLOps workflow. First, install the necessary packages: +Integrating Ultralytics YOLO11 with ClearML involves a series of steps to streamline your MLOps workflow. First, install the necessary packages: ```bash pip install ultralytics clearml @@ -202,19 +202,19 @@ Next, initialize the ClearML SDK in your environment using: clearml-init ``` -You then configure ClearML with your credentials from the [ClearML Settings page](https://app.clear.ml/settings/workspace-configuration). Detailed instructions on the entire setup process, including model selection and training configurations, can be found in our [YOLOv8 Model Training guide](../modes/train.md). 
+You then configure ClearML with your credentials from the [ClearML Settings page](https://app.clear.ml/settings/workspace-configuration). Detailed instructions on the entire setup process, including model selection and training configurations, can be found in our [YOLO11 Model Training guide](../modes/train.md). -### Why should I use ClearML with Ultralytics YOLOv8 for my machine learning projects? +### Why should I use ClearML with Ultralytics YOLO11 for my machine learning projects? -Using ClearML with Ultralytics YOLOv8 enhances your machine learning projects by automating experiment tracking, streamlining workflows, and enabling robust model management. ClearML offers real-time metrics tracking, resource utilization monitoring, and a user-friendly interface for comparing experiments. These features help optimize your model's performance and make the development process more efficient. Learn more about the benefits and procedures in our [MLOps Integration guide](../modes/train.md). +Using ClearML with Ultralytics YOLO11 enhances your machine learning projects by automating experiment tracking, streamlining workflows, and enabling robust model management. ClearML offers real-time metrics tracking, resource utilization monitoring, and a user-friendly interface for comparing experiments. These features help optimize your model's performance and make the development process more efficient. Learn more about the benefits and procedures in our [MLOps Integration guide](../modes/train.md). -### How do I troubleshoot common issues during YOLOv8 and ClearML integration? +### How do I troubleshoot common issues during YOLO11 and ClearML integration? -If you encounter issues during the integration of YOLOv8 with ClearML, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. Typical problems might involve package installation errors, credential setup, or configuration issues. 
This guide provides step-by-step troubleshooting instructions to resolve these common issues efficiently. +If you encounter issues during the integration of YOLO11 with ClearML, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. Typical problems might involve package installation errors, credential setup, or configuration issues. This guide provides step-by-step troubleshooting instructions to resolve these common issues efficiently. -### How do I set up the ClearML task for YOLOv8 model training? +### How do I set up the ClearML task for YOLO11 model training? -Setting up a ClearML task for YOLOv8 training involves initializing a task, selecting the model variant, loading the model, setting up training arguments, and finally, starting the model training. Here's a simplified example: +Setting up a ClearML task for YOLO11 training involves initializing a task, selecting the model variant, loading the model, setting up training arguments, and finally, starting the model training. Here's a simplified example: ```python from clearml import Task @@ -224,11 +224,11 @@ from ultralytics import YOLO # Step 1: Creating a ClearML Task task = Task.init(project_name="my_project", task_name="my_yolov8_task") -# Step 2: Selecting the YOLOv8 Model -model_variant = "yolov8n" +# Step 2: Selecting the YOLO11 Model +model_variant = "yolo11n" task.set_parameter("model_variant", model_variant) -# Step 3: Loading the YOLOv8 Model +# Step 3: Loading the YOLO11 Model model = YOLO(f"{model_variant}.pt") # Step 4: Setting Up Training Arguments @@ -241,6 +241,6 @@ results = model.train(**args) Refer to our [Usage guide](#usage) for a detailed breakdown of these steps. -### Where can I view the results of my YOLOv8 training in ClearML? +### Where can I view the results of my YOLO11 training in ClearML? -After running your YOLOv8 training script with ClearML, you can view the results on the ClearML results page. 
The output will include a URL link to the ClearML dashboard, where you can track metrics, compare experiments, and monitor resource usage. For more details on how to view and interpret the results, check our section on [Viewing the ClearML Results Page](#viewing-the-clearml-results-page). +After running your YOLO11 training script with ClearML, you can view the results on the ClearML results page. The output will include a URL link to the ClearML dashboard, where you can track metrics, compare experiments, and monitor resource usage. For more details on how to view and interpret the results, check our section on [Viewing the ClearML Results Page](#viewing-the-clearml-results-page). diff --git a/docs/en/integrations/comet.md b/docs/en/integrations/comet.md index 2591973515f..24d69c04453 100644 --- a/docs/en/integrations/comet.md +++ b/docs/en/integrations/comet.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn to simplify the logging of YOLOv8 training with Comet ML. This guide covers installation, setup, real-time insights, and custom logging. -keywords: YOLOv8, Comet ML, logging, machine learning, training, model checkpoints, metrics, installation, configuration, real-time insights, custom logging +description: Learn to simplify the logging of YOLO11 training with Comet ML. This guide covers installation, setup, real-time insights, and custom logging. +keywords: YOLO11, Comet ML, logging, machine learning, training, model checkpoints, metrics, installation, configuration, real-time insights, custom logging --- -# Elevating YOLOv8 Training: Simplify Your Logging Process with Comet ML +# Elevating YOLO11 Training: Simplify Your Logging Process with Comet ML Logging key training details such as parameters, metrics, image predictions, and model checkpoints is essential in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml)—it keeps your project transparent, your progress measurable, and your results repeatable. 
-[Ultralytics YOLOv8](https://www.ultralytics.com/) seamlessly integrates with Comet ML, efficiently capturing and optimizing every aspect of your YOLOv8 [object detection](https://www.ultralytics.com/glossary/object-detection) model's training process. In this guide, we'll cover the installation process, Comet ML setup, real-time insights, custom logging, and offline usage, ensuring that your YOLOv8 training is thoroughly documented and fine-tuned for outstanding results. +[Ultralytics YOLO11](https://www.ultralytics.com/) seamlessly integrates with Comet ML, efficiently capturing and optimizing every aspect of your YOLO11 [object detection](https://www.ultralytics.com/glossary/object-detection) model's training process. In this guide, we'll cover the installation process, Comet ML setup, real-time insights, custom logging, and offline usage, ensuring that your YOLO11 training is thoroughly documented and fine-tuned for outstanding results. ## Comet ML @@ -18,9 +18,9 @@ Logging key training details such as parameters, metrics, image predictions, and [Comet ML](https://www.comet.com/site/) is a platform for tracking, comparing, explaining, and optimizing machine learning models and experiments. It allows you to log metrics, parameters, media, and more during your model training and monitor your experiments through an aesthetically pleasing web interface. Comet ML helps data scientists iterate more rapidly, enhances transparency and reproducibility, and aids in the development of production models. -## Harnessing the Power of YOLOv8 and Comet ML +## Harnessing the Power of YOLO11 and Comet ML -By combining Ultralytics YOLOv8 with Comet ML, you unlock a range of benefits. These include simplified experiment management, real-time insights for quick adjustments, flexible and tailored logging options, and the ability to log experiments offline when internet access is limited. 
This integration empowers you to make data-driven decisions, analyze performance metrics, and achieve exceptional results. +By combining Ultralytics YOLO11 with Comet ML, you unlock a range of benefits. These include simplified experiment management, real-time insights for quick adjustments, flexible and tailored logging options, and the ability to log experiments offline when internet access is limited. This integration empowers you to make data-driven decisions, analyze performance metrics, and achieve exceptional results. ## Installation @@ -31,7 +31,7 @@ To install the required packages, run: === "CLI" ```bash - # Install the required packages for YOLOv8 and Comet ML + # Install the required packages for YOLO11 and Comet ML pip install ultralytics comet_ml torch torchvision ``` @@ -50,17 +50,21 @@ After installing the required packages, you'll need to sign up, get a [Comet API Then, you can initialize your Comet project. Comet will automatically detect the API key and proceed with the setup. -```python -import comet_ml +!!! example "Initialize Comet project" -comet_ml.login(project_name="comet-example-yolov8-coco128") -``` + === "Python" + + ```python + import comet_ml + + comet_ml.login(project_name="comet-example-yolo11-coco128") + ``` If you are using a Google Colab notebook, the code above will prompt you to enter your API key for initialization. ## Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/yolo11.md). This will help you choose the most appropriate model for your project requirements. !!! 
example "Usage" @@ -70,12 +74,12 @@ Before diving into the usage instructions, be sure to check out the range of [YO from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train( data="coco8.yaml", - project="comet-example-yolov8-coco128", + project="comet-example-yolo11-coco128", batch=32, save_period=1, save_json=True, @@ -83,13 +87,13 @@ Before diving into the usage instructions, be sure to check out the range of [YO ) ``` -After running the training code, Comet ML will create an experiment in your Comet workspace to track the run automatically. You will then be provided with a link to view the detailed logging of your [YOLOv8 model's training](../modes/train.md) process. +After running the training code, Comet ML will create an experiment in your Comet workspace to track the run automatically. You will then be provided with a link to view the detailed logging of your [YOLO11 model's training](../modes/train.md) process. Comet automatically logs the following data with no additional configuration: metrics such as mAP and loss, hyperparameters, model checkpoints, interactive confusion matrix, and image [bounding box](https://www.ultralytics.com/glossary/bounding-box) predictions. ## Understanding Your Model's Performance with Comet ML Visualizations -Let's dive into what you'll see on the Comet ML dashboard once your YOLOv8 model begins training. The dashboard is where all the action happens, presenting a range of automatically logged information through visuals and statistics. Here's a quick tour: +Let's dive into what you'll see on the Comet ML dashboard once your YOLO11 model begins training. The dashboard is where all the action happens, presenting a range of automatically logged information through visuals and statistics. 
Here's a quick tour: **Experiment Panels** @@ -169,19 +173,19 @@ os.environ["COMET_MODE"] = "offline" ## Summary -This guide has walked you through integrating Comet ML with Ultralytics' YOLOv8. From installation to customization, you've learned to streamline experiment management, gain real-time insights, and adapt logging to your project's needs. +This guide has walked you through integrating Comet ML with Ultralytics' YOLO11. From installation to customization, you've learned to streamline experiment management, gain real-time insights, and adapt logging to your project's needs. -Explore [Comet ML's official documentation](https://www.comet.com/docs/v2/integrations/third-party-tools/yolov8/) for more insights on integrating with YOLOv8. +Explore [Comet ML's official documentation](https://www.comet.com/docs/v2/integrations/third-party-tools/yolov8/) for more insights on integrating with YOLO11. -Furthermore, if you're looking to dive deeper into the practical applications of YOLOv8, specifically for [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) tasks, this detailed guide on [fine-tuning YOLOv8 with Comet ML](https://www.comet.com/site/blog/fine-tuning-yolov8-for-image-segmentation-with-comet/) offers valuable insights and step-by-step instructions to enhance your model's performance. +Furthermore, if you're looking to dive deeper into the practical applications of YOLO11, specifically for [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) tasks, this detailed guide on [fine-tuning YOLO11 with Comet ML](https://www.comet.com/site/blog/fine-tuning-yolov8-for-image-segmentation-with-comet/) offers valuable insights and step-by-step instructions to enhance your model's performance. Additionally, to explore other exciting integrations with Ultralytics, check out the [integration guide page](../integrations/index.md), which offers a wealth of resources and information. 
## FAQ -### How do I integrate Comet ML with Ultralytics YOLOv8 for training? +### How do I integrate Comet ML with Ultralytics YOLO11 for training? -To integrate Comet ML with Ultralytics YOLOv8, follow these steps: +To integrate Comet ML with Ultralytics YOLO11, follow these steps: 1. **Install the required packages**: @@ -200,18 +204,18 @@ To integrate Comet ML with Ultralytics YOLOv8, follow these steps: ```python import comet_ml - comet_ml.login(project_name="comet-example-yolov8-coco128") + comet_ml.login(project_name="comet-example-yolo11-coco128") ``` -4. **Train your YOLOv8 model and log metrics**: +4. **Train your YOLO11 model and log metrics**: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.train( data="coco8.yaml", - project="comet-example-yolov8-coco128", + project="comet-example-yolo11-coco128", batch=32, save_period=1, save_json=True, @@ -221,9 +225,9 @@ To integrate Comet ML with Ultralytics YOLOv8, follow these steps: For more detailed instructions, refer to the [Comet ML configuration section](#configuring-comet-ml). -### What are the benefits of using Comet ML with YOLOv8? +### What are the benefits of using Comet ML with YOLO11? -By integrating Ultralytics YOLOv8 with Comet ML, you can: +By integrating Ultralytics YOLO11 with Comet ML, you can: - **Monitor real-time insights**: Get instant feedback on your training results, allowing for quick adjustments. - **Log extensive metrics**: Automatically capture essential metrics such as mAP, loss, hyperparameters, and model checkpoints. @@ -232,7 +236,7 @@ By integrating Ultralytics YOLOv8 with Comet ML, you can: By leveraging these features, you can optimize your machine learning workflows for better performance and reproducibility. For more information, visit the [Comet ML integration guide](../integrations/index.md). -### How do I customize the logging behavior of Comet ML during YOLOv8 training? 
+### How do I customize the logging behavior of Comet ML during YOLO11 training? Comet ML allows for extensive customization of its logging behavior using environment variables: @@ -262,9 +266,9 @@ Comet ML allows for extensive customization of its logging behavior using enviro Refer to the [Customizing Comet ML Logging](#customizing-comet-ml-logging) section for more customization options. -### How do I view detailed metrics and visualizations of my YOLOv8 training on Comet ML? +### How do I view detailed metrics and visualizations of my YOLO11 training on Comet ML? -Once your YOLOv8 model starts training, you can access a wide range of metrics and visualizations on the Comet ML dashboard. Key features include: +Once your YOLO11 model starts training, you can access a wide range of metrics and visualizations on the Comet ML dashboard. Key features include: - **Experiment Panels**: View different runs and their metrics, including segment mask loss, class loss, and mean average [precision](https://www.ultralytics.com/glossary/precision). - **Metrics**: Examine metrics in tabular format for detailed analysis. @@ -273,7 +277,7 @@ Once your YOLOv8 model starts training, you can access a wide range of metrics a For a detailed overview of these features, visit the [Understanding Your Model's Performance with Comet ML Visualizations](#understanding-your-models-performance-with-comet-ml-visualizations) section. -### Can I use Comet ML for offline logging when training YOLOv8 models? +### Can I use Comet ML for offline logging when training YOLO11 models? 
Yes, you can enable offline logging in Comet ML by setting the `COMET_MODE` environment variable to "offline": diff --git a/docs/en/integrations/coreml.md b/docs/en/integrations/coreml.md index 352e1753048..41ff20d10f7 100644 --- a/docs/en/integrations/coreml.md +++ b/docs/en/integrations/coreml.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to export YOLOv8 models to CoreML for optimized, on-device machine learning on iOS and macOS. Follow step-by-step instructions. -keywords: CoreML export, YOLOv8 models, CoreML conversion, Ultralytics, iOS object detection, macOS machine learning, AI deployment, machine learning integration +description: Learn how to export YOLO11 models to CoreML for optimized, on-device machine learning on iOS and macOS. Follow step-by-step instructions. +keywords: CoreML export, YOLO11 models, CoreML conversion, Ultralytics, iOS object detection, macOS machine learning, AI deployment, machine learning integration --- -# CoreML Export for YOLOv8 Models +# CoreML Export for YOLO11 Models Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models on Apple devices like iPhones and Macs requires a format that ensures seamless performance. -The CoreML export format allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for efficient [object detection](https://www.ultralytics.com/glossary/object-detection) in iOS and macOS applications. In this guide, we'll walk you through the steps for converting your models to the CoreML format, making it easier for your models to perform well on Apple devices. +The CoreML export format allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for efficient [object detection](https://www.ultralytics.com/glossary/object-detection) in iOS and macOS applications. 
In this guide, we'll walk you through the steps for converting your models to the CoreML format, making it easier for your models to perform well on Apple devices. ## CoreML @@ -40,7 +40,7 @@ Apple's CoreML framework offers robust features for on-device machine learning. ## CoreML Deployment Options -Before we look at the code for exporting YOLOv8 models to the CoreML format, let's understand where CoreML models are usually used. +Before we look at the code for exporting YOLO11 models to the CoreML format, let's understand where CoreML models are usually used. CoreML offers various deployment options for machine learning models, including: @@ -52,9 +52,9 @@ CoreML offers various deployment options for machine learning models, including: - **Cloud-Based Deployment**: CoreML models are hosted on servers and accessed by the iOS app through API requests. This scalable and flexible option enables easy model updates without app revisions. It's ideal for complex models or large-scale apps requiring regular updates. However, it does require an internet connection and may pose latency and security issues. -## Exporting YOLOv8 Models to CoreML +## Exporting YOLO11 Models to CoreML -Exporting YOLOv8 to CoreML enables optimized, on-device machine learning performance within Apple's ecosystem, offering benefits in terms of efficiency, security, and seamless integration with iOS, macOS, watchOS, and tvOS platforms. +Exporting YOLO11 to CoreML enables optimized, on-device machine learning performance within Apple's ecosystem, offering benefits in terms of efficiency, security, and seamless integration with iOS, macOS, watchOS, and tvOS platforms. 
### Installation @@ -65,15 +65,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. !!! 
example "Usage" @@ -82,14 +82,14 @@ Before diving into the usage instructions, be sure to check out the range of [YO ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to CoreML format - model.export(format="coreml") # creates 'yolov8n.mlpackage' + model.export(format="coreml") # creates 'yolo11n.mlpackage' # Load the exported CoreML model - coreml_model = YOLO("yolov8n.mlpackage") + coreml_model = YOLO("yolo11n.mlpackage") # Run inference results = coreml_model("https://ultralytics.com/images/bus.jpg") @@ -98,18 +98,18 @@ Before diving into the usage instructions, be sure to check out the range of [YO === "CLI" ```bash - # Export a YOLOv8n PyTorch model to CoreML format - yolo export model=yolov8n.pt format=coreml # creates 'yolov8n.mlpackage'' + # Export a YOLO11n PyTorch model to CoreML format + yolo export model=yolo11n.pt format=coreml # creates 'yolo11n.mlpackage' # Run inference with the exported model - yolo predict model=yolov8n.mlpackage source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.mlpackage source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). -## Deploying Exported YOLOv8 CoreML Models +## Deploying Exported YOLO11 CoreML Models -Having successfully exported your Ultralytics YOLOv8 models to CoreML, the next critical phase is deploying these models effectively. For detailed guidance on deploying CoreML models in various environments, check out these resources: +Having successfully exported your Ultralytics YOLO11 models to CoreML, the next critical phase is deploying these models effectively. 
For detailed guidance on deploying CoreML models in various environments, check out these resources: - **[CoreML Tools](https://apple.github.io/coremltools/docs-guides/)**: This guide includes instructions and examples to convert models from [TensorFlow](https://www.ultralytics.com/glossary/tensorflow), PyTorch, and other libraries to Core ML. @@ -119,17 +119,17 @@ Having successfully exported your Ultralytics YOLOv8 models to CoreML, the next ## Summary -In this guide, we went over how to export Ultralytics YOLOv8 models to CoreML format. By following the steps outlined in this guide, you can ensure maximum compatibility and performance when exporting YOLOv8 models to CoreML. +In this guide, we went over how to export Ultralytics YOLO11 models to CoreML format. By following the steps outlined in this guide, you can ensure maximum compatibility and performance when exporting YOLO11 models to CoreML. For further details on usage, visit the [CoreML official documentation](https://developer.apple.com/documentation/coreml). -Also, if you'd like to know more about other Ultralytics YOLOv8 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of valuable resources and insights there. +Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of valuable resources and insights there. ## FAQ -### How do I export YOLOv8 models to CoreML format? +### How do I export YOLO11 models to CoreML format? -To export your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to CoreML format, you'll first need to ensure you have the `ultralytics` package installed. You can install it using: +To export your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to CoreML format, you'll first need to ensure you have the `ultralytics` package installed. You can install it using: !!! 
example "Installation" @@ -148,21 +148,21 @@ Next, you can export the model using the following Python or CLI commands: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.export(format="coreml") ``` === "CLI" ```bash - yolo export model=yolov8n.pt format=coreml + yolo export model=yolo11n.pt format=coreml ``` -For further details, refer to the [Exporting YOLOv8 Models to CoreML](../modes/export.md) section of our documentation. +For further details, refer to the [Exporting YOLO11 Models to CoreML](../modes/export.md) section of our documentation. -### What are the benefits of using CoreML for deploying YOLOv8 models? +### What are the benefits of using CoreML for deploying YOLO11 models? -CoreML provides numerous advantages for deploying [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models on Apple devices: +CoreML provides numerous advantages for deploying [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models on Apple devices: - **On-device Processing**: Enables local model inference on devices, ensuring [data privacy](https://www.ultralytics.com/glossary/data-privacy) and minimizing latency. - **Performance Optimization**: Leverages the full potential of the device's CPU, GPU, and Neural Engine, optimizing both speed and efficiency. @@ -171,9 +171,9 @@ CoreML provides numerous advantages for deploying [Ultralytics YOLOv8](https://g For more details on integrating your CoreML model into an iOS app, check out the guide on [Integrating a Core ML Model into Your App](https://developer.apple.com/documentation/coreml/integrating-a-core-ml-model-into-your-app). -### What are the deployment options for YOLOv8 models exported to CoreML? +### What are the deployment options for YOLO11 models exported to CoreML? 
-Once you export your YOLOv8 model to CoreML format, you have multiple deployment options: +Once you export your YOLO11 model to CoreML format, you have multiple deployment options: 1. **On-Device Deployment**: Directly integrate CoreML models into your app for enhanced privacy and offline functionality. This can be done as: @@ -184,9 +184,9 @@ Once you export your YOLOv8 model to CoreML format, you have multiple deployment For detailed guidance on deploying CoreML models, refer to [CoreML Deployment Options](#coreml-deployment-options). -### How does CoreML ensure optimized performance for YOLOv8 models? +### How does CoreML ensure optimized performance for YOLO11 models? -CoreML ensures optimized performance for [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models by utilizing various optimization techniques: +CoreML ensures optimized performance for [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models by utilizing various optimization techniques: - **Hardware Acceleration**: Uses the device's CPU, GPU, and Neural Engine for efficient computation. - **Model Compression**: Provides tools for compressing models to reduce their footprint without compromising accuracy. @@ -205,14 +205,14 @@ Yes, you can run inference directly using the exported CoreML model. Below are t ```python from ultralytics import YOLO - coreml_model = YOLO("yolov8n.mlpackage") + coreml_model = YOLO("yolo11n.mlpackage") results = coreml_model("https://ultralytics.com/images/bus.jpg") ``` === "CLI" ```bash - yolo predict model=yolov8n.mlpackage source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.mlpackage source='https://ultralytics.com/images/bus.jpg' ``` For additional information, refer to the [Usage section](#usage) of the CoreML export guide. 
diff --git a/docs/en/integrations/dvc.md b/docs/en/integrations/dvc.md index 76ba91b4320..c90377e06fe 100644 --- a/docs/en/integrations/dvc.md +++ b/docs/en/integrations/dvc.md @@ -1,14 +1,14 @@ --- comments: true -description: Unlock seamless YOLOv8 tracking with DVCLive. Discover how to log, visualize, and analyze experiments for optimized ML model performance. -keywords: YOLOv8, DVCLive, experiment tracking, machine learning, model training, data visualization, Git integration +description: Unlock seamless YOLO11 tracking with DVCLive. Discover how to log, visualize, and analyze experiments for optimized ML model performance. +keywords: YOLO11, DVCLive, experiment tracking, machine learning, model training, data visualization, Git integration --- -# Advanced YOLOv8 Experiment Tracking with DVCLive +# Advanced YOLO11 Experiment Tracking with DVCLive Experiment tracking in [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) is critical to model development and evaluation. It involves recording and analyzing various parameters, metrics, and outcomes from numerous training runs. This process is essential for understanding model performance and making data-driven decisions to refine and optimize models. -Integrating DVCLive with [Ultralytics YOLOv8](https://www.ultralytics.com/) transforms the way experiments are tracked and managed. This integration offers a seamless solution for automatically logging key experiment details, comparing results across different runs, and visualizing data for in-depth analysis. In this guide, we'll understand how DVCLive can be used to streamline the process. +Integrating DVCLive with [Ultralytics YOLO11](https://www.ultralytics.com/) transforms the way experiments are tracked and managed. This integration offers a seamless solution for automatically logging key experiment details, comparing results across different runs, and visualizing data for in-depth analysis. 
In this guide, we'll understand how DVCLive can be used to streamline the process. ## DVCLive @@ -18,9 +18,9 @@ Integrating DVCLive with [Ultralytics YOLOv8](https://www.ultralytics.com/) tran [DVCLive](https://dvc.org/doc/dvclive), developed by DVC, is an innovative open-source tool for experiment tracking in machine learning. Integrating seamlessly with Git and DVC, it automates the logging of crucial experiment data like model parameters and training metrics. Designed for simplicity, DVCLive enables effortless comparison and analysis of multiple runs, enhancing the efficiency of machine learning projects with intuitive [data visualization](https://www.ultralytics.com/glossary/data-visualization) and analysis tools. -## YOLOv8 Training with DVCLive +## YOLO11 Training with DVCLive -YOLOv8 training sessions can be effectively monitored with DVCLive. Additionally, DVC provides integral features for visualizing these experiments, including the generation of a report that enables the comparison of metric plots across all tracked experiments, offering a comprehensive view of the training process. +YOLO11 training sessions can be effectively monitored with DVCLive. Additionally, DVC provides integral features for visualizing these experiments, including the generation of a report that enables the comparison of metric plots across all tracked experiments, offering a comprehensive view of the training process. ## Installation @@ -31,11 +31,11 @@ To install the required packages, run: === "CLI" ```bash - # Install the required packages for YOLOv8 and DVCLive + # Install the required packages for YOLO11 and DVCLive pip install ultralytics dvclive ``` -For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. 
+For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ## Configuring DVCLive @@ -66,27 +66,27 @@ In these commands, ensure to replace "you@example.com" with the email address as ## Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. -### Training YOLOv8 Models with DVCLive +### Training YOLO11 Models with DVCLive -Start by running your YOLOv8 training sessions. You can use different model configurations and training parameters to suit your project needs. For instance: +Start by running your YOLO11 training sessions. You can use different model configurations and training parameters to suit your project needs. For instance: ```bash -# Example training commands for YOLOv8 with varying configurations -yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=512 -yolo train model=yolov8n.pt data=coco8.yaml epochs=5 imgsz=640 +# Example training commands for YOLO11 with varying configurations +yolo train model=yolo11n.pt data=coco8.yaml epochs=5 imgsz=512 +yolo train model=yolo11n.pt data=coco8.yaml epochs=5 imgsz=640 ``` -Adjust the model, data, [epochs](https://www.ultralytics.com/glossary/epoch), and imgsz parameters according to your specific requirements. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). 
+Adjust the model, data, [epochs](https://www.ultralytics.com/glossary/epoch), and imgsz parameters according to your specific requirements. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md). ### Monitoring Experiments with DVCLive -DVCLive enhances the training process by enabling the tracking and visualization of key metrics. When installed, Ultralytics YOLOv8 automatically integrates with DVCLive for experiment tracking, which you can later analyze for performance insights. For a comprehensive understanding of the specific performance metrics used during training, be sure to explore [our detailed guide on performance metrics](../guides/yolo-performance-metrics.md). +DVCLive enhances the training process by enabling the tracking and visualization of key metrics. When installed, Ultralytics YOLO11 automatically integrates with DVCLive for experiment tracking, which you can later analyze for performance insights. For a comprehensive understanding of the specific performance metrics used during training, be sure to explore [our detailed guide on performance metrics](../guides/yolo-performance-metrics.md). ### Analyzing Results -After your YOLOv8 training sessions are complete, you can leverage DVCLive's powerful visualization tools for in-depth analysis of the results. DVCLive's integration ensures that all training metrics are systematically logged, facilitating a comprehensive evaluation of your model's performance. +After your YOLO11 training sessions are complete, you can leverage DVCLive's powerful visualization tools for in-depth analysis of the results. DVCLive's integration ensures that all training metrics are systematically logged, facilitating a comprehensive evaluation of your model's performance. 
To start the analysis, you can extract the experiment data using DVC's API and process it with Pandas for easier handling and visualization: @@ -108,7 +108,7 @@ df.reset_index(drop=True, inplace=True) print(df) ``` -The output of the code snippet above provides a clear tabular view of the different experiments conducted with YOLOv8 models. Each row represents a different training run, detailing the experiment's name, the number of epochs, image size (imgsz), the specific model used, and the mAP50-95(B) metric. This metric is crucial for evaluating the model's [accuracy](https://www.ultralytics.com/glossary/accuracy), with higher values indicating better performance. +The output of the code snippet above provides a clear tabular view of the different experiments conducted with YOLO11 models. Each row represents a different training run, detailing the experiment's name, the number of epochs, image size (imgsz), the specific model used, and the mAP50-95(B) metric. This metric is crucial for evaluating the model's [accuracy](https://www.ultralytics.com/glossary/accuracy), with higher values indicating better performance. #### Visualizing Results with Plotly @@ -164,7 +164,7 @@ Based on your analysis, iterate on your experiments. Adjust model configurations ## Summary -This guide has led you through the process of integrating DVCLive with Ultralytics' YOLOv8. You have learned how to harness the power of DVCLive for detailed experiment monitoring, effective visualization, and insightful analysis in your machine learning endeavors. +This guide has led you through the process of integrating DVCLive with Ultralytics' YOLO11. You have learned how to harness the power of DVCLive for detailed experiment monitoring, effective visualization, and insightful analysis in your machine learning endeavors. For further details on usage, visit [DVCLive's official documentation](https://dvc.org/doc/dvclive/ml-frameworks/yolo). 
@@ -172,9 +172,9 @@ Additionally, explore more integrations and capabilities of Ultralytics by visit ## FAQ -### How do I integrate DVCLive with Ultralytics YOLOv8 for experiment tracking? +### How do I integrate DVCLive with Ultralytics YOLO11 for experiment tracking? -Integrating DVCLive with Ultralytics YOLOv8 is straightforward. Start by installing the necessary packages: +Integrating DVCLive with Ultralytics YOLO11 is straightforward. Start by installing the necessary packages: !!! example "Installation" @@ -198,21 +198,21 @@ Next, initialize a Git repository and configure DVCLive in your project: git commit -m "DVC init" ``` -Follow our [YOLOv8 Installation guide](../quickstart.md) for detailed setup instructions. +Follow our [YOLO11 Installation guide](../quickstart.md) for detailed setup instructions. -### Why should I use DVCLive for tracking YOLOv8 experiments? +### Why should I use DVCLive for tracking YOLO11 experiments? -Using DVCLive with YOLOv8 provides several advantages, such as: +Using DVCLive with YOLO11 provides several advantages, such as: - **Automated Logging**: DVCLive automatically records key experiment details like model parameters and metrics. - **Easy Comparison**: Facilitates comparison of results across different runs. - **Visualization Tools**: Leverages DVCLive's robust data visualization capabilities for in-depth analysis. -For further details, refer to our guide on [YOLOv8 Model Training](../modes/train.md) and [YOLO Performance Metrics](../guides/yolo-performance-metrics.md) to maximize your experiment tracking efficiency. +For further details, refer to our guide on [YOLO11 Model Training](../modes/train.md) and [YOLO Performance Metrics](../guides/yolo-performance-metrics.md) to maximize your experiment tracking efficiency. -### How can DVCLive improve my results analysis for YOLOv8 training sessions? +### How can DVCLive improve my results analysis for YOLO11 training sessions? 
-After completing your YOLOv8 training sessions, DVCLive helps in visualizing and analyzing the results effectively. Example code for loading and displaying experiment data: +After completing your YOLO11 training sessions, DVCLive helps in visualizing and analyzing the results effectively. Example code for loading and displaying experiment data: ```python import dvc.api @@ -241,11 +241,11 @@ fig = parallel_coordinates(df, columns, color="metrics.mAP50-95(B)") fig.show() ``` -Refer to our guide on [YOLOv8 Training with DVCLive](#yolov8-training-with-dvclive) for more examples and best practices. +Refer to our guide on [YOLO11 Training with DVCLive](#yolo11-training-with-dvclive) for more examples and best practices. -### What are the steps to configure my environment for DVCLive and YOLOv8 integration? +### What are the steps to configure my environment for DVCLive and YOLO11 integration? -To configure your environment for a smooth integration of DVCLive and YOLOv8, follow these steps: +To configure your environment for a smooth integration of DVCLive and YOLO11, follow these steps: 1. **Install Required Packages**: Use `pip install ultralytics dvclive`. 2. **Initialize Git Repository**: Run `git init -q`. @@ -254,9 +254,9 @@ To configure your environment for a smooth integration of DVCLive and YOLOv8, fo These steps ensure proper version control and setup for experiment tracking. For in-depth configuration details, visit our [Configuration guide](../quickstart.md). -### How do I visualize YOLOv8 experiment results using DVCLive? +### How do I visualize YOLO11 experiment results using DVCLive? -DVCLive offers powerful tools to visualize the results of YOLOv8 experiments. Here's how you can generate comparative plots: +DVCLive offers powerful tools to visualize the results of YOLO11 experiments. Here's how you can generate comparative plots: !!! 
example "Generate Comparative Plots" @@ -275,4 +275,4 @@ from IPython.display import HTML HTML(filename="./dvc_plots/index.html") ``` -These visualizations help identify trends and optimize model performance. Check our detailed guides on [YOLOv8 Experiment Analysis](#analyzing-results) for comprehensive steps and examples. +These visualizations help identify trends and optimize model performance. Check our detailed guides on [YOLO11 Experiment Analysis](#analyzing-results) for comprehensive steps and examples. diff --git a/docs/en/integrations/edge-tpu.md b/docs/en/integrations/edge-tpu.md index d72410c5a77..f8821e47bd4 100644 --- a/docs/en/integrations/edge-tpu.md +++ b/docs/en/integrations/edge-tpu.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to export YOLOv8 models to TFLite Edge TPU format for high-speed, low-power inferencing on mobile and embedded devices. -keywords: YOLOv8, TFLite Edge TPU, TensorFlow Lite, model export, machine learning, edge computing, neural networks, Ultralytics +description: Learn how to export YOLO11 models to TFLite Edge TPU format for high-speed, low-power inferencing on mobile and embedded devices. +keywords: YOLO11, TFLite Edge TPU, TensorFlow Lite, model export, machine learning, edge computing, neural networks, Ultralytics --- -# Learn to Export to TFLite Edge TPU Format From YOLOv8 Model +# Learn to Export to TFLite Edge TPU Format From YOLO11 Model Deploying computer vision models on devices with limited computational power, such as mobile or embedded systems, can be tricky. Using a model format that is optimized for faster performance simplifies the process. The [TensorFlow Lite](https://ai.google.dev/edge/litert) [Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) or TFLite Edge TPU model format is designed to use minimal power while delivering fast performance for neural networks. 
-The export to TFLite Edge TPU format feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through converting your models to the TFLite Edge TPU format, making it easier for your models to perform well on various mobile and embedded devices. +The export to TFLite Edge TPU format feature allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through converting your models to the TFLite Edge TPU format, making it easier for your models to perform well on various mobile and embedded devices. ## Why Should You Export to TFLite Edge TPU? @@ -32,7 +32,7 @@ Here are the key features that make TFLite Edge TPU a great model format choice ## Deployment Options with TFLite Edge TPU -Before we jump into how to export YOLOv8 models to the TFLite Edge TPU format, let's understand where TFLite Edge TPU models are usually used. +Before we jump into how to export YOLO11 models to the TFLite Edge TPU format, let's understand where TFLite Edge TPU models are usually used. TFLite Edge TPU offers various deployment options for machine learning models, including: @@ -42,9 +42,9 @@ TFLite Edge TPU offers various deployment options for machine learning models, i - **Hybrid Deployment**: A hybrid approach combines on-device and cloud deployment and offers a versatile and scalable solution for deploying machine learning models. Advantages include on-device processing for quick responses and [cloud computing](https://www.ultralytics.com/glossary/cloud-computing) for more complex computations. -## Exporting YOLOv8 Models to TFLite Edge TPU +## Exporting YOLO11 Models to TFLite Edge TPU -You can expand model compatibility and deployment flexibility by converting YOLOv8 models to TensorFlow Edge TPU. 
+You can expand model compatibility and deployment flexibility by converting YOLO11 models to the TFLite Edge TPU format. ### Installation @@ -55,15 +55,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!!
example "Usage" @@ -72,14 +72,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TFLite Edge TPU format - model.export(format="edgetpu") # creates 'yolov8n_full_integer_quant_edgetpu.tflite' + model.export(format="edgetpu") # creates 'yolo11n_full_integer_quant_edgetpu.tflite' # Load the exported TFLite Edge TPU model - edgetpu_model = YOLO("yolov8n_full_integer_quant_edgetpu.tflite") + edgetpu_model = YOLO("yolo11n_full_integer_quant_edgetpu.tflite") # Run inference results = edgetpu_model("https://ultralytics.com/images/bus.jpg") @@ -88,22 +88,22 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TFLite Edge TPU format - yolo export model=yolov8n.pt format=edgetpu # creates 'yolov8n_full_integer_quant_edgetpu.tflite' + # Export a YOLO11n PyTorch model to TFLite Edge TPU format + yolo export model=yolo11n.pt format=edgetpu # creates 'yolo11n_full_integer_quant_edgetpu.tflite' # Run inference with the exported model - yolo predict model=yolov8n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 TFLite Edge TPU Models +## Deploying Exported YOLO11 TFLite Edge TPU Models -After successfully exporting your Ultralytics YOLOv8 models to TFLite Edge TPU format, you can now deploy them. The primary and recommended first step for running a TFLite Edge TPU model is to use the YOLO("model_edgetpu.tflite") method, as outlined in the previous usage code snippet. 
+After successfully exporting your Ultralytics YOLO11 models to TFLite Edge TPU format, you can now deploy them. The primary and recommended first step for running a TFLite Edge TPU model is to use the YOLO("model_edgetpu.tflite") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your TFLite Edge TPU models, take a look at the following resources: -- **[Coral Edge TPU on a Raspberry Pi with Ultralytics YOLOv8](../guides/coral-edge-tpu-on-raspberry-pi.md)**: Discover how to integrate Coral Edge TPUs with Raspberry Pi for enhanced machine learning capabilities. +- **[Coral Edge TPU on a Raspberry Pi with Ultralytics YOLO11](../guides/coral-edge-tpu-on-raspberry-pi.md)**: Discover how to integrate Coral Edge TPUs with Raspberry Pi for enhanced machine learning capabilities. - **[Code Examples](https://coral.ai/docs/edgetpu/compiler/)**: Access practical TensorFlow Edge TPU deployment examples to kickstart your projects. @@ -111,17 +111,17 @@ However, for in-depth instructions on deploying your TFLite Edge TPU models, tak ## Summary -In this guide, we've learned how to export Ultralytics YOLOv8 models to TFLite Edge TPU format. By following the steps mentioned above, you can increase the speed and power of your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. +In this guide, we've learned how to export Ultralytics YOLO11 models to TFLite Edge TPU format. By following the steps mentioned above, you can increase the speed and power of your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. For further details on usage, visit the [Edge TPU official website](https://cloud.google.com/tpu). -Also, for more information on other Ultralytics YOLOv8 integrations, please visit our [integration guide page](index.md). There, you'll discover valuable resources and insights. 
+Also, for more information on other Ultralytics YOLO11 integrations, please visit our [integration guide page](index.md). There, you'll discover valuable resources and insights. ## FAQ -### How do I export a YOLOv8 model to TFLite Edge TPU format? +### How do I export a YOLO11 model to TFLite Edge TPU format? -To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps: +To export a YOLO11 model to TFLite Edge TPU format, you can follow these steps: !!! example "Usage" @@ -130,14 +130,14 @@ To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps: ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TFLite Edge TPU format - model.export(format="edgetpu") # creates 'yolov8n_full_integer_quant_edgetpu.tflite' + model.export(format="edgetpu") # creates 'yolo11n_full_integer_quant_edgetpu.tflite' # Load the exported TFLite Edge TPU model - edgetpu_model = YOLO("yolov8n_full_integer_quant_edgetpu.tflite") + edgetpu_model = YOLO("yolo11n_full_integer_quant_edgetpu.tflite") # Run inference results = edgetpu_model("https://ultralytics.com/images/bus.jpg") @@ -146,18 +146,18 @@ To export a YOLOv8 model to TFLite Edge TPU format, you can follow these steps: === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TFLite Edge TPU format - yolo export model=yolov8n.pt format=edgetpu # creates 'yolov8n_full_integer_quant_edgetpu.tflite' + # Export a YOLO11n PyTorch model to TFLite Edge TPU format + yolo export model=yolo11n.pt format=edgetpu # creates 'yolo11n_full_integer_quant_edgetpu.tflite' # Run inference with the exported model - yolo predict model=yolov8n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n_full_integer_quant_edgetpu.tflite source='https://ultralytics.com/images/bus.jpg' ``` For complete details on exporting models to other formats, refer to 
our [export guide](../modes/export.md). -### What are the benefits of exporting YOLOv8 models to TFLite Edge TPU? +### What are the benefits of exporting YOLO11 models to TFLite Edge TPU? -Exporting YOLOv8 models to TFLite Edge TPU offers several benefits: +Exporting YOLO11 models to TFLite Edge TPU offers several benefits: - **Optimized Performance**: Achieve high-speed neural network performance with minimal power consumption. - **Reduced Latency**: Quick local data processing without the need for cloud dependency. diff --git a/docs/en/integrations/google-colab.md b/docs/en/integrations/google-colab.md index 2c45528c192..2c242f6f824 100644 --- a/docs/en/integrations/google-colab.md +++ b/docs/en/integrations/google-colab.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to efficiently train Ultralytics YOLOv8 models using Google Colab's powerful cloud-based environment. Start your project with ease. -keywords: YOLOv8, Google Colab, machine learning, deep learning, model training, GPU, TPU, cloud computing, Jupyter Notebook, Ultralytics +description: Learn how to efficiently train Ultralytics YOLO11 models using Google Colab's powerful cloud-based environment. Start your project with ease. +keywords: YOLO11, Google Colab, machine learning, deep learning, model training, GPU, TPU, cloud computing, Jupyter Notebook, Ultralytics --- -# Accelerating YOLOv8 Projects with Google Colab +# Accelerating YOLO11 Projects with Google Colab Many developers lack the powerful computing resources needed to build [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models. Acquiring high-end hardware or renting a decent GPU can be expensive. Google Colab is a great solution to this. It's a browser-based platform that allows you to work with large datasets, develop complex models, and share your work with others without a huge cost. 
-You can use Google Colab to work on projects related to [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models. Google Colab's user-friendly environment is well suited for efficient model development and experimentation. Let's learn more about Google Colab, its key features, and how you can use it to train YOLOv8 models. +You can use Google Colab to work on projects related to [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models. Google Colab's user-friendly environment is well suited for efficient model development and experimentation. Let's learn more about Google Colab, its key features, and how you can use it to train YOLO11 models. ## Google Colaboratory @@ -16,15 +16,15 @@ Google Colaboratory, commonly known as Google Colab, was developed by Google Res You can use Google Colab regardless of the specifications and configurations of your local computer. All you need is a Google account and a web browser, and you're good to go. -## Training YOLOv8 Using Google Colaboratory +## Training YOLO11 Using Google Colaboratory -Training YOLOv8 models on Google Colab is pretty straightforward. Thanks to the integration, you can access the [Google Colab YOLOv8 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). +Training YOLO11 models on Google Colab is pretty straightforward. Thanks to the integration, you can access the [Google Colab YOLO11 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md). 
Sign in to your Google account and run the notebook's cells to train your model. -![Training YOLOv8 Using Google Colab](https://github.com/ultralytics/docs/releases/download/0/training-yolov8-using-google-colab.avif) +![Training YOLO11 Using Google Colab](https://github.com/ultralytics/docs/releases/download/0/training-yolov8-using-google-colab.avif) -Learn how to train a YOLOv8 model with custom data on YouTube with Nicolai. Check out the guide below. +Learn how to train a YOLO11 model with custom data on YouTube with Nicolai. Check out the guide below.


@@ -34,7 +34,7 @@ Learn how to train a YOLOv8 model with custom data on YouTube with Nicolai. Chec allowfullscreen>
- Watch: How to Train Ultralytics YOLOv8 models on Your Custom Dataset in Google Colab | Episode 3 + Watch: How to Train Ultralytics YOLO11 models on Your Custom Dataset in Google Colab | Episode 3

### Common Questions While Working with Google Colab @@ -75,9 +75,9 @@ Now, let's look at some of the standout features that make Google Colab a go-to - **Educational Resources:** Google Colab offers a range of tutorials and example notebooks to help users learn and explore various functionalities. -## Why Should You Use Google Colab for Your YOLOv8 Projects? +## Why Should You Use Google Colab for Your YOLO11 Projects? -There are many options for training and evaluating YOLOv8 models, so what makes the integration with Google Colab unique? Let's explore the advantages of this integration: +There are many options for training and evaluating YOLO11 models, so what makes the integration with Google Colab unique? Let's explore the advantages of this integration: - **Zero Setup:** Since Colab runs in the cloud, users can start training models immediately without the need for complex environment setups. Just create an account and start coding. @@ -95,7 +95,7 @@ There are many options for training and evaluating YOLOv8 models, so what makes If you'd like to dive deeper into Google Colab, here are a few resources to guide you. -- **[Training Custom Datasets with Ultralytics YOLOv8 in Google Colab](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab)**: Learn how to train custom datasets with Ultralytics YOLOv8 on Google Colab. This comprehensive blog post will take you through the entire process, from initial setup to the training and evaluation stages. +- **[Training Custom Datasets with Ultralytics YOLO11 in Google Colab](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab)**: Learn how to train custom datasets with Ultralytics YOLO11 on Google Colab. This comprehensive blog post will take you through the entire process, from initial setup to the training and evaluation stages. 
- **[Curated Notebooks](https://colab.google/notebooks/)**: Here you can explore a series of organized and educational notebooks, each grouped by specific topic areas. @@ -103,21 +103,21 @@ If you'd like to dive deeper into Google Colab, here are a few resources to guid ## Summary -We've discussed how you can easily experiment with Ultralytics YOLOv8 models on Google Colab. You can use Google Colab to train and evaluate your models on GPUs and TPUs with a few clicks. +We've discussed how you can easily experiment with Ultralytics YOLO11 models on Google Colab. You can use Google Colab to train and evaluate your models on GPUs and TPUs with a few clicks. For more details, visit [Google Colab's FAQ page](https://research.google.com/colaboratory/intl/en-GB/faq.html). -Interested in more YOLOv8 integrations? Visit the [Ultralytics integration guide page](index.md) to explore additional tools and capabilities that can improve your machine-learning projects. +Interested in more YOLO11 integrations? Visit the [Ultralytics integration guide page](index.md) to explore additional tools and capabilities that can improve your machine-learning projects. ## FAQ -### How do I start training Ultralytics YOLOv8 models on Google Colab? +### How do I start training Ultralytics YOLO11 models on Google Colab? -To start training Ultralytics YOLOv8 models on Google Colab, sign in to your Google account, then access the [Google Colab YOLOv8 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb). This notebook guides you through the setup and training process. After launching the notebook, run the cells step-by-step to train your model. For a full guide, refer to the [YOLOv8 Model Training guide](../modes/train.md). 
+To start training Ultralytics YOLO11 models on Google Colab, sign in to your Google account, then access the [Google Colab YOLO11 Notebook](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb). This notebook guides you through the setup and training process. After launching the notebook, run the cells step-by-step to train your model. For a full guide, refer to the [YOLO11 Model Training guide](../modes/train.md). -### What are the advantages of using Google Colab for training YOLOv8 models? +### What are the advantages of using Google Colab for training YOLO11 models? -Google Colab offers several advantages for training YOLOv8 models: +Google Colab offers several advantages for training YOLO11 models: - **Zero Setup:** No initial environment setup is required; just log in and start coding. - **Free GPU Access:** Use powerful GPUs or TPUs without the need for expensive hardware. @@ -126,7 +126,7 @@ Google Colab offers several advantages for training YOLOv8 models: For more information on why you should use Google Colab, explore the [training guide](../modes/train.md) and visit the [Google Colab page](https://colab.google/notebooks/). -### How can I handle Google Colab session timeouts during YOLOv8 training? +### How can I handle Google Colab session timeouts during YOLO11 training? Google Colab sessions timeout due to inactivity, especially for free users. To handle this: @@ -136,9 +136,9 @@ Google Colab sessions timeout due to inactivity, especially for free users. To h For more tips on managing your Colab session, visit the [Google Colab FAQ page](https://research.google.com/colaboratory/intl/en-GB/faq.html). -### Can I use custom datasets for training YOLOv8 models in Google Colab? +### Can I use custom datasets for training YOLO11 models in Google Colab? -Yes, you can use custom datasets to train YOLOv8 models in Google Colab. Upload your dataset to Google Drive and load it directly into your Colab notebook. 
You can follow Nicolai's YouTube guide, [How to Train YOLOv8 Models on Your Custom Dataset](https://www.youtube.com/watch?v=LNwODJXcvt4), or refer to the [Custom Dataset Training guide](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab) for detailed steps. +Yes, you can use custom datasets to train YOLO11 models in Google Colab. Upload your dataset to Google Drive and load it directly into your Colab notebook. You can follow Nicolai's YouTube guide, [How to Train YOLO11 Models on Your Custom Dataset](https://www.youtube.com/watch?v=LNwODJXcvt4), or refer to the [Custom Dataset Training guide](https://www.ultralytics.com/blog/training-custom-datasets-with-ultralytics-yolov8-in-google-colab) for detailed steps. ### What should I do if my Google Colab training session is interrupted? diff --git a/docs/en/integrations/gradio.md b/docs/en/integrations/gradio.md index b8a3644a6b3..3199a519cc0 100644 --- a/docs/en/integrations/gradio.md +++ b/docs/en/integrations/gradio.md @@ -1,14 +1,14 @@ --- comments: true -description: Discover an interactive way to perform object detection with Ultralytics YOLOv8 using Gradio. Upload images and adjust settings for real-time results. -keywords: Ultralytics, YOLOv8, Gradio, object detection, interactive, real-time, image processing, AI +description: Discover an interactive way to perform object detection with Ultralytics YOLO11 using Gradio. Upload images and adjust settings for real-time results. 
+keywords: Ultralytics, YOLO11, Gradio, object detection, interactive, real-time, image processing, AI --- -# Interactive [Object Detection](https://www.ultralytics.com/glossary/object-detection): Gradio & Ultralytics YOLOv8 🚀 +# Interactive [Object Detection](https://www.ultralytics.com/glossary/object-detection): Gradio & Ultralytics YOLO11 🚀 ## Introduction to Interactive Object Detection -This Gradio interface provides an easy and interactive way to perform object detection using the [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) model. Users can upload images and adjust parameters like confidence threshold and intersection-over-union (IoU) threshold to get real-time detection results. +This Gradio interface provides an easy and interactive way to perform object detection using the [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) model. Users can upload images and adjust parameters like confidence threshold and intersection-over-union (IoU) threshold to get real-time detection results.


@@ -18,7 +18,7 @@ This Gradio interface provides an easy and interactive way to perform object det allowfullscreen>
- Watch: Gradio Integration with Ultralytics YOLOv8 + Watch: Gradio Integration with Ultralytics YOLO11

## Why Use Gradio for Object Detection? @@ -52,7 +52,7 @@ pip install gradio ## Usage Example -This section provides the Python code used to create the Gradio interface with the Ultralytics YOLOv8 model. Supports classification tasks, detection tasks, segmentation tasks, and key point tasks. +This section provides the Python code used to create the Gradio interface with the Ultralytics YOLO11 model. Supports classification tasks, detection tasks, segmentation tasks, and key point tasks. ```python import gradio as gr @@ -60,11 +60,11 @@ import PIL.Image as Image from ultralytics import ASSETS, YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") def predict_image(img, conf_threshold, iou_threshold): - """Predicts and plots labeled objects in an image using YOLOv8 model with adjustable confidence and IOU thresholds.""" + """Predicts objects in an image using a YOLO11 model with adjustable confidence and IOU thresholds.""" results = model.predict( source=img, conf=conf_threshold, @@ -90,7 +90,7 @@ iface = gr.Interface( ], outputs=gr.Image(type="pil", label="Result"), title="Ultralytics Gradio", - description="Upload images for inference. The Ultralytics YOLOv8n model is used by default.", + description="Upload images for inference. The Ultralytics YOLO11n model is used by default.", examples=[ [ASSETS / "bus.jpg", 0.25, 0.45], [ASSETS / "zidane.jpg", 0.25, 0.45], @@ -119,9 +119,9 @@ if __name__ == "__main__": ## FAQ -### How do I use Gradio with Ultralytics YOLOv8 for object detection? +### How do I use Gradio with Ultralytics YOLO11 for object detection? -To use Gradio with Ultralytics YOLOv8 for object detection, you can follow these steps: +To use Gradio with Ultralytics YOLO11 for object detection, you can follow these steps: 1. **Install Gradio:** Use the command `pip install gradio`. 2. **Create Interface:** Write a Python script to initialize the Gradio interface. 
You can refer to the provided code example in the [documentation](#usage-example) for details. @@ -134,7 +134,7 @@ import gradio as gr from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") def predict_image(img, conf_threshold, iou_threshold): @@ -156,15 +156,15 @@ iface = gr.Interface( gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold"), ], outputs=gr.Image(type="pil", label="Result"), - title="Ultralytics Gradio YOLOv8", - description="Upload images for YOLOv8 object detection.", + title="Ultralytics Gradio YOLO11", + description="Upload images for YOLO11 object detection.", ) iface.launch() ``` -### What are the benefits of using Gradio for Ultralytics YOLOv8 object detection? +### What are the benefits of using Gradio for Ultralytics YOLO11 object detection? -Using Gradio for Ultralytics YOLOv8 object detection offers several benefits: +Using Gradio for Ultralytics YOLO11 object detection offers several benefits: - **User-Friendly Interface:** Gradio provides an intuitive interface for users to upload images and visualize detection results without any coding effort. - **Real-Time Adjustments:** You can dynamically adjust detection parameters such as confidence and IoU thresholds and see the effects immediately. @@ -172,22 +172,22 @@ Using Gradio for Ultralytics YOLOv8 object detection offers several benefits: For more details, you can read this [blog post](https://www.ultralytics.com/blog/ai-and-radiology-a-new-era-of-precision-and-efficiency). -### Can I use Gradio and Ultralytics YOLOv8 together for educational purposes? +### Can I use Gradio and Ultralytics YOLO11 together for educational purposes? -Yes, Gradio and Ultralytics YOLOv8 can be utilized together for educational purposes effectively. 
Gradio's intuitive web interface makes it easy for students and educators to interact with state-of-the-art [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like Ultralytics YOLOv8 without needing advanced programming skills. This setup is ideal for demonstrating key concepts in object detection and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), as Gradio provides immediate visual feedback which helps in understanding the impact of different parameters on the detection performance. +Yes, Gradio and Ultralytics YOLO11 can be utilized together for educational purposes effectively. Gradio's intuitive web interface makes it easy for students and educators to interact with state-of-the-art [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like Ultralytics YOLO11 without needing advanced programming skills. This setup is ideal for demonstrating key concepts in object detection and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), as Gradio provides immediate visual feedback which helps in understanding the impact of different parameters on the detection performance. -### How do I adjust the confidence and IoU thresholds in the Gradio interface for YOLOv8? +### How do I adjust the confidence and IoU thresholds in the Gradio interface for YOLO11? -In the Gradio interface for YOLOv8, you can adjust the confidence and IoU thresholds using the sliders provided. These thresholds help control the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) and object separation: +In the Gradio interface for YOLO11, you can adjust the confidence and IoU thresholds using the sliders provided. These thresholds help control the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) and object separation: - **Confidence Threshold:** Determines the minimum confidence level for detecting objects. Slide to increase or decrease the confidence required. 
- **IoU Threshold:** Sets the intersection-over-union threshold for distinguishing between overlapping objects. Adjust this value to refine object separation. For more information on these parameters, visit the [parameters explanation section](#parameters-explanation). -### What are some practical applications of using Ultralytics YOLOv8 with Gradio? +### What are some practical applications of using Ultralytics YOLO11 with Gradio? -Practical applications of combining Ultralytics YOLOv8 with Gradio include: +Practical applications of combining Ultralytics YOLO11 with Gradio include: - **Real-Time Object Detection Demonstrations:** Ideal for showcasing how object detection works in real-time. - **Educational Tools:** Useful in academic settings to teach object detection and computer vision concepts. @@ -196,4 +196,4 @@ Practical applications of combining Ultralytics YOLOv8 with Gradio include: For examples of similar use cases, check out the [Ultralytics blog](https://www.ultralytics.com/blog/monitoring-animal-behavior-using-ultralytics-yolov8). -Providing this information within the documentation will help in enhancing the usability and accessibility of Ultralytics YOLOv8, making it more approachable for users at all levels of expertise. +Providing this information within the documentation will help in enhancing the usability and accessibility of Ultralytics YOLO11, making it more approachable for users at all levels of expertise. diff --git a/docs/en/integrations/ibm-watsonx.md b/docs/en/integrations/ibm-watsonx.md index cda19b055ce..0e77bc5e1bc 100644 --- a/docs/en/integrations/ibm-watsonx.md +++ b/docs/en/integrations/ibm-watsonx.md @@ -1,18 +1,18 @@ --- comments: true -description: Dive into our detailed integration guide on using IBM Watson to train a YOLOv8 model. Uncover key features and step-by-step instructions on model training. 
-keywords: IBM Watsonx, IBM Watsonx AI, What is Watson?, IBM Watson Integration, IBM Watson Features, YOLOv8, Ultralytics, Model Training, GPU, TPU, cloud computing +description: Dive into our detailed integration guide on using IBM Watson to train a YOLO11 model. Uncover key features and step-by-step instructions on model training. +keywords: IBM Watsonx, IBM Watsonx AI, What is Watson?, IBM Watson Integration, IBM Watson Features, YOLO11, Ultralytics, Model Training, GPU, TPU, cloud computing --- -# A Step-by-Step Guide to Training YOLOv8 Models with IBM Watsonx +# A Step-by-Step Guide to Training YOLO11 Models with IBM Watsonx Nowadays, scalable [computer vision solutions](../guides/steps-of-a-cv-project.md) are becoming more common and transforming the way we handle visual data. A great example is IBM Watsonx, an advanced AI and data platform that simplifies the development, deployment, and management of AI models. It offers a complete suite for the entire AI lifecycle and seamless integration with IBM Cloud services. -You can train [Ultralytics YOLOv8 models](https://github.com/ultralytics/ultralytics) using IBM Watsonx. It's a good option for enterprises interested in efficient [model training](../modes/train.md), fine-tuning for specific tasks, and improving [model performance](../guides/model-evaluation-insights.md) with robust tools and a user-friendly setup. In this guide, we'll walk you through the process of training YOLOv8 with IBM Watsonx, covering everything from setting up your environment to evaluating your trained models. Let's get started! +You can train [Ultralytics YOLO11 models](https://github.com/ultralytics/ultralytics) using IBM Watsonx. It's a good option for enterprises interested in efficient [model training](../modes/train.md), fine-tuning for specific tasks, and improving [model performance](../guides/model-evaluation-insights.md) with robust tools and a user-friendly setup. 
In this guide, we'll walk you through the process of training YOLO11 with IBM Watsonx, covering everything from setting up your environment to evaluating your trained models. Let's get started! ## What is IBM Watsonx? -[Watsonx](https://www.ibm.com/watsonx) is IBM's cloud-based platform designed for commercial [generative AI](https://www.ultralytics.com/glossary/generative-ai) and scientific data. IBM Watsonx's three components - watsonx.ai, watsonx.data, and watsonx.governance - come together to create an end-to-end, trustworthy AI platform that can accelerate AI projects aimed at solving business problems. It provides powerful tools for building, training, and [deploying machine learning models](../guides/model-deployment-options.md) and makes it easy to connect with various data sources. +[Watsonx](https://www.ibm.com/watsonx) is IBM's cloud-based platform designed for commercial [generative AI](https://www.ultralytics.com/glossary/generative-ai) and scientific data. IBM Watsonx's three components - `watsonx.ai`, `watsonx.data`, and `watsonx.governance` - come together to create an end-to-end, trustworthy AI platform that can accelerate AI projects aimed at solving business problems. It provides powerful tools for building, training, and [deploying machine learning models](../guides/model-deployment-options.md) and makes it easy to connect with various data sources.

Overview of IBM Watsonx @@ -22,7 +22,7 @@ Its user-friendly interface and collaborative capabilities streamline the develo ## Key Features of IBM Watsonx -IBM Watsonx is made of three main components: watsonx.ai, watsonx.data, and watsonx.governance. Each component offers features that cater to different aspects of AI and data management. Let's take a closer look at them. +IBM Watsonx is made of three main components: `watsonx.ai`, `watsonx.data`, and `watsonx.governance`. Each component offers features that cater to different aspects of AI and data management. Let's take a closer look at them. ### [Watsonx.ai](https://www.ibm.com/products/watsonx-ai) @@ -36,9 +36,9 @@ Watsonx.data supports both cloud and on-premises deployments through the IBM Sto Watsonx.governance makes compliance easier by automatically identifying regulatory changes and enforcing policies. It links requirements to internal risk data and provides up-to-date AI factsheets. The platform helps manage risk with alerts and tools to detect issues such as [bias and drift](../guides/model-monitoring-and-maintenance.md). It also automates the monitoring and documentation of the AI lifecycle, organizes AI development with a model inventory, and enhances collaboration with user-friendly dashboards and reporting tools. -## How to Train YOLOv8 Using IBM Watsonx +## How to Train YOLO11 Using IBM Watsonx -You can use IBM Watsonx to accelerate your YOLOv8 model training workflow. +You can use IBM Watsonx to accelerate your YOLO11 model training workflow. ### Prerequisites @@ -67,7 +67,7 @@ Next, you can install and import the necessary Python libraries. pip install ultralytics==8.0.196 ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. 
+For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. Then, you can import the needed packages. @@ -86,7 +86,7 @@ Then, you can import the needed packages. ### Step 3: Load the Data -For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLOv8 model to detect and classify litter and biological objects in underwater images. +For this tutorial, we will use a [marine litter dataset](https://www.kaggle.com/datasets/atiqishrak/trash-dataset-icra19) available on Kaggle. With this dataset, we will custom-train a YOLO11 model to detect and classify litter and biological objects in underwater images. We can load the dataset directly into the notebook using the Kaggle API. First, create a free Kaggle account. Once you have created an account, you'll need to generate an API key. Directions for generating your key can be found in the [Kaggle API documentation](https://github.com/Kaggle/kaggle-api/blob/main/docs/README.md) under the section "API credentials". @@ -133,7 +133,7 @@ After loading the dataset, we printed and saved our working directory. We have a If you see "trash_ICRA19" among the directory's contents, then it has loaded successfully. You should see three files/folders: a `config.yaml` file, a `videos_for_testing` directory, and a `dataset` directory. We will ignore the `videos_for_testing` directory, so feel free to delete it. -We will use the config.yaml file and the contents of the dataset directory to train our [object detection](https://www.ultralytics.com/glossary/object-detection) model. Here is a sample image from our marine litter data set. 
+We will use the `config.yaml` file and the contents of the dataset directory to train our [object detection](https://www.ultralytics.com/glossary/object-detection) model. Here is a sample image from our marine litter data set.

Marine Litter with Bounding Box @@ -205,14 +205,14 @@ names: 2: rov ``` -Run the following script to delete the current contents of config.yaml and replace it with the above contents that reflect our new data set directory structure. Be certain to replace the work_dir portion of the root directory path in line 4 with your own working directory path we retrieved earlier. Leave the train, val, and test subdirectory definitions. Also, do not change {work_dir} in line 23 of the code. +Run the following script to delete the current contents of `config.yaml` and replace it with the above contents that reflect our new data set directory structure. Be certain to replace the work_dir portion of the root directory path in line 4 with your own working directory path we retrieved earlier. Leave the train, val, and test subdirectory definitions. Also, do not change {work_dir} in line 23 of the code. !!! example "Edit the .yaml File" === "Python" ```python - # Contents of new confg.yaml file + # Contents of new config.yaml file def update_yaml_file(file_path): data = { "path": "work_dir/trash_ICRA19/dataset", @@ -236,34 +236,34 @@ Run the following script to delete the current contents of config.yaml and repla print(f"{file_path} updated successfully.") ``` -### Step 5: Train the YOLOv8 model +### Step 5: Train the YOLO11 model -Run the following command-line code to fine tune a pretrained default YOLOv8 model. +Run the following command-line code to fine tune a pretrained default YOLO11 model. -!!! example "Train the YOLOv8 model" +!!! 
example "Train the YOLO11 model" === "CLI" ```bash - !yolo task=detect mode=train data={work_dir}/trash_ICRA19/config.yaml model=yolov8s.pt epochs=2 batch=32 lr0=.04 plots=True + !yolo task=detect mode=train data={work_dir}/trash_ICRA19/config.yaml model=yolo11n.pt epochs=2 batch=32 lr0=.04 plots=True ``` Here's a closer look at the parameters in the model training command: - **task**: It specifies the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) task for which you are using the specified YOLO model and data set. - **mode**: Denotes the purpose for which you are loading the specified model and data. Since we are training a model, it is set to "train." Later, when we test our model's performance, we will set it to "predict." -- **epochs**: This delimits the number of times YOLOv8 will pass through our entire data set. +- **epochs**: This delimits the number of times YOLO11 will pass through our entire data set. - **batch**: The numerical value stipulates the training [batch sizes](https://www.ultralytics.com/glossary/batch-size). Batches are the number of images a model processes before it updates its parameters. - **lr0**: Specifies the model's initial [learning rate](https://www.ultralytics.com/glossary/learning-rate). - **plots**: Directs YOLO to generate and save plots of our model's training and evaluation metrics. -For a detailed understanding of the model training process and best practices, refer to the [YOLOv8 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLOv8 effectively. +For a detailed understanding of the model training process and best practices, refer to the [YOLO11 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLO11 effectively. ### Step 6: Test the Model We can now run inference to test the performance of our fine-tuned model: -!!! 
example "Test the YOLOv8 model" +!!! example "Test the YOLO11 model" === "CLI" @@ -312,11 +312,11 @@ Unlike precision, recall moves in the opposite direction, showing greater recall ### Step 8: Calculating [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) -You can measure the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) by calculating the IoU between a predicted bounding box and a ground truth bounding box for the same object. Check out [IBM's tutorial on training YOLOv8](https://developer.ibm.com/tutorials/awb-train-yolo-object-detection-model-in-python/) for more details. +You can measure the prediction [accuracy](https://www.ultralytics.com/glossary/accuracy) by calculating the IoU between a predicted bounding box and a ground truth bounding box for the same object. Check out [IBM's tutorial on training YOLO11](https://developer.ibm.com/tutorials/awb-train-yolo-object-detection-model-in-python/) for more details. ## Summary -We explored IBM Watsonx key features, and how to train a YOLOv8 model using IBM Watsonx. We also saw how IBM Watsonx can enhance your AI workflows with advanced tools for model building, data management, and compliance. +We explored IBM Watsonx key features, and how to train a YOLO11 model using IBM Watsonx. We also saw how IBM Watsonx can enhance your AI workflows with advanced tools for model building, data management, and compliance. For further details on usage, visit [IBM Watsonx official documentation](https://www.ibm.com/watsonx). @@ -324,9 +324,9 @@ Also, be sure to check out the [Ultralytics integration guide page](./index.md), ## FAQ -### How do I train a YOLOv8 model using IBM Watsonx? +### How do I train a YOLO11 model using IBM Watsonx? -To train a YOLOv8 model using IBM Watsonx, follow these steps: +To train a YOLO11 model using IBM Watsonx, follow these steps: 1. **Set Up Your Environment**: Create an IBM Cloud account and set up a Watsonx.ai project. 
Use a Jupyter Notebook for your coding environment. 2. **Install Libraries**: Install necessary libraries like `torch`, `opencv`, and `ultralytics`. @@ -335,7 +335,7 @@ To train a YOLOv8 model using IBM Watsonx, follow these steps: 5. **Train the Model**: Use the YOLO command-line interface to train your model with specific parameters like `epochs`, `batch size`, and `learning rate`. 6. **Test and Evaluate**: Run inference to test the model and evaluate its performance using metrics like precision and recall. -For detailed instructions, refer to our [YOLOv8 Model Training guide](../modes/train.md). +For detailed instructions, refer to our [YOLO11 Model Training guide](../modes/train.md). ### What are the key features of IBM Watsonx for AI model training? @@ -347,20 +347,20 @@ IBM Watsonx offers several key features for AI model training: For more information, visit the [IBM Watsonx official documentation](https://www.ibm.com/watsonx). -### Why should I use IBM Watsonx for training Ultralytics YOLOv8 models? +### Why should I use IBM Watsonx for training Ultralytics YOLO11 models? -IBM Watsonx is an excellent choice for training Ultralytics YOLOv8 models due to its comprehensive suite of tools that streamline the AI lifecycle. Key benefits include: +IBM Watsonx is an excellent choice for training Ultralytics YOLO11 models due to its comprehensive suite of tools that streamline the AI lifecycle. Key benefits include: - **Scalability**: Easily scale your model training with IBM Cloud services. - **Integration**: Seamlessly integrate with various data sources and APIs. - **User-Friendly Interface**: Simplifies the development process with a collaborative and intuitive interface. - **Advanced Tools**: Access to powerful tools like the Prompt Lab, Tuning Studio, and Flows Engine for enhancing model performance. 
-Learn more about [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) and how to train models using IBM Watsonx in our [integration guide](./index.md). +Learn more about [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) and how to train models using IBM Watsonx in our [integration guide](./index.md). -### How can I preprocess my dataset for YOLOv8 training on IBM Watsonx? +### How can I preprocess my dataset for YOLO11 training on IBM Watsonx? -To preprocess your dataset for YOLOv8 training on IBM Watsonx: +To preprocess your dataset for YOLO11 training on IBM Watsonx: 1. **Organize Directories**: Ensure your dataset follows the YOLO directory structure with separate subdirectories for images and labels within the train/val/test split. 2. **Update .yaml File**: Modify the `.yaml` configuration file to reflect the new directory structure and class names. @@ -399,9 +399,9 @@ if __name__ == "__main__": For more details, refer to our [data preprocessing guide](../guides/preprocessing_annotated_data.md). -### What are the prerequisites for training a YOLOv8 model on IBM Watsonx? +### What are the prerequisites for training a YOLO11 model on IBM Watsonx? -Before you start training a YOLOv8 model on IBM Watsonx, ensure you have the following prerequisites: +Before you start training a YOLO11 model on IBM Watsonx, ensure you have the following prerequisites: - **IBM Cloud Account**: Create an account on IBM Cloud to access Watsonx.ai. - **Kaggle Account**: For loading datasets, you'll need a Kaggle account and an API key. diff --git a/docs/en/integrations/index.md b/docs/en/integrations/index.md index 391b1ecb81a..4b91b18f2ec 100644 --- a/docs/en/integrations/index.md +++ b/docs/en/integrations/index.md @@ -18,7 +18,7 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of allowfullscreen>
- Watch: Ultralytics YOLOv8 Deployment and Integrations + Watch: Ultralytics YOLO11 Deployment and Integrations

## Datasets Integrations @@ -27,67 +27,77 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of ## Training Integrations +- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle. + - [ClearML](clearml.md): Automate your Ultralytics ML workflows, monitor experiments, and foster team collaboration. - [Comet ML](comet.md): Enhance your model development with Ultralytics by tracking, comparing, and optimizing your machine learning experiments. - [DVC](dvc.md): Implement version control for your Ultralytics machine learning projects, synchronizing data, code, and models effectively. -- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment. +- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing. -- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models. +- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system. + +- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency. + +- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing. + +- [MLFlow](mlflow.md): Streamline the entire ML lifecycle of Ultralytics models, from experimentation and reproducibility to deployment. 
- [Neptune](https://neptune.ai/): Maintain a comprehensive log of your ML experiments with Ultralytics in this metadata store designed for MLOps. +- [Paperspace Gradient](paperspace.md): Paperspace Gradient simplifies working on YOLO11 projects by providing easy-to-use cloud tools for training, testing, and deploying your models quickly. + - [Ray Tune](ray-tune.md): Optimize the hyperparameters of your Ultralytics models at any scale. - [TensorBoard](tensorboard.md): Visualize your Ultralytics ML workflows, monitor model metrics, and foster team collaboration. +- [Ultralytics HUB](https://hub.ultralytics.com/): Access and contribute to a community of pre-trained Ultralytics models. + - [Weights & Biases (W&B)](weights-biases.md): Monitor experiments, visualize metrics, and foster reproducibility and collaboration on Ultralytics projects. -- [Amazon SageMaker](amazon-sagemaker.md): Leverage Amazon SageMaker to efficiently build, train, and deploy Ultralytics models, providing an all-in-one platform for the ML lifecycle. +- [VS Code](vscode.md): An extension for VS Code that provides code snippets for accelerating development workflows with Ultralytics and also for anyone looking for examples to help learn or get started with Ultralytics. -- [Paperspace Gradient](paperspace.md): Paperspace Gradient simplifies working on YOLOv8 projects by providing easy-to-use cloud tools for training, testing, and deploying your models quickly. +- [Albumentations](albumentations.md): Enhance your Ultralytics models with powerful image augmentations to improve model robustness and generalization. -- [Google Colab](google-colab.md): Use Google Colab to train and evaluate Ultralytics models in a cloud-based environment that supports collaboration and sharing. 
+## Deployment Integrations -- [Kaggle](kaggle.md): Explore how you can use Kaggle to train and evaluate Ultralytics models in a cloud-based environment with pre-installed libraries, GPU support, and a vibrant community for collaboration and sharing. +- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment). -- [JupyterLab](jupyterlab.md): Find out how to use JupyterLab's interactive and customizable environment to train and evaluate Ultralytics models with ease and efficiency. +- [Gradio](gradio.md) 🚀 NEW: Deploy Ultralytics models with Gradio for real-time, interactive object detection demos. -- [IBM Watsonx](ibm-watsonx.md): See how IBM Watsonx simplifies the training and evaluation of Ultralytics models with its cutting-edge AI tools, effortless integration, and advanced model management system. +- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms. -## Deployment Integrations +- [MNN](mnn.md): Developed by [Alibaba](https://www.alibabagroup.com/), MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device. - [Neural Magic](neural-magic.md): Leverage Quantization Aware Training (QAT) and pruning techniques to optimize Ultralytics models for superior performance and leaner size.
-- [Gradio](gradio.md) 🚀 NEW: Deploy Ultralytics models with Gradio for real-time, interactive object detection demos. - -- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies. - - [ONNX](onnx.md): An open-source format created by [Microsoft](https://www.microsoft.com/) for facilitating the transfer of AI models between various frameworks, enhancing the versatility and deployment flexibility of Ultralytics models. - [OpenVINO](openvino.md): Intel's toolkit for optimizing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models efficiently across various Intel CPU and GPU platforms. -- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment. +- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications. -- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment). +- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware.
- [TF SavedModel](tf-savedmodel.md): Developed by [Google](https://www.google.com/), TF SavedModel is a universal serialization format for [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) models, enabling easy sharing and deployment across a wide range of platforms, from servers to edge devices. -- [TF GraphDef](tf-graphdef.md): Developed by [Google](https://www.google.com/), GraphDef is TensorFlow's format for representing computation graphs, enabling optimized execution of machine learning models across diverse hardware. +- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models. - [TFLite](tflite.md): Developed by [Google](https://www.google.com/), TFLite is a lightweight framework for deploying machine learning models on mobile and edge devices, ensuring fast, efficient inference with minimal memory footprint. - [TFLite Edge TPU](edge-tpu.md): Developed by [Google](https://www.google.com/) for optimizing TensorFlow Lite models on Edge TPUs, this model format ensures high-speed, efficient [edge computing](https://www.ultralytics.com/glossary/edge-computing). -- [TF.js](tfjs.md): Developed by [Google](https://www.google.com/) to facilitate machine learning in browsers and Node.js, TF.js allows JavaScript-based deployment of ML models. +- [TensorRT](tensorrt.md): Developed by [NVIDIA](https://www.nvidia.com/), this high-performance [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) inference framework and model format optimizes AI models for accelerated speed and efficiency on NVIDIA GPUs, ensuring streamlined deployment. -- [PaddlePaddle](paddlepaddle.md): An open-source deep learning platform by [Baidu](https://www.baidu.com/), PaddlePaddle enables the efficient deployment of AI models and focuses on the scalability of industrial applications. 
+- [TorchScript](torchscript.md): Developed as part of the [PyTorch](https://pytorch.org/) framework, TorchScript enables efficient execution and deployment of machine learning models in various production environments without the need for Python dependencies. -- [NCNN](ncnn.md): Developed by [Tencent](http://www.tencent.com/), NCNN is an efficient [neural network](https://www.ultralytics.com/glossary/neural-network-nn) inference framework tailored for mobile devices. It enables direct deployment of AI models into apps, optimizing performance across various mobile platforms. +- [SONY IMX500](sony-imx500.md): Optimize and deploy [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) models on Raspberry Pi AI Cameras with the IMX500 sensor for fast, low-power performance. -- [VS Code](vscode.md): An extension for VS Code that provides code snippets for accelerating development workflows with Ultralytics and also for anyone looking for examples to help learn or get started with Ultralytics. +- [Rockchip RKNN](rockchip-rknn.md): Developed by [Rockchip](https://www.rock-chips.com/), RKNN is a specialized neural network inference framework optimized for Rockchip's hardware platforms, particularly their NPUs. It facilitates efficient deployment of AI models on edge devices, enabling high-performance inference in real-time applications. + +- [Seeed Studio reCamera](seeedstudio-recamera.md): Developed by [Seeed Studio](https://www.seeedstudio.com/), the reCamera is a cutting-edge edge AI device designed for real-time computer vision applications. Powered by the RISC-V-based SG200X processor, it delivers high-performance AI inference with energy efficiency. Its modular design, advanced video processing capabilities, and support for flexible deployment make it an ideal choice for various use cases, including safety monitoring, environmental applications, and manufacturing. 
### Export Formats @@ -111,7 +121,7 @@ Let's collaborate to make the Ultralytics YOLO ecosystem more expansive and feat ### What is Ultralytics HUB, and how does it streamline the ML workflow? -Ultralytics HUB is a cloud-based platform designed to make machine learning (ML) workflows for Ultralytics models seamless and efficient. By using this tool, you can easily upload datasets, train models, perform real-time tracking, and deploy YOLOv8 models without needing extensive coding skills. You can explore the key features on the [Ultralytics HUB](https://hub.ultralytics.com/) page and get started quickly with our [Quickstart](https://docs.ultralytics.com/hub/quickstart/) guide. +Ultralytics HUB is a cloud-based platform designed to make machine learning (ML) workflows for Ultralytics models seamless and efficient. By using this tool, you can easily upload datasets, train models, perform real-time tracking, and deploy YOLO11 models without needing extensive coding skills. You can explore the key features on the [Ultralytics HUB](https://hub.ultralytics.com/) page and get started quickly with our [Quickstart](https://docs.ultralytics.com/hub/quickstart/) guide. ### How do I integrate Ultralytics YOLO models with Roboflow for dataset management? @@ -121,9 +131,9 @@ Integrating Ultralytics YOLO models with Roboflow enhances dataset management by Yes, you can. Integrating MLFlow with Ultralytics models allows you to track experiments, improve reproducibility, and streamline the entire ML lifecycle. Detailed instructions for setting up this integration can be found on the [MLFlow](mlflow.md) integration page. This integration is particularly useful for monitoring model metrics and managing the ML workflow efficiently. -### What are the benefits of using Neural Magic for YOLOv8 model optimization? +### What are the benefits of using Neural Magic for YOLO11 model optimization? 
-Neural Magic optimizes YOLOv8 models by leveraging techniques like Quantization Aware Training (QAT) and pruning, resulting in highly efficient, smaller models that perform better on resource-limited hardware. Check out the [Neural Magic](neural-magic.md) integration page to learn how to implement these optimizations for superior performance and leaner models. This is especially beneficial for deployment on edge devices. +Neural Magic optimizes YOLO11 models by leveraging techniques like Quantization Aware Training (QAT) and pruning, resulting in highly efficient, smaller models that perform better on resource-limited hardware. Check out the [Neural Magic](neural-magic.md) integration page to learn how to implement these optimizations for superior performance and leaner models. This is especially beneficial for deployment on edge devices. ### How do I deploy Ultralytics YOLO models with Gradio for interactive demos? diff --git a/docs/en/integrations/jupyterlab.md b/docs/en/integrations/jupyterlab.md index b3179918b1f..668940ffab5 100644 --- a/docs/en/integrations/jupyterlab.md +++ b/docs/en/integrations/jupyterlab.md @@ -1,14 +1,14 @@ --- comments: true -description: Explore our integration guide that explains how you can use JupyterLab to train a YOLOv8 model. We'll also cover key features and tips for common issues. -keywords: JupyterLab, What is JupyterLab, How to Use JupyterLab, JupyterLab How to Use, YOLOv8, Ultralytics, Model Training, GPU, TPU, cloud computing +description: Explore our integration guide that explains how you can use JupyterLab to train a YOLO11 model. We'll also cover key features and tips for common issues. 
+keywords: JupyterLab, What is JupyterLab, How to Use JupyterLab, JupyterLab How to Use, YOLO11, Ultralytics, Model Training, GPU, TPU, cloud computing --- -# A Guide on How to Use JupyterLab to Train Your YOLOv8 Models +# A Guide on How to Use JupyterLab to Train Your YOLO11 Models Building [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models can be tough, especially when you don't have the right tools or environment to work with. If you are facing this issue, JupyterLab might be the right solution for you. JupyterLab is a user-friendly, web-based platform that makes coding more flexible and interactive. You can use it to handle big datasets, create complex models, and even collaborate with others, all in one place. -You can use JupyterLab to [work on projects](../guides/steps-of-a-cv-project.md) related to [Ultralytics YOLOv8 models](https://github.com/ultralytics/ultralytics). JupyterLab is a great option for efficient model development and experimentation. It makes it easy to start experimenting with and [training YOLOv8 models](../modes/train.md) right from your computer. Let's dive deeper into JupyterLab, its key features, and how you can use it to train YOLOv8 models. +You can use JupyterLab to [work on projects](../guides/steps-of-a-cv-project.md) related to [Ultralytics YOLO11 models](https://github.com/ultralytics/ultralytics). JupyterLab is a great option for efficient model development and experimentation. It makes it easy to start experimenting with and [training YOLO11 models](../modes/train.md) right from your computer. Let's dive deeper into JupyterLab, its key features, and how you can use it to train YOLO11 models. ## What is JupyterLab? @@ -26,7 +26,7 @@ Here are some of the key features that make JupyterLab a great option for model - **Markdown Preview**: Working with Markdown files is more efficient in JupyterLab, thanks to its simultaneous preview feature. 
As you write or edit your Markdown file, you can see the formatted output in real-time. It makes it easier to double-check that your documentation looks perfect, saving you from having to switch back and forth between editing and preview modes. - **Run Code from Text Files**: If you're sharing a text file with code, JupyterLab makes it easy to run it directly within the platform. You can highlight the code and press Shift + Enter to execute it. It is great for verifying code snippets quickly and helps guarantee that the code you share is functional and error-free. -## Why Should You Use JupyterLab for Your YOLOv8 Projects? +## Why Should You Use JupyterLab for Your YOLO11 Projects? There are multiple platforms for developing and evaluating machine learning models, so what makes JupyterLab stand out? Let's explore some of the unique aspects that JupyterLab offers for your machine-learning projects: @@ -46,9 +46,9 @@ When working with Kaggle, you might come across some common issues. Here are som - **Installing JupyterLab Extensions**: JupyterLab supports various extensions to enhance functionality. You can install and customize these extensions to suit your needs. For detailed instructions, refer to [JupyterLab Extensions Guide](https://jupyterlab.readthedocs.io/en/latest/user/extensions.html) for more information. - **Using Multiple Versions of Python**: If you need to work with different versions of Python, you can use Jupyter kernels configured with different Python versions. -## How to Use JupyterLab to Try Out YOLOv8 +## How to Use JupyterLab to Try Out YOLO11 -JupyterLab makes it easy to experiment with YOLOv8. To get started, follow these simple steps. +JupyterLab makes it easy to experiment with YOLO11. To get started, follow these simple steps. ### Step 1: Install JupyterLab @@ -63,7 +63,7 @@ First, you need to install JupyterLab. 
Open your terminal and run the command: pip install jupyterlab ``` -### Step 2: Download the YOLOv8 Tutorial Notebook +### Step 2: Download the YOLO11 Tutorial Notebook Next, download the [tutorial.ipynb](https://github.com/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb) file from the Ultralytics GitHub repository. Save this file to any directory on your local machine. @@ -85,13 +85,13 @@ Once you've run this command, it will open JupyterLab in your default web browse ### Step 4: Start Experimenting -In JupyterLab, open the tutorial.ipynb notebook. You can now start running the cells to explore and experiment with YOLOv8. +In JupyterLab, open the tutorial.ipynb notebook. You can now start running the cells to explore and experiment with YOLO11. -![Image Showing Opened YOLOv8 Notebook in JupyterLab](https://github.com/ultralytics/docs/releases/download/0/opened-yolov8-notebook-jupyterlab.avif) +![Image Showing Opened YOLO11 Notebook in JupyterLab](https://github.com/ultralytics/docs/releases/download/0/opened-yolov8-notebook-jupyterlab.avif) -JupyterLab's interactive environment allows you to modify code, visualize outputs, and document your findings all in one place. You can try out different configurations and understand how YOLOv8 works. +JupyterLab's interactive environment allows you to modify code, visualize outputs, and document your findings all in one place. You can try out different configurations and understand how YOLO11 works. -For a detailed understanding of the model training process and best practices, refer to the [YOLOv8 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLOv8 effectively. +For a detailed understanding of the model training process and best practices, refer to the [YOLO11 Model Training guide](../modes/train.md). This guide will help you get the most out of your experiments and ensure you're using YOLO11 effectively. 
## Keep Learning about Jupyterlab @@ -103,17 +103,17 @@ If you're excited to learn more about JupyterLab, here are some great resources ## Summary -We've explored how JupyterLab can be a powerful tool for experimenting with Ultralytics YOLOv8 models. Using its flexible and interactive environment, you can easily set up JupyterLab on your local machine and start working with YOLOv8. JupyterLab makes it simple to [train](../guides/model-training-tips.md) and [evaluate](../guides/model-testing.md) your models, visualize outputs, and [document your findings](../guides/model-monitoring-and-maintenance.md) all in one place. +We've explored how JupyterLab can be a powerful tool for experimenting with Ultralytics YOLO11 models. Using its flexible and interactive environment, you can easily set up JupyterLab on your local machine and start working with YOLO11. JupyterLab makes it simple to [train](../guides/model-training-tips.md) and [evaluate](../guides/model-testing.md) your models, visualize outputs, and [document your findings](../guides/model-monitoring-and-maintenance.md) all in one place. For more details, visit the [JupyterLab FAQ Page](https://jupyterlab.readthedocs.io/en/stable/getting_started/faq.html). -Interested in more YOLOv8 integrations? Check out the [Ultralytics integration guide](./index.md) to explore additional tools and capabilities for your machine learning projects. +Interested in more YOLO11 integrations? Check out the [Ultralytics integration guide](./index.md) to explore additional tools and capabilities for your machine learning projects. ## FAQ -### How do I use JupyterLab to train a YOLOv8 model? +### How do I use JupyterLab to train a YOLO11 model? -To train a YOLOv8 model using JupyterLab: +To train a YOLO11 model using JupyterLab: 1. 
Install JupyterLab and the Ultralytics package: @@ -128,7 +128,7 @@ To train a YOLOv8 model using JupyterLab: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") ``` 4. Train the model on your custom dataset: @@ -147,22 +147,22 @@ To train a YOLOv8 model using JupyterLab: JupyterLab's interactive environment allows you to easily modify parameters, visualize results, and iterate on your model training process. -### What are the key features of JupyterLab that make it suitable for YOLOv8 projects? +### What are the key features of JupyterLab that make it suitable for YOLO11 projects? -JupyterLab offers several features that make it ideal for YOLOv8 projects: +JupyterLab offers several features that make it ideal for YOLO11 projects: -1. Interactive code execution: Test and debug YOLOv8 code snippets in real-time. +1. Interactive code execution: Test and debug YOLO11 code snippets in real-time. 2. Integrated file browser: Easily manage datasets, model weights, and configuration files. 3. Flexible layout: Arrange multiple notebooks, terminals, and output windows side-by-side for efficient workflow. -4. Rich output display: Visualize YOLOv8 detection results, training curves, and model performance metrics inline. -5. Markdown support: Document your YOLOv8 experiments and findings with rich text and images. +4. Rich output display: Visualize YOLO11 detection results, training curves, and model performance metrics inline. +5. Markdown support: Document your YOLO11 experiments and findings with rich text and images. 6. Extension ecosystem: Enhance functionality with extensions for version control, [remote computing](google-colab.md), and more. -These features allow for a seamless development experience when working with YOLOv8 models, from data preparation to [model deployment](https://www.ultralytics.com/glossary/model-deployment). 
+These features allow for a seamless development experience when working with YOLO11 models, from data preparation to [model deployment](https://www.ultralytics.com/glossary/model-deployment). -### How can I optimize YOLOv8 model performance using JupyterLab? +### How can I optimize YOLO11 model performance using JupyterLab? -To optimize YOLOv8 model performance in JupyterLab: +To optimize YOLO11 model performance in JupyterLab: 1. Use the autobatch feature to determine the optimal batch size: @@ -190,11 +190,11 @@ To optimize YOLOv8 model performance in JupyterLab: 4. Experiment with different model architectures and [export formats](../modes/export.md) to find the best balance of speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for your specific use case. -JupyterLab's interactive environment allows for quick iterations and real-time feedback, making it easier to optimize your YOLOv8 models efficiently. +JupyterLab's interactive environment allows for quick iterations and real-time feedback, making it easier to optimize your YOLO11 models efficiently. -### How do I handle common issues when working with JupyterLab and YOLOv8? +### How do I handle common issues when working with JupyterLab and YOLO11? -When working with JupyterLab and YOLOv8, you might encounter some common issues. Here's how to handle them: +When working with JupyterLab and YOLO11, you might encounter some common issues. Here's how to handle them: 1. GPU memory issues: @@ -203,7 +203,7 @@ When working with JupyterLab and YOLOv8, you might encounter some common issues. 2. Package conflicts: - - Create a separate conda environment for your YOLOv8 projects to avoid conflicts. + - Create a separate conda environment for your YOLO11 projects to avoid conflicts. - Use `!pip install package_name` in a notebook cell to install missing packages. 3. 
Kernel crashes: diff --git a/docs/en/integrations/kaggle.md b/docs/en/integrations/kaggle.md index 66929d109d8..40c928fa06f 100644 --- a/docs/en/integrations/kaggle.md +++ b/docs/en/integrations/kaggle.md @@ -1,14 +1,14 @@ --- comments: true -description: Dive into our guide on YOLOv8's integration with Kaggle. Find out what Kaggle is, its key features, and how to train a YOLOv8 model using the integration. -keywords: What is Kaggle, What is Kaggle Used For, YOLOv8, Kaggle Machine Learning, Model Training, GPU, TPU, cloud computing +description: Dive into our guide on YOLO11's integration with Kaggle. Find out what Kaggle is, its key features, and how to train a YOLO11 model using the integration. +keywords: What is Kaggle, What is Kaggle Used For, YOLO11, Kaggle Machine Learning, Model Training, GPU, TPU, cloud computing --- -# A Guide on Using Kaggle to Train Your YOLOv8 Models +# A Guide on Using Kaggle to Train Your YOLO11 Models If you are learning about AI and working on [small projects](../solutions/index.md), you might not have access to powerful computing resources yet, and high-end hardware can be pretty expensive. Fortunately, Kaggle, a platform owned by Google, offers a great solution. Kaggle provides a free, cloud-based environment where you can access GPU resources, handle large datasets, and collaborate with a diverse community of data scientists and [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) enthusiasts. -Kaggle is a great choice for [training](../guides/model-training-tips.md) and experimenting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics?tab=readme-ov-file) models. Kaggle Notebooks make using popular machine-learning libraries and frameworks in your projects easy. Let's explore Kaggle's main features and learn how you can train YOLOv8 models on this platform! 
+Kaggle is a great choice for [training](../guides/model-training-tips.md) and experimenting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics?tab=readme-ov-file) models. Kaggle Notebooks make using popular machine-learning libraries and frameworks in your projects easy. Let's explore Kaggle's main features and learn how you can train YOLO11 models on this platform! ## What is Kaggle? @@ -16,21 +16,21 @@ Kaggle is a platform that brings together data scientists from around the world With more than [10 million users](https://www.kaggle.com/discussions/general/332147) as of 2022, Kaggle provides a rich environment for developing and experimenting with machine learning models. You don't need to worry about your local machine's specs or setup; you can dive right in with just a Kaggle account and a web browser. -## Training YOLOv8 Using Kaggle +## Training YOLO11 Using Kaggle -Training YOLOv8 models on Kaggle is simple and efficient, thanks to the platform's access to powerful GPUs. +Training YOLO11 models on Kaggle is simple and efficient, thanks to the platform's access to powerful GPUs. -To get started, access the [Kaggle YOLOv8 Notebook](https://www.kaggle.com/code/ultralytics/yolov8). Kaggle's environment comes with pre-installed libraries like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making the setup process hassle-free. +To get started, access the [Kaggle YOLO11 Notebook](https://www.kaggle.com/code/glennjocherultralytics/yolo11). Kaggle's environment comes with pre-installed libraries like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making the setup process hassle-free. 
-![What is the kaggle integration with respect to YOLOv8?](https://github.com/ultralytics/docs/releases/download/0/kaggle-integration-yolov8.avif) +![What is the kaggle integration with respect to YOLO11?](https://github.com/ultralytics/docs/releases/download/0/kaggle-integration-yolov8.avif) -Once you sign in to your Kaggle account, you can click on the option to copy and edit the code, select a GPU under the accelerator settings, and run the notebook's cells to begin training your model. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). +Once you sign in to your Kaggle account, you can click on the option to copy and edit the code, select a GPU under the accelerator settings, and run the notebook's cells to begin training your model. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md). ![Using kaggle for machine learning model training with a GPU](https://github.com/ultralytics/docs/releases/download/0/using-kaggle-for-machine-learning-model-training-with-a-gpu.avif) -On the [official YOLOv8 Kaggle notebook page](https://www.kaggle.com/code/ultralytics/yolov8), if you click on the three dots in the upper right-hand corner, you'll notice more options will pop up. +On the [official YOLO11 Kaggle notebook page](https://www.kaggle.com/code/glennjocherultralytics/yolo11), if you click on the three dots in the upper right-hand corner, you'll notice more options will pop up. 
-![Overview of Options From the Official YOLOv8 Kaggle Notebook Page](https://github.com/ultralytics/docs/releases/download/0/overview-options-yolov8-kaggle-notebook.avif) +![Overview of Options From the Official YOLO11 Kaggle Notebook Page](https://github.com/ultralytics/docs/releases/download/0/overview-options-yolov8-kaggle-notebook.avif) These options include: @@ -48,7 +48,7 @@ These options include: When working with Kaggle, you might come across some common issues. Here are some points to help you navigate the platform smoothly: -- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the Nvidia Tesla P100 GPU with 16GB of memory and also offers the option of using a Nvidia GPU T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster. +- **Access to GPUs**: In your Kaggle notebooks, you can activate a GPU at any time, with usage allowed for up to 30 hours per week. Kaggle provides the NVIDIA Tesla P100 GPU with 16GB of memory and also offers the option of using an NVIDIA GPU T4 x2. Powerful hardware accelerates your machine-learning tasks, making model training and inference much faster. - **Kaggle Kernels**: Kaggle Kernels are free Jupyter notebook servers that can integrate GPUs, allowing you to perform machine learning operations on cloud computers. You don't have to rely on your own computer's CPU, avoiding overload and freeing up your local resources. - **Kaggle Datasets**: Kaggle datasets are free to download. However, it's important to check the license for each dataset to understand any usage restrictions. Some datasets may have limitations on academic publications or commercial use. You can download datasets directly to your Kaggle notebook or anywhere else via the Kaggle API. - **Saving and Committing Notebooks**: To save and commit a notebook on Kaggle, click "Save Version." 
This saves the current state of your notebook. Once the background kernel finishes generating the output files, you can access them from the Output tab on the main notebook page. @@ -59,17 +59,17 @@ When working with Kaggle, you might come across some common issues. Here are som Next, let's understand the features Kaggle offers that make it an excellent platform for data science and machine learning enthusiasts. Here are some of the key highlights: -- **Datasets**: Kaggle hosts a massive collection of datasets on various topics. You can easily search and use these datasets in your projects, which is particularly handy for training and testing your YOLOv8 models. +- **Datasets**: Kaggle hosts a massive collection of datasets on various topics. You can easily search and use these datasets in your projects, which is particularly handy for training and testing your YOLO11 models. - **Competitions**: Known for its exciting competitions, Kaggle allows data scientists and machine learning enthusiasts to solve real-world problems. Competing helps you improve your skills, learn new techniques, and gain recognition in the community. -- **Free Access to TPUs**: Kaggle provides free access to powerful TPUs, which are essential for training complex machine learning models. This means you can speed up processing and boost the performance of your YOLOv8 projects without incurring extra costs. -- **Integration with Github**: Kaggle allows you to easily connect your GitHub repository to upload notebooks and save your work. This integration makes it convenient to manage and access your files. +- **Free Access to TPUs**: Kaggle provides free access to powerful TPUs, which are essential for training complex machine learning models. This means you can speed up processing and boost the performance of your YOLO11 projects without incurring extra costs. +- **Integration with GitHub**: Kaggle allows you to easily connect your GitHub repository to upload notebooks and save your work. 
This integration makes it convenient to manage and access your files. - **Community and Discussions**: Kaggle boasts a strong community of data scientists and machine learning practitioners. The discussion forums and shared notebooks are fantastic resources for learning and troubleshooting. You can easily find help, share your knowledge, and collaborate with others. -## Why Should You Use Kaggle for Your YOLOv8 Projects? +## Why Should You Use Kaggle for Your YOLO11 Projects? There are multiple platforms for training and evaluating machine learning models, so what makes Kaggle stand out? Let's dive into the benefits of using Kaggle for your machine-learning projects: -- **Public Notebooks**: You can make your Kaggle notebooks public, allowing other users to view, vote, fork, and discuss your work. Kaggle promotes collaboration, feedback, and the sharing of ideas, helping you improve your YOLOv8 models. +- **Public Notebooks**: You can make your Kaggle notebooks public, allowing other users to view, vote, fork, and discuss your work. Kaggle promotes collaboration, feedback, and the sharing of ideas, helping you improve your YOLO11 models. - **Comprehensive History of Notebook Commits**: Kaggle creates a detailed history of your notebook commits. This allows you to review and track changes over time, making it easier to understand the evolution of your project and revert to previous versions if needed. - **Console Access**: Kaggle provides a console, giving you more control over your environment. This feature allows you to perform various tasks directly from the command line, enhancing your workflow and productivity. - **Resource Availability**: Each notebook editing session on Kaggle is provided with significant resources: 12 hours of execution time for CPU and GPU sessions, 9 hours of execution time for TPU sessions, and 20 gigabytes of auto-saved disk space. 
@@ -81,34 +81,34 @@ If you want to learn more about Kaggle, here are some helpful resources to guide - [**Kaggle Learn**](https://www.kaggle.com/learn): Discover a variety of free, interactive tutorials on Kaggle Learn. These courses cover essential data science topics and provide hands-on experience to help you master new skills. - [**Getting Started with Kaggle**](https://www.kaggle.com/code/alexisbcook/getting-started-with-kaggle): This comprehensive guide walks you through the basics of using Kaggle, from joining competitions to creating your first notebook. It's a great starting point for newcomers. -- [**Kaggle Medium Page**](https://medium.com/@kaggleteam): Explore tutorials, updates, and community contributions on Kaggle's Medium page. It's an excellent source for staying up-to-date with the latest trends and gaining deeper insights into data science. +- [**Kaggle Medium Page**](https://medium.com/@kaggleteam): Explore tutorials, updates, and community contributions to Kaggle's Medium page. It's an excellent source for staying up-to-date with the latest trends and gaining deeper insights into data science. ## Summary -We've seen how Kaggle can boost your YOLOv8 projects by providing free access to powerful GPUs, making model training and evaluation efficient. Kaggle's platform is user-friendly, with pre-installed libraries for quick setup. +We've seen how Kaggle can boost your YOLO11 projects by providing free access to powerful GPUs, making model training and evaluation efficient. Kaggle's platform is user-friendly, with pre-installed libraries for quick setup. For more details, visit [Kaggle's documentation](https://www.kaggle.com/docs). -Interested in more YOLOv8 integrations? Check out the[ Ultralytics integration guide](https://docs.ultralytics.com/integrations/) to explore additional tools and capabilities for your machine learning projects. +Interested in more YOLO11 integrations? 
Check out the [Ultralytics integration guide](https://docs.ultralytics.com/integrations/) to explore additional tools and capabilities for your machine learning projects. ## FAQ -### How do I train a YOLOv8 model on Kaggle? +### How do I train a YOLO11 model on Kaggle? -Training a YOLOv8 model on Kaggle is straightforward. First, access the [Kaggle YOLOv8 Notebook](https://www.kaggle.com/ultralytics/yolov8). Sign in to your Kaggle account, copy and edit the notebook, and select a GPU under the accelerator settings. Run the notebook cells to start training. For more detailed steps, refer to our [YOLOv8 Model Training guide](../modes/train.md). +Training a YOLO11 model on Kaggle is straightforward. First, access the [Kaggle YOLO11 Notebook](https://www.kaggle.com/code/glennjocherultralytics/yolo11). Sign in to your Kaggle account, copy and edit the notebook, and select a GPU under the accelerator settings. Run the notebook cells to start training. For more detailed steps, refer to our [YOLO11 Model Training guide](../modes/train.md). -### What are the benefits of using Kaggle for YOLOv8 model training? +### What are the benefits of using Kaggle for YOLO11 model training? -Kaggle offers several advantages for training YOLOv8 models: +Kaggle offers several advantages for training YOLO11 models: -- **Free GPU Access**: Utilize powerful GPUs like Nvidia Tesla P100 or T4 x2 for up to 30 hours per week. +- **Free GPU Access**: Utilize powerful GPUs like NVIDIA Tesla P100 or T4 x2 for up to 30 hours per week. - **Pre-installed Libraries**: Libraries like TensorFlow and PyTorch are pre-installed, simplifying the setup. - **Community Collaboration**: Engage with a vast community of data scientists and machine learning enthusiasts. - **Version Control**: Easily manage different versions of your notebooks and revert to previous versions if needed. For more details, visit our [Ultralytics integration guide](https://docs.ultralytics.com/integrations/). 
-### What common issues might I encounter when using Kaggle for YOLOv8, and how can I resolve them? +### What common issues might I encounter when using Kaggle for YOLO11, and how can I resolve them? Common issues include: @@ -119,7 +119,7 @@ Common issues include: For more troubleshooting tips, see our [Common Issues guide](../guides/yolo-common-issues.md). -### Why should I choose Kaggle over other platforms like Google Colab for training YOLOv8 models? +### Why should I choose Kaggle over other platforms like Google Colab for training YOLO11 models? Kaggle offers unique features that make it an excellent choice: @@ -127,7 +127,8 @@ Kaggle offers unique features that make it an excellent choice: - **Free Access to TPUs**: Speed up training with powerful TPUs without extra costs. - **Comprehensive History**: Track changes over time with a detailed history of notebook commits. - **Resource Availability**: Significant resources are provided for each notebook session, including 12 hours of execution time for CPU and GPU sessions. - For a comparison with Google Colab, refer to our [Google Colab guide](./google-colab.md). + +For a comparison with Google Colab, refer to our [Google Colab guide](./google-colab.md). ### How can I revert to a previous version of my Kaggle notebook? diff --git a/docs/en/integrations/mnn.md b/docs/en/integrations/mnn.md new file mode 100644 index 00000000000..bd92c9a8f33 --- /dev/null +++ b/docs/en/integrations/mnn.md @@ -0,0 +1,344 @@ +--- +comments: true +description: Optimize YOLO11 models for mobile and embedded devices by exporting to MNN format. +keywords: Ultralytics, YOLO11, MNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models +--- + +# MNN Export for YOLO11 Models and Deploy + +## MNN + +

+ MNN architecture +

+ +[MNN](https://github.com/alibaba/MNN) is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models and has industry-leading performance for inference and training on-device. At present, MNN has been integrated into more than 30 apps of Alibaba Inc, such as Taobao, Tmall, Youku, DingTalk, Xianyu, etc., covering more than 70 usage scenarios such as live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control. In addition, MNN is also used on embedded devices, such as IoT. + +## Export to MNN: Converting Your YOLO11 Model + +You can expand model compatibility and deployment flexibility by converting YOLO11 models to MNN format. + +### Installation + +To install the required packages, run: + +!!! tip "Installation" + + === "CLI" + + ```bash + # Install the required package for YOLO11 and MNN + pip install ultralytics + pip install MNN + ``` + +### Usage + +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). + +!!! 
example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the YOLO11 model + model = YOLO("yolo11n.pt") + + # Export the model to MNN format + model.export(format="mnn") # creates 'yolo11n.mnn' + + # Load the exported MNN model + mnn_model = YOLO("yolo11n.mnn") + + # Run inference + results = mnn_model("https://ultralytics.com/images/bus.jpg") + ``` + + === "CLI" + + ```bash + # Export a YOLO11n PyTorch model to MNN format + yolo export model=yolo11n.pt format=mnn # creates 'yolo11n.mnn' + + # Run inference with the exported model + yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' + ``` + +For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). + +### MNN-Only Inference + +A function that relies solely on MNN for YOLO11 inference and preprocessing is implemented, providing both Python and C++ versions for easy deployment in any scenario. + +!!! 
example "MNN" + + === "Python" + + ```python + import argparse + + import MNN + import MNN.cv as cv2 + import MNN.numpy as np + + + def inference(model, img, precision, backend, thread): + config = {} + config["precision"] = precision + config["backend"] = backend + config["numThread"] = thread + rt = MNN.nn.create_runtime_manager((config,)) + # net = MNN.nn.load_module_from_file(model, ['images'], ['output0'], runtime_manager=rt) + net = MNN.nn.load_module_from_file(model, [], [], runtime_manager=rt) + original_image = cv2.imread(img) + ih, iw, _ = original_image.shape + length = max((ih, iw)) + scale = length / 640 + image = np.pad(original_image, [[0, length - ih], [0, length - iw], [0, 0]], "constant") + image = cv2.resize( + image, (640, 640), 0.0, 0.0, cv2.INTER_LINEAR, -1, [0.0, 0.0, 0.0], [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0] + ) + image = image[..., ::-1] # BGR to RGB + input_var = np.expand_dims(image, 0) + input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4) + output_var = net.forward(input_var) + output_var = MNN.expr.convert(output_var, MNN.expr.NCHW) + output_var = output_var.squeeze() + # output_var shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80] + cx = output_var[0] + cy = output_var[1] + w = output_var[2] + h = output_var[3] + probs = output_var[4:] + # [cx, cy, w, h] -> [y0, x0, y1, x1] + x0 = cx - w * 0.5 + y0 = cy - h * 0.5 + x1 = cx + w * 0.5 + y1 = cy + h * 0.5 + boxes = np.stack([x0, y0, x1, y1], axis=1) + # get max prob and idx + scores = np.max(probs, 0) + class_ids = np.argmax(probs, 0) + result_ids = MNN.expr.nms(boxes, scores, 100, 0.45, 0.25) + print(result_ids.shape) + # nms result box, score, ids + result_boxes = boxes[result_ids] + result_scores = scores[result_ids] + result_class_ids = class_ids[result_ids] + for i in range(len(result_boxes)): + x0, y0, x1, y1 = result_boxes[i].read_as_tuple() + y0 = int(y0 * scale) + y1 = int(y1 * scale) + x0 = int(x0 * scale) + x1 = int(x1 * scale) + print(result_class_ids[i]) + 
cv2.rectangle(original_image, (x0, y0), (x1, y1), (0, 0, 255), 2) + cv2.imwrite("res.jpg", original_image) + + + if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, required=True, help="the yolo11 model path") + parser.add_argument("--img", type=str, required=True, help="the input image path") + parser.add_argument("--precision", type=str, default="normal", help="inference precision: normal, low, high, lowBF") + parser.add_argument( + "--backend", + type=str, + default="CPU", + help="inference backend: CPU, OPENCL, OPENGL, NN, VULKAN, METAL, TRT, CUDA, HIAI", + ) + parser.add_argument("--thread", type=int, default=4, help="inference using thread: int") + args = parser.parse_args() + inference(args.model, args.img, args.precision, args.backend, args.thread) + ``` + + === "CPP" + + ```cpp + #include + #include + #include + #include + #include + #include + + #include + + using namespace MNN; + using namespace MNN::Express; + using namespace MNN::CV; + + int main(int argc, const char* argv[]) { + if (argc < 3) { + MNN_PRINT("Usage: ./yolo11_demo.out model.mnn input.jpg [forwardType] [precision] [thread]\n"); + return 0; + } + int thread = 4; + int precision = 0; + int forwardType = MNN_FORWARD_CPU; + if (argc >= 4) { + forwardType = atoi(argv[3]); + } + if (argc >= 5) { + precision = atoi(argv[4]); + } + if (argc >= 6) { + thread = atoi(argv[5]); + } + MNN::ScheduleConfig sConfig; + sConfig.type = static_cast(forwardType); + sConfig.numThread = thread; + BackendConfig bConfig; + bConfig.precision = static_cast(precision); + sConfig.backendConfig = &bConfig; + std::shared_ptr rtmgr = std::shared_ptr(Executor::RuntimeManager::createRuntimeManager(sConfig)); + if(rtmgr == nullptr) { + MNN_ERROR("Empty RuntimeManger\n"); + return 0; + } + rtmgr->setCache(".cachefile"); + + std::shared_ptr net(Module::load(std::vector{}, std::vector{}, argv[1], rtmgr)); + auto original_image = imread(argv[2]); + auto dims = 
original_image->getInfo()->dim; + int ih = dims[0]; + int iw = dims[1]; + int len = ih > iw ? ih : iw; + float scale = len / 640.0; + std::vector padvals { 0, len - ih, 0, len - iw, 0, 0 }; + auto pads = _Const(static_cast(padvals.data()), {3, 2}, NCHW, halide_type_of()); + auto image = _Pad(original_image, pads, CONSTANT); + image = resize(image, Size(640, 640), 0, 0, INTER_LINEAR, -1, {0., 0., 0.}, {1./255., 1./255., 1./255.}); + image = cvtColor(image, COLOR_BGR2RGB); + auto input = _Unsqueeze(image, {0}); + input = _Convert(input, NC4HW4); + auto outputs = net->onForward({input}); + auto output = _Convert(outputs[0], NCHW); + output = _Squeeze(output); + // output shape: [84, 8400]; 84 means: [cx, cy, w, h, prob * 80] + auto cx = _Gather(output, _Scalar(0)); + auto cy = _Gather(output, _Scalar(1)); + auto w = _Gather(output, _Scalar(2)); + auto h = _Gather(output, _Scalar(3)); + std::vector startvals { 4, 0 }; + auto start = _Const(static_cast(startvals.data()), {2}, NCHW, halide_type_of()); + std::vector sizevals { -1, -1 }; + auto size = _Const(static_cast(sizevals.data()), {2}, NCHW, halide_type_of()); + auto probs = _Slice(output, start, size); + // [cx, cy, w, h] -> [y0, x0, y1, x1] + auto x0 = cx - w * _Const(0.5); + auto y0 = cy - h * _Const(0.5); + auto x1 = cx + w * _Const(0.5); + auto y1 = cy + h * _Const(0.5); + auto boxes = _Stack({x0, y0, x1, y1}, 1); + auto scores = _ReduceMax(probs, {0}); + auto ids = _ArgMax(probs, 0); + auto result_ids = _Nms(boxes, scores, 100, 0.45, 0.25); + auto result_ptr = result_ids->readMap(); + auto box_ptr = boxes->readMap(); + auto ids_ptr = ids->readMap(); + auto score_ptr = scores->readMap(); + for (int i = 0; i < 100; i++) { + auto idx = result_ptr[i]; + if (idx < 0) break; + auto x0 = box_ptr[idx * 4 + 0] * scale; + auto y0 = box_ptr[idx * 4 + 1] * scale; + auto x1 = box_ptr[idx * 4 + 2] * scale; + auto y1 = box_ptr[idx * 4 + 3] * scale; + auto class_idx = ids_ptr[idx]; + auto score = score_ptr[idx]; + 
rectangle(original_image, {x0, y0}, {x1, y1}, {0, 0, 255}, 2); + } + if (imwrite("res.jpg", original_image)) { + MNN_PRINT("result image write to `res.jpg`.\n"); + } + rtmgr->updateCache(); + return 0; + } + ``` + +## Summary + +In this guide, we introduce how to export the Ultralytics YOLO11 model to MNN and use MNN for inference. + +For more usage, please refer to the [MNN documentation](https://mnn-docs.readthedocs.io/en/latest). + +## FAQ + +### How do I export Ultralytics YOLO11 models to MNN format? + +To export your Ultralytics YOLO11 model to MNN format, follow these steps: + +!!! example "Export" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the YOLO11 model + model = YOLO("yolo11n.pt") + + # Export to MNN format + model.export(format="mnn") # creates 'yolo11n.mnn' with fp32 weight + model.export(format="mnn", half=True) # creates 'yolo11n.mnn' with fp16 weight + model.export(format="mnn", int8=True) # creates 'yolo11n.mnn' with int8 weight + ``` + + === "CLI" + + ```bash + yolo export model=yolo11n.pt format=mnn # creates 'yolo11n.mnn' with fp32 weight + yolo export model=yolo11n.pt format=mnn half=True # creates 'yolo11n.mnn' with fp16 weight + yolo export model=yolo11n.pt format=mnn int8=True # creates 'yolo11n.mnn' with int8 weight + ``` + +For detailed export options, check the [Export](../modes/export.md) page in the documentation. + +### How do I predict with an exported YOLO11 MNN model? + +To predict with an exported YOLO11 MNN model, use the `predict` function from the YOLO class. + +!!! 
example "Predict" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the YOLO11 MNN model + model = YOLO("yolo11n.mnn") + + # Run inference + results = model("https://ultralytics.com/images/bus.jpg") # predict with `fp32` + results = model("https://ultralytics.com/images/bus.jpg", half=True) # predict with `fp16` if the device supports it + + for result in results: + result.show() # display to screen + result.save(filename="result.jpg") # save to disk + ``` + + === "CLI" + + ```bash + yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' # predict with `fp32` + yolo predict model='yolo11n.mnn' source='https://ultralytics.com/images/bus.jpg' half=True # predict with `fp16` if the device supports it + ``` + +### What platforms are supported for MNN? + +MNN is versatile and supports various platforms: + +- **Mobile**: Android, iOS, Harmony. +- **Embedded Systems and IoT Devices**: Devices like Raspberry Pi and NVIDIA Jetson. +- **Desktop and Servers**: Linux, Windows, and macOS. + +### How can I deploy Ultralytics YOLO11 MNN models on Mobile Devices? + +To deploy your YOLO11 models on Mobile devices: + +1. **Build for Android**: Follow the [MNN Android](https://github.com/alibaba/MNN/tree/master/project/android). +2. **Build for iOS**: Follow the [MNN iOS](https://github.com/alibaba/MNN/tree/master/project/ios). +3. **Build for Harmony**: Follow the [MNN Harmony](https://github.com/alibaba/MNN/tree/master/project/harmony). diff --git a/docs/en/integrations/ncnn.md b/docs/en/integrations/ncnn.md index 42d04198e14..9dc13f966f7 100644 --- a/docs/en/integrations/ncnn.md +++ b/docs/en/integrations/ncnn.md @@ -1,14 +1,14 @@ --- comments: true -description: Optimize YOLOv8 models for mobile and embedded devices by exporting to NCNN format. Enhance performance in resource-constrained environments. 
-keywords: Ultralytics, YOLOv8, NCNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models +description: Optimize YOLO11 models for mobile and embedded devices by exporting to NCNN format. Enhance performance in resource-constrained environments. +keywords: Ultralytics, YOLO11, NCNN, model export, machine learning, deployment, mobile, embedded systems, deep learning, AI models --- -# How to Export to NCNN from YOLOv8 for Smooth Deployment +# How to Export to NCNN from YOLO11 for Smooth Deployment Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models on devices with limited computational power, such as mobile or embedded systems, can be tricky. You need to make sure you use a format optimized for optimal performance. This makes sure that even devices with limited processing power can handle advanced computer vision tasks well. -The export to NCNN format feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for lightweight device-based applications. In this guide, we'll walk you through how to convert your models to the NCNN format, making it easier for your models to perform well on various mobile and embedded devices. +The export to NCNN format feature allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for lightweight device-based applications. In this guide, we'll walk you through how to convert your models to the NCNN format, making it easier for your models to perform well on various mobile and embedded devices. ## Why should you export to NCNN? @@ -34,7 +34,7 @@ NCNN models offer a wide range of key features that enable on-device [machine le ## Deployment Options with NCNN -Before we look at the code for exporting YOLOv8 models to the NCNN format, let's understand how NCNN models are normally used. 
+Before we look at the code for exporting YOLO11 models to the NCNN format, let's understand how NCNN models are normally used. NCNN models, designed for efficiency and performance, are compatible with a variety of deployment platforms: @@ -44,9 +44,9 @@ NCNN models, designed for efficiency and performance, are compatible with a vari - **Desktop and Server Deployment**: Capable of being deployed in desktop and server environments across Linux, Windows, and macOS, supporting development, training, and evaluation with higher computational capacities. -## Export to NCNN: Converting Your YOLOv8 Model +## Export to NCNN: Converting Your YOLO11 Model -You can expand model compatibility and deployment flexibility by converting YOLOv8 models to NCNN format. +You can expand model compatibility and deployment flexibility by converting YOLO11 models to NCNN format. ### Installation @@ -57,15 +57,15 @@ To install the required packages, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). 
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! example "Usage" @@ -74,14 +74,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to NCNN format - model.export(format="ncnn") # creates '/yolov8n_ncnn_model' + model.export(format="ncnn") # creates '/yolo11n_ncnn_model' # Load the exported NCNN model - ncnn_model = YOLO("./yolov8n_ncnn_model") + ncnn_model = YOLO("./yolo11n_ncnn_model") # Run inference results = ncnn_model("https://ultralytics.com/images/bus.jpg") @@ -90,18 +90,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to NCNN format - yolo export model=yolov8n.pt format=ncnn # creates '/yolov8n_ncnn_model' + # Export a YOLO11n PyTorch model to NCNN format + yolo export model=yolo11n.pt format=ncnn # creates '/yolo11n_ncnn_model' # Run inference with the exported model - yolo predict model='./yolov8n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_ncnn_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 NCNN Models +## Deploying Exported YOLO11 NCNN Models -After successfully exporting your Ultralytics YOLOv8 models to NCNN format, you can now deploy them. The primary and recommended first step for running a NCNN model is to utilize the YOLO("./model_ncnn_model") method, as outlined in the previous usage code snippet. 
However, for in-depth instructions on deploying your NCNN models in various other settings, take a look at the following resources: +After successfully exporting your Ultralytics YOLO11 models to NCNN format, you can now deploy them. The primary and recommended first step for running a NCNN model is to utilize the YOLO("yolo11n_ncnn_model/") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your NCNN models in various other settings, take a look at the following resources: - **[Android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-android)**: This blog explains how to use NCNN models for performing tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) through Android applications. @@ -113,40 +113,40 @@ After successfully exporting your Ultralytics YOLOv8 models to NCNN format, you ## Summary -In this guide, we've gone over exporting Ultralytics YOLOv8 models to the NCNN format. This conversion step is crucial for improving the efficiency and speed of YOLOv8 models, making them more effective and suitable for limited-resource computing environments. +In this guide, we've gone over exporting Ultralytics YOLO11 models to the NCNN format. This conversion step is crucial for improving the efficiency and speed of YOLO11 models, making them more effective and suitable for limited-resource computing environments. For detailed instructions on usage, please refer to the [official NCNN documentation](https://ncnn.readthedocs.io/en/latest/index.html). -Also, if you're interested in exploring other integration options for Ultralytics YOLOv8, be sure to visit our [integration guide page](index.md) for further insights and information. +Also, if you're interested in exploring other integration options for Ultralytics YOLO11, be sure to visit our [integration guide page](index.md) for further insights and information. 
## FAQ -### How do I export Ultralytics YOLOv8 models to NCNN format? +### How do I export Ultralytics YOLO11 models to NCNN format? -To export your Ultralytics YOLOv8 model to NCNN format, follow these steps: +To export your Ultralytics YOLO11 model to NCNN format, follow these steps: - **Python**: Use the `export` function from the YOLO class. ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export to NCNN format - model.export(format="ncnn") # creates '/yolov8n_ncnn_model' + model.export(format="ncnn") # creates '/yolo11n_ncnn_model' ``` - **CLI**: Use the `yolo` command with the `export` argument. ```bash - yolo export model=yolov8n.pt format=ncnn # creates '/yolov8n_ncnn_model' + yolo export model=yolo11n.pt format=ncnn # creates '/yolo11n_ncnn_model' ``` For detailed export options, check the [Export](../modes/export.md) page in the documentation. -### What are the advantages of exporting YOLOv8 models to NCNN? +### What are the advantages of exporting YOLO11 models to NCNN? -Exporting your Ultralytics YOLOv8 models to NCNN offers several benefits: +Exporting your Ultralytics YOLO11 models to NCNN offers several benefits: - **Efficiency**: NCNN models are optimized for mobile and embedded devices, ensuring high performance even with limited computational resources. - **Quantization**: NCNN supports techniques like quantization that improve model speed and reduce memory usage. @@ -174,13 +174,13 @@ NCNN is versatile and supports various platforms: If running models on a Raspberry Pi isn't fast enough, converting to the NCNN format could speed things up as detailed in our [Raspberry Pi Guide](../guides/raspberry-pi.md). -### How can I deploy Ultralytics YOLOv8 NCNN models on Android? +### How can I deploy Ultralytics YOLO11 NCNN models on Android? -To deploy your YOLOv8 models on Android: +To deploy your YOLO11 models on Android: 1. 
**Build for Android**: Follow the [NCNN Build for Android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-android) guide. 2. **Integrate with Your App**: Use the NCNN Android SDK to integrate the exported model into your application for efficient on-device inference. -For step-by-step instructions, refer to our guide on [Deploying YOLOv8 NCNN Models](#deploying-exported-yolov8-ncnn-models). +For step-by-step instructions, refer to our guide on [Deploying YOLO11 NCNN Models](#deploying-exported-yolo11-ncnn-models). For more advanced guides and use cases, visit the [Ultralytics documentation page](../guides/model-deployment-options.md). diff --git a/docs/en/integrations/neural-magic.md b/docs/en/integrations/neural-magic.md index d05cf98fa7f..72837ccdce4 100644 --- a/docs/en/integrations/neural-magic.md +++ b/docs/en/integrations/neural-magic.md @@ -1,14 +1,14 @@ --- comments: true -description: Enhance YOLOv8 performance using Neural Magic's DeepSparse Engine. Learn how to deploy and benchmark YOLOv8 models on CPUs for efficient object detection. -keywords: YOLOv8, DeepSparse, Neural Magic, model optimization, object detection, inference speed, CPU performance, sparsity, pruning, quantization +description: Enhance YOLO11 performance using Neural Magic's DeepSparse Engine. Learn how to deploy and benchmark YOLO11 models on CPUs for efficient object detection. +keywords: YOLO11, DeepSparse, Neural Magic, model optimization, object detection, inference speed, CPU performance, sparsity, pruning, quantization --- -# Optimizing YOLOv8 Inferences with Neural Magic's DeepSparse Engine +# Optimizing YOLO11 Inferences with Neural Magic's DeepSparse Engine -When deploying [object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLOv8](https://www.ultralytics.com/) on various hardware, you can bump into unique issues like optimization. This is where YOLOv8's integration with Neural Magic's DeepSparse Engine steps in. 
It transforms the way YOLOv8 models are executed and enables GPU-level performance directly on CPUs. +When deploying [object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLO11](https://www.ultralytics.com/) on various hardware, you can bump into unique issues like optimization. This is where YOLO11's integration with Neural Magic's DeepSparse Engine steps in. It transforms the way YOLO11 models are executed and enables GPU-level performance directly on CPUs. -This guide shows you how to deploy YOLOv8 using Neural Magic's DeepSparse, how to run inferences, and also how to benchmark performance to ensure it is optimized. +This guide shows you how to deploy YOLO11 using Neural Magic's DeepSparse, how to run inferences, and also how to benchmark performance to ensure it is optimized. ## Neural Magic's DeepSparse @@ -18,17 +18,17 @@ This guide shows you how to deploy YOLOv8 using Neural Magic's DeepSparse, how t [Neural Magic's DeepSparse](https://neuralmagic.com/deepsparse/) is an inference run-time designed to optimize the execution of neural networks on CPUs. It applies advanced techniques like sparsity, pruning, and quantization to dramatically reduce computational demands while maintaining accuracy. DeepSparse offers an agile solution for efficient and scalable [neural network](https://www.ultralytics.com/glossary/neural-network-nn) execution across various devices. -## Benefits of Integrating Neural Magic's DeepSparse with YOLOv8 +## Benefits of Integrating Neural Magic's DeepSparse with YOLO11 Before diving into how to deploy YOLOV8 using DeepSparse, let's understand the benefits of using DeepSparse. Some key advantages include: -- **Enhanced Inference Speed**: Achieves up to 525 FPS (on YOLOv8n), significantly speeding up YOLOv8's inference capabilities compared to traditional methods. 
+- **Enhanced Inference Speed**: Achieves up to 525 FPS (on YOLO11n), significantly speeding up YOLO11's inference capabilities compared to traditional methods.

Enhanced Inference Speed

-- **Optimized Model Efficiency**: Uses pruning and quantization to enhance YOLOv8's efficiency, reducing model size and computational requirements while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy). +- **Optimized Model Efficiency**: Uses pruning and quantization to enhance YOLO11's efficiency, reducing model size and computational requirements while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy).

Optimized Model Efficiency @@ -36,9 +36,9 @@ Before diving into how to deploy YOLOV8 using DeepSparse, let's understand the b - **High Performance on Standard CPUs**: Delivers GPU-like performance on CPUs, providing a more accessible and cost-effective option for various applications. -- **Streamlined Integration and Deployment**: Offers user-friendly tools for easy integration of YOLOv8 into applications, including image and video annotation features. +- **Streamlined Integration and Deployment**: Offers user-friendly tools for easy integration of YOLO11 into applications, including image and video annotation features. -- **Support for Various Model Types**: Compatible with both standard and sparsity-optimized YOLOv8 models, adding deployment flexibility. +- **Support for Various Model Types**: Compatible with both standard and sparsity-optimized YOLO11 models, adding deployment flexibility. - **Cost-Effective and Scalable Solution**: Reduces operational expenses and offers scalable deployment of advanced object detection models. @@ -56,15 +56,15 @@ Neural Magic's Deep Sparse technology is inspired by the human brain's efficienc For more details on how Neural Magic's DeepSparse technology work, check out [their blog post](https://neuralmagic.com/blog/how-neural-magics-deep-sparse-technology-works/). -## Creating A Sparse Version of YOLOv8 Trained on a Custom Dataset +## Creating A Sparse Version of YOLO11 Trained on a Custom Dataset -SparseZoo, an open-source model repository by Neural Magic, offers [a collection of pre-sparsified YOLOv8 model checkpoints](https://sparsezoo.neuralmagic.com/?modelSet=computer_vision&searchModels=yolo). With SparseML, seamlessly integrated with Ultralytics, users can effortlessly fine-tune these sparse checkpoints on their specific datasets using a straightforward command-line interface. 
+SparseZoo, an open-source model repository by Neural Magic, offers [a collection of pre-sparsified YOLO11 model checkpoints](https://sparsezoo.neuralmagic.com/?modelSet=computer_vision&searchModels=yolo). With SparseML, seamlessly integrated with Ultralytics, users can effortlessly fine-tune these sparse checkpoints on their specific datasets using a straightforward command-line interface. -Checkout [Neural Magic's SparseML YOLOv8 documentation](https://github.com/neuralmagic/sparseml/tree/main/integrations/ultralytics-yolov8) for more details. +Check out [Neural Magic's SparseML YOLO11 documentation](https://github.com/neuralmagic/sparseml/tree/main/integrations/ultralytics-yolov8) for more details. ## Usage: Deploying YOLOV8 using DeepSparse -Deploying YOLOv8 with Neural Magic's DeepSparse involves a few straightforward steps. Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. Here's how you can get started. +Deploying YOLO11 with Neural Magic's DeepSparse involves a few straightforward steps. Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. Here's how you can get started. ### Step 1: Installation @@ -79,24 +79,24 @@ To install the required packages, run: pip install deepsparse[yolov8] ``` -### Step 2: Exporting YOLOv8 to ONNX Format +### Step 2: Exporting YOLO11 to ONNX Format -DeepSparse Engine requires YOLOv8 models in ONNX format. Exporting your model to this format is essential for compatibility with DeepSparse. Use the following command to export YOLOv8 models: +DeepSparse Engine requires YOLO11 models in ONNX format. Exporting your model to this format is essential for compatibility with DeepSparse. 
Use the following command to export YOLO11 models: !!! tip "Model Export" === "CLI" ```bash - # Export YOLOv8 model to ONNX format - yolo task=detect mode=export model=yolov8n.pt format=onnx opset=13 + # Export YOLO11 model to ONNX format + yolo task=detect mode=export model=yolo11n.pt format=onnx opset=13 ``` -This command will save the `yolov8n.onnx` model to your disk. +This command will save the `yolo11n.onnx` model to your disk. ### Step 3: Deploying and Running Inferences -With your YOLOv8 model in ONNX format, you can deploy and run inferences using DeepSparse. This can be done easily with their intuitive Python API: +With your YOLO11 model in ONNX format, you can deploy and run inferences using DeepSparse. This can be done easily with their intuitive Python API: !!! tip "Deploying and Running Inferences" @@ -105,8 +105,8 @@ With your YOLOv8 model in ONNX format, you can deploy and run inferences using D ```python from deepsparse import Pipeline - # Specify the path to your YOLOv8 ONNX model - model_path = "path/to/yolov8n.onnx" + # Specify the path to your YOLO11 ONNX model + model_path = "path/to/yolo11n.onnx" # Set up the DeepSparse Pipeline yolo_pipeline = Pipeline.create(task="yolov8", model_path=model_path) @@ -118,7 +118,7 @@ With your YOLOv8 model in ONNX format, you can deploy and run inferences using D ### Step 4: Benchmarking Performance -It's important to check that your YOLOv8 model is performing optimally on DeepSparse. You can benchmark your model's performance to analyze throughput and latency: +It's important to check that your YOLO11 model is performing optimally on DeepSparse. You can benchmark your model's performance to analyze throughput and latency: !!! 
tip "Benchmarking" @@ -126,12 +126,12 @@ It's important to check that your YOLOv8 model is performing optimally on DeepSp ```bash # Benchmark performance - deepsparse.benchmark model_path="path/to/yolov8n.onnx" --scenario=sync --input_shapes="[1,3,640,640]" + deepsparse.benchmark model_path="path/to/yolo11n.onnx" --scenario=sync --input_shapes="[1,3,640,640]" ``` ### Step 5: Additional Features -DeepSparse provides additional features for practical integration of YOLOv8 in applications, such as image annotation and dataset evaluation. +DeepSparse provides additional features for practical integration of YOLO11 in applications, such as image annotation and dataset evaluation. !!! tip "Additional Features" @@ -139,10 +139,10 @@ DeepSparse provides additional features for practical integration of YOLOv8 in a ```bash # For image annotation - deepsparse.yolov8.annotate --source "path/to/image.jpg" --model_filepath "path/to/yolov8n.onnx" + deepsparse.yolov8.annotate --source "path/to/image.jpg" --model_filepath "path/to/yolo11n.onnx" # For evaluating model performance on a dataset - deepsparse.yolov8.eval --model_path "path/to/yolov8n.onnx" + deepsparse.yolov8.eval --model_path "path/to/yolo11n.onnx" ``` Running the annotate command processes your specified image, detecting objects, and saving the annotated image with bounding boxes and classifications. The annotated image will be stored in an annotation-results folder. This helps provide a visual representation of the model's detection capabilities. @@ -151,61 +151,61 @@ Running the annotate command processes your specified image, detecting objects, Image Annotation Feature

-After running the eval command, you will receive detailed output metrics such as [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP (mean Average Precision). This provides a comprehensive view of your model's performance on the dataset. This functionality is particularly useful for fine-tuning and optimizing your YOLOv8 models for specific use cases, ensuring high accuracy and efficiency. +After running the eval command, you will receive detailed output metrics such as [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP (mean Average Precision). This provides a comprehensive view of your model's performance on the dataset. This functionality is particularly useful for fine-tuning and optimizing your YOLO11 models for specific use cases, ensuring high accuracy and efficiency. ## Summary -This guide explored integrating Ultralytics' YOLOv8 with Neural Magic's DeepSparse Engine. It highlighted how this integration enhances YOLOv8's performance on CPU platforms, offering GPU-level efficiency and advanced neural network sparsity techniques. +This guide explored integrating Ultralytics' YOLO11 with Neural Magic's DeepSparse Engine. It highlighted how this integration enhances YOLO11's performance on CPU platforms, offering GPU-level efficiency and advanced neural network sparsity techniques. -For more detailed information and advanced usage, visit [Neural Magic's DeepSparse documentation](https://docs.neuralmagic.com/products/deepsparse/). Also, check out Neural Magic's documentation on the integration with YOLOv8 [here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/yolov8#yolov8-inference-pipelines) and watch a great session on it [here](https://www.youtube.com/watch?v=qtJ7bdt52x8). 
+For more detailed information and advanced usage, visit [Neural Magic's DeepSparse documentation](https://docs.neuralmagic.com/products/deepsparse/). Also, check out Neural Magic's documentation on the integration with YOLO11 [here](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/yolov8#yolov8-inference-pipelines) and watch a great session on it [here](https://www.youtube.com/watch?v=qtJ7bdt52x8). -Additionally, for a broader understanding of various YOLOv8 integrations, visit the [Ultralytics integration guide page](../integrations/index.md), where you can discover a range of other exciting integration possibilities. +Additionally, for a broader understanding of various YOLO11 integrations, visit the [Ultralytics integration guide page](../integrations/index.md), where you can discover a range of other exciting integration possibilities. ## FAQ -### What is Neural Magic's DeepSparse Engine and how does it optimize YOLOv8 performance? +### What is Neural Magic's DeepSparse Engine and how does it optimize YOLO11 performance? -Neural Magic's DeepSparse Engine is an inference runtime designed to optimize the execution of neural networks on CPUs through advanced techniques such as sparsity, pruning, and quantization. By integrating DeepSparse with YOLOv8, you can achieve GPU-like performance on standard CPUs, significantly enhancing inference speed, model efficiency, and overall performance while maintaining accuracy. For more details, check out the [Neural Magic's DeepSparse section](#neural-magics-deepsparse). +Neural Magic's DeepSparse Engine is an inference runtime designed to optimize the execution of neural networks on CPUs through advanced techniques such as sparsity, pruning, and quantization. By integrating DeepSparse with YOLO11, you can achieve GPU-like performance on standard CPUs, significantly enhancing inference speed, model efficiency, and overall performance while maintaining accuracy. 
For more details, check out the [Neural Magic's DeepSparse section](#neural-magics-deepsparse). -### How can I install the needed packages to deploy YOLOv8 using Neural Magic's DeepSparse? +### How can I install the needed packages to deploy YOLO11 using Neural Magic's DeepSparse? -Installing the required packages for deploying YOLOv8 with Neural Magic's DeepSparse is straightforward. You can easily install them using the CLI. Here's the command you need to run: +Installing the required packages for deploying YOLO11 with Neural Magic's DeepSparse is straightforward. You can easily install them using the CLI. Here's the command you need to run: ```bash pip install deepsparse[yolov8] ``` -Once installed, follow the steps provided in the [Installation section](#step-1-installation) to set up your environment and start using DeepSparse with YOLOv8. +Once installed, follow the steps provided in the [Installation section](#step-1-installation) to set up your environment and start using DeepSparse with YOLO11. -### How do I convert YOLOv8 models to ONNX format for use with DeepSparse? +### How do I convert YOLO11 models to ONNX format for use with DeepSparse? -To convert YOLOv8 models to the ONNX format, which is required for compatibility with DeepSparse, you can use the following CLI command: +To convert YOLO11 models to the ONNX format, which is required for compatibility with DeepSparse, you can use the following CLI command: ```bash -yolo task=detect mode=export model=yolov8n.pt format=onnx opset=13 +yolo task=detect mode=export model=yolo11n.pt format=onnx opset=13 ``` -This command will export your YOLOv8 model (`yolov8n.pt`) to a format (`yolov8n.onnx`) that can be utilized by the DeepSparse Engine. More information about model export can be found in the [Model Export section](#step-2-exporting-yolov8-to-onnx-format). +This command will export your YOLO11 model (`yolo11n.pt`) to a format (`yolo11n.onnx`) that can be utilized by the DeepSparse Engine. 
More information about model export can be found in the [Model Export section](#step-2-exporting-yolo11-to-onnx-format). -### How do I benchmark YOLOv8 performance on the DeepSparse Engine? +### How do I benchmark YOLO11 performance on the DeepSparse Engine? -Benchmarking YOLOv8 performance on DeepSparse helps you analyze throughput and latency to ensure your model is optimized. You can use the following CLI command to run a benchmark: +Benchmarking YOLO11 performance on DeepSparse helps you analyze throughput and latency to ensure your model is optimized. You can use the following CLI command to run a benchmark: ```bash -deepsparse.benchmark model_path="path/to/yolov8n.onnx" --scenario=sync --input_shapes="[1,3,640,640]" +deepsparse.benchmark model_path="path/to/yolo11n.onnx" --scenario=sync --input_shapes="[1,3,640,640]" ``` This command will provide you with vital performance metrics. For more details, see the [Benchmarking Performance section](#step-4-benchmarking-performance). -### Why should I use Neural Magic's DeepSparse with YOLOv8 for object detection tasks? +### Why should I use Neural Magic's DeepSparse with YOLO11 for object detection tasks? -Integrating Neural Magic's DeepSparse with YOLOv8 offers several benefits: +Integrating Neural Magic's DeepSparse with YOLO11 offers several benefits: -- **Enhanced Inference Speed:** Achieves up to 525 FPS, significantly speeding up YOLOv8's capabilities. +- **Enhanced Inference Speed:** Achieves up to 525 FPS, significantly speeding up YOLO11's capabilities. - **Optimized Model Efficiency:** Uses sparsity, pruning, and quantization techniques to reduce model size and computational needs while maintaining accuracy. - **High Performance on Standard CPUs:** Offers GPU-like performance on cost-effective CPU hardware. - **Streamlined Integration:** User-friendly tools for easy deployment and integration. -- **Flexibility:** Supports both standard and sparsity-optimized YOLOv8 models. 
+- **Flexibility:** Supports both standard and sparsity-optimized YOLO11 models. - **Cost-Effective:** Reduces operational expenses through efficient resource utilization. -For a deeper dive into these advantages, visit the [Benefits of Integrating Neural Magic's DeepSparse with YOLOv8 section](#benefits-of-integrating-neural-magics-deepsparse-with-yolov8). +For a deeper dive into these advantages, visit the [Benefits of Integrating Neural Magic's DeepSparse with YOLO11 section](#benefits-of-integrating-neural-magics-deepsparse-with-yolo11). diff --git a/docs/en/integrations/onnx.md b/docs/en/integrations/onnx.md index 3bb372ac2a7..fbff328d02e 100644 --- a/docs/en/integrations/onnx.md +++ b/docs/en/integrations/onnx.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to export YOLOv8 models to ONNX format for flexible deployment across various platforms with enhanced performance. -keywords: YOLOv8, ONNX, model export, Ultralytics, ONNX Runtime, machine learning, model deployment, computer vision, deep learning +description: Learn how to export YOLO11 models to ONNX format for flexible deployment across various platforms with enhanced performance. +keywords: YOLO11, ONNX, model export, Ultralytics, ONNX Runtime, machine learning, model deployment, computer vision, deep learning --- -# ONNX Export for YOLOv8 Models +# ONNX Export for YOLO11 Models Often, when deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models, you'll need a model format that's both flexible and compatible with multiple platforms. -Exporting [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to ONNX format streamlines deployment and ensures optimal performance across various environments. This guide will show you how to easily convert your YOLOv8 models to ONNX and enhance their scalability and effectiveness in real-world applications. 
+Exporting [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to ONNX format streamlines deployment and ensures optimal performance across various environments. This guide will show you how to easily convert your YOLO11 models to ONNX and enhance their scalability and effectiveness in real-world applications. ## ONNX and ONNX Runtime @@ -44,7 +44,7 @@ The ability of ONNX to handle various formats can be attributed to the following ## Common Usage of ONNX -Before we jump into how to export YOLOv8 models to the ONNX format, let's take a look at where ONNX models are usually used. +Before we jump into how to export YOLO11 models to the ONNX format, let's take a look at where ONNX models are usually used. ### CPU Deployment @@ -60,9 +60,9 @@ While ONNX models are commonly used on CPUs, they can also be deployed on the fo - **Web Browsers**: ONNX can run directly in web browsers, powering interactive and dynamic web-based AI applications. -## Exporting YOLOv8 Models to ONNX +## Exporting YOLO11 Models to ONNX -You can expand model compatibility and deployment flexibility by converting YOLOv8 models to ONNX format. +You can expand model compatibility and deployment flexibility by converting YOLO11 models to ONNX format. ### Installation @@ -73,15 +73,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [YOLO11 Installation guide](../quickstart.md). 
While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. !!! example "Usage" @@ -90,14 +90,14 @@ Before diving into the usage instructions, be sure to check out the range of [YO ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to ONNX format - model.export(format="onnx") # creates 'yolov8n.onnx' + model.export(format="onnx") # creates 'yolo11n.onnx' # Load the exported ONNX model - onnx_model = YOLO("yolov8n.onnx") + onnx_model = YOLO("yolo11n.onnx") # Run inference results = onnx_model("https://ultralytics.com/images/bus.jpg") @@ -106,18 +106,18 @@ Before diving into the usage instructions, be sure to check out the range of [YO === "CLI" ```bash - # Export a YOLOv8n PyTorch model to ONNX format - yolo export model=yolov8n.pt format=onnx # creates 'yolov8n.onnx' + # Export a YOLO11n PyTorch model to ONNX format + yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx' # Run inference with the exported model - yolo predict model=yolov8n.onnx source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.onnx source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). 
-## Deploying Exported YOLOv8 ONNX Models +## Deploying Exported YOLO11 ONNX Models -Once you've successfully exported your Ultralytics YOLOv8 models to ONNX format, the next step is deploying these models in various environments. For detailed instructions on deploying your ONNX models, take a look at the following resources: +Once you've successfully exported your Ultralytics YOLO11 models to ONNX format, the next step is deploying these models in various environments. For detailed instructions on deploying your ONNX models, take a look at the following resources: - **[ONNX Runtime Python API Documentation](https://onnxruntime.ai/docs/api/python/api_summary.html)**: This guide provides essential information for loading and running ONNX models using ONNX Runtime. @@ -127,17 +127,17 @@ Once you've successfully exported your Ultralytics YOLOv8 models to ONNX format, ## Summary -In this guide, you've learned how to export Ultralytics YOLOv8 models to ONNX format to increase their interoperability and performance across various platforms. You were also introduced to the ONNX Runtime and ONNX deployment options. +In this guide, you've learned how to export Ultralytics YOLO11 models to ONNX format to increase their interoperability and performance across various platforms. You were also introduced to the ONNX Runtime and ONNX deployment options. For further details on usage, visit the [ONNX official documentation](https://onnx.ai/onnx/intro/). -Also, if you'd like to know more about other Ultralytics YOLOv8 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there. +Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there. ## FAQ -### How do I export YOLOv8 models to ONNX format using Ultralytics? 
+### How do I export YOLO11 models to ONNX format using Ultralytics? -To export your YOLOv8 models to ONNX format using Ultralytics, follow these steps: +To export your YOLO11 models to ONNX format using Ultralytics, follow these steps: !!! example "Usage" @@ -146,14 +146,14 @@ To export your YOLOv8 models to ONNX format using Ultralytics, follow these step ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to ONNX format - model.export(format="onnx") # creates 'yolov8n.onnx' + model.export(format="onnx") # creates 'yolo11n.onnx' # Load the exported ONNX model - onnx_model = YOLO("yolov8n.onnx") + onnx_model = YOLO("yolo11n.onnx") # Run inference results = onnx_model("https://ultralytics.com/images/bus.jpg") @@ -162,18 +162,18 @@ To export your YOLOv8 models to ONNX format using Ultralytics, follow these step === "CLI" ```bash - # Export a YOLOv8n PyTorch model to ONNX format - yolo export model=yolov8n.pt format=onnx # creates 'yolov8n.onnx' + # Export a YOLO11n PyTorch model to ONNX format + yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx' # Run inference with the exported model - yolo predict model=yolov8n.onnx source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.onnx source='https://ultralytics.com/images/bus.jpg' ``` For more details, visit the [export documentation](../modes/export.md). -### What are the advantages of using ONNX Runtime for deploying YOLOv8 models? +### What are the advantages of using ONNX Runtime for deploying YOLO11 models? -Using ONNX Runtime for deploying YOLOv8 models offers several advantages: +Using ONNX Runtime for deploying YOLO11 models offers several advantages: - **Cross-platform compatibility**: ONNX Runtime supports various platforms, such as Windows, macOS, and Linux, ensuring your models run smoothly across different environments. 
- **Hardware acceleration**: ONNX Runtime can leverage hardware-specific optimizations for CPUs, GPUs, and dedicated accelerators, providing high-performance inference. @@ -181,9 +181,9 @@ Using ONNX Runtime for deploying YOLOv8 models offers several advantages: Learn more by checking the [ONNX Runtime documentation](https://onnxruntime.ai/docs/api/python/api_summary.html). -### What deployment options are available for YOLOv8 models exported to ONNX? +### What deployment options are available for YOLO11 models exported to ONNX? -YOLOv8 models exported to ONNX can be deployed on various platforms including: +YOLO11 models exported to ONNX can be deployed on various platforms including: - **CPUs**: Utilizing ONNX Runtime for optimized CPU inference. - **GPUs**: Leveraging NVIDIA CUDA for high-performance GPU acceleration. @@ -192,19 +192,19 @@ YOLOv8 models exported to ONNX can be deployed on various platforms including: For more information, explore our guide on [model deployment options](../guides/model-deployment-options.md). -### Why should I use ONNX format for Ultralytics YOLOv8 models? +### Why should I use ONNX format for Ultralytics YOLO11 models? -Using ONNX format for Ultralytics YOLOv8 models provides numerous benefits: +Using ONNX format for Ultralytics YOLO11 models provides numerous benefits: - **Interoperability**: ONNX allows models to be transferred between different machine learning frameworks seamlessly. - **Performance Optimization**: ONNX Runtime can enhance model performance by utilizing hardware-specific optimizations. - **Flexibility**: ONNX supports various deployment environments, enabling you to use the same model on different platforms without modification. -Refer to the comprehensive guide on [exporting YOLOv8 models to ONNX](https://www.ultralytics.com/blog/export-and-optimize-a-yolov8-model-for-inference-on-openvino). 
+Refer to the comprehensive guide on [exporting YOLO11 models to ONNX](https://www.ultralytics.com/blog/export-and-optimize-a-yolov8-model-for-inference-on-openvino). -### How can I troubleshoot issues when exporting YOLOv8 models to ONNX? +### How can I troubleshoot issues when exporting YOLO11 models to ONNX? -When exporting YOLOv8 models to ONNX, you might encounter common issues such as mismatched dependencies or unsupported operations. To troubleshoot these problems: +When exporting YOLO11 models to ONNX, you might encounter common issues such as mismatched dependencies or unsupported operations. To troubleshoot these problems: 1. Verify that you have the correct version of required dependencies installed. 2. Check the official [ONNX documentation](https://onnx.ai/onnx/intro/) for supported operators and features. diff --git a/docs/en/integrations/openvino.md b/docs/en/integrations/openvino.md index 8395f949f10..b3fd6a8201a 100644 --- a/docs/en/integrations/openvino.md +++ b/docs/en/integrations/openvino.md @@ -59,14 +59,19 @@ Export a YOLOv8n model to OpenVINO format and run inference with the exported mo ## Arguments -| Key | Value | Description | -| --------- | ------------ | --------------------------------------------------------------------------- | -| `format` | `'openvino'` | format to export to | -| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. (640, 480) | -| `half` | `False` | FP16 quantization | -| `int8` | `False` | INT8 quantization | -| `batch` | `1` | [batch size](https://www.ultralytics.com/glossary/batch-size) for inference | -| `dynamic` | `False` | allows dynamic input sizes | +| Key | Value | Description | +| --------- | ------------ | ------------------------------------------------------------------------------------------- | +| `format` | `'openvino'` | format to export to | +| `imgsz` | `640` | image size as scalar or (h, w) list, i.e. 
(640, 480) | +| `half` | `False` | FP16 quantization | +| `int8` | `False` | INT8 quantization | +| `batch` | `1` | [batch size](https://www.ultralytics.com/glossary/batch-size) for inference | +| `dynamic` | `False` | allows dynamic input sizes | +| `data` | `coco8.yaml` | Path to the dataset configuration file (default: `coco8.yaml`), essential for quantization. | + +!!! note + + When using `data` argument for quantization, please check [Dataset Guide](https://docs.ultralytics.com/datasets/detect) to learn more about the dataset format. ## Benefits of OpenVINO @@ -148,7 +153,7 @@ This table represents the benchmark results for five different models (YOLOv8n, ### Intel Arc GPU -Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and Nvidia, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers. +Intel® Arc™ represents Intel's foray into the dedicated GPU market. The Arc™ series, designed to compete with leading GPU manufacturers like AMD and NVIDIA, caters to both the laptop and desktop markets. The series includes mobile versions for compact devices like laptops, and larger, more powerful versions for desktop computers. The Arc™ series is divided into three categories: Arc™ 3, Arc™ 5, and Arc™ 7, with each number indicating the performance level. Each category includes several models, and the 'M' in the GPU model name signifies a mobile, integrated variant. 
@@ -352,7 +357,7 @@ To reproduce the Ultralytics benchmarks above on all export [formats](../modes/e model = YOLO("yolov8n.pt") # Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all export formats - results = model.benchmarks(data="coco8.yaml") + results = model.benchmark(data="coco8.yaml") ``` === "CLI" @@ -466,7 +471,7 @@ Yes, you can benchmark YOLOv8 models in various formats including PyTorch, Torch model = YOLO("yolov8n.pt") # Benchmark YOLOv8n speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset for all export formats - results = model.benchmarks(data="coco8.yaml") + results = model.benchmark(data="coco8.yaml") ``` === "CLI" diff --git a/docs/en/integrations/paddlepaddle.md b/docs/en/integrations/paddlepaddle.md index 62092df9f56..77c6164b0f0 100644 --- a/docs/en/integrations/paddlepaddle.md +++ b/docs/en/integrations/paddlepaddle.md @@ -1,12 +1,12 @@ --- comments: true -description: Learn how to export YOLOv8 models to PaddlePaddle format for enhanced performance, flexibility, and deployment across various platforms and devices. -keywords: YOLOv8, PaddlePaddle, export models, computer vision, deep learning, model deployment, performance optimization +description: Learn how to export YOLO11 models to PaddlePaddle format for enhanced performance, flexibility, and deployment across various platforms and devices. +keywords: YOLO11, PaddlePaddle, export models, computer vision, deep learning, model deployment, performance optimization --- -# How to Export to PaddlePaddle Format from YOLOv8 Models +# How to Export to PaddlePaddle Format from YOLO11 Models -Bridging the gap between developing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models in real-world scenarios with varying conditions can be difficult. PaddlePaddle makes this process easier with its focus on flexibility, performance, and its capability for parallel processing in distributed environments. 
This means you can use your YOLOv8 computer vision models on a wide variety of devices and platforms, from smartphones to cloud-based servers. +Bridging the gap between developing and deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models in real-world scenarios with varying conditions can be difficult. PaddlePaddle makes this process easier with its focus on flexibility, performance, and its capability for parallel processing in distributed environments. This means you can use your YOLO11 computer vision models on a wide variety of devices and platforms, from smartphones to cloud-based servers.


@@ -16,10 +16,10 @@ Bridging the gap between developing and deploying [computer vision](https://www. allowfullscreen>
- Watch: How to Export Ultralytics YOLOv8 Models to PaddlePaddle Format | Key Features of PaddlePaddle Format + Watch: How to Export Ultralytics YOLO11 Models to PaddlePaddle Format | Key Features of PaddlePaddle Format

-The ability to export to PaddlePaddle model format allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for use within the PaddlePaddle framework. PaddlePaddle is known for facilitating industrial deployments and is a good choice for deploying computer vision applications in real-world settings across various domains. +The ability to export to PaddlePaddle model format allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for use within the PaddlePaddle framework. PaddlePaddle is known for facilitating industrial deployments and is a good choice for deploying computer vision applications in real-world settings across various domains. ## Why should you export to PaddlePaddle? @@ -31,7 +31,7 @@ Developed by Baidu, [PaddlePaddle](https://www.paddlepaddle.org.cn/en) (**PA**ra It offers tools and resources similar to popular frameworks like [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it accessible for developers of all experience levels. From farming and factories to service businesses, PaddlePaddle's large developer community of over 4.77 million is helping create and deploy AI applications. -By exporting your Ultralytics YOLOv8 models to PaddlePaddle format, you can tap into PaddlePaddle's strengths in performance optimization. PaddlePaddle prioritizes efficient model execution and reduced memory usage. As a result, your YOLOv8 models can potentially achieve even better performance, delivering top-notch results in practical scenarios. +By exporting your Ultralytics YOLO11 models to PaddlePaddle format, you can tap into PaddlePaddle's strengths in performance optimization. PaddlePaddle prioritizes efficient model execution and reduced memory usage. As a result, your YOLO11 models can potentially achieve even better performance, delivering top-notch results in practical scenarios. 
## Key Features of PaddlePaddle Models @@ -45,7 +45,7 @@ PaddlePaddle models offer a range of key features that contribute to their flexi ## Deployment Options in PaddlePaddle -Before diving into the code for exporting YOLOv8 models to PaddlePaddle, let's take a look at the different deployment scenarios in which PaddlePaddle models excel. +Before diving into the code for exporting YOLO11 models to PaddlePaddle, let's take a look at the different deployment scenarios in which PaddlePaddle models excel. PaddlePaddle provides a range of options, each offering a distinct balance of ease of use, flexibility, and performance: @@ -57,9 +57,9 @@ PaddlePaddle provides a range of options, each offering a distinct balance of ea - **Paddle.js**: Paddle.js enables you to deploy PaddlePaddle models directly within web browsers. Paddle.js can either load a pre-trained model or transform a model from [paddle-hub](https://github.com/PaddlePaddle/PaddleHub) with model transforming tools provided by Paddle.js. It can run in browsers that support WebGL/WebGPU/WebAssembly. -## Export to PaddlePaddle: Converting Your YOLOv8 Model +## Export to PaddlePaddle: Converting Your YOLO11 Model -Converting YOLOv8 models to the PaddlePaddle format can improve execution flexibility and optimize performance for various deployment scenarios. +Converting YOLO11 models to the PaddlePaddle format can improve execution flexibility and optimize performance for various deployment scenarios. ### Installation @@ -70,15 +70,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). 
While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! 
example "Usage" @@ -87,14 +87,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to PaddlePaddle format - model.export(format="paddle") # creates '/yolov8n_paddle_model' + model.export(format="paddle") # creates '/yolo11n_paddle_model' # Load the exported PaddlePaddle model - paddle_model = YOLO("./yolov8n_paddle_model") + paddle_model = YOLO("./yolo11n_paddle_model") # Run inference results = paddle_model("https://ultralytics.com/images/bus.jpg") @@ -103,18 +103,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to PaddlePaddle format - yolo export model=yolov8n.pt format=paddle # creates '/yolov8n_paddle_model' + # Export a YOLO11n PyTorch model to PaddlePaddle format + yolo export model=yolo11n.pt format=paddle # creates '/yolo11n_paddle_model' # Run inference with the exported model - yolo predict model='./yolov8n_paddle_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_paddle_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 PaddlePaddle Models +## Deploying Exported YOLO11 PaddlePaddle Models -After successfully exporting your Ultralytics YOLOv8 models to PaddlePaddle format, you can now deploy them. The primary and recommended first step for running a PaddlePaddle model is to use the YOLO("./model_paddle_model") method, as outlined in the previous usage code snippet. +After successfully exporting your Ultralytics YOLO11 models to PaddlePaddle format, you can now deploy them. 
The primary and recommended first step for running a PaddlePaddle model is to use the YOLO("yolo11n_paddle_model/") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your PaddlePaddle models in various other settings, take a look at the following resources: @@ -126,17 +126,17 @@ However, for in-depth instructions on deploying your PaddlePaddle models in vari ## Summary -In this guide, we explored the process of exporting Ultralytics YOLOv8 models to the PaddlePaddle format. By following these steps, you can leverage PaddlePaddle's strengths in diverse deployment scenarios, optimizing your models for different hardware and software environments. +In this guide, we explored the process of exporting Ultralytics YOLO11 models to the PaddlePaddle format. By following these steps, you can leverage PaddlePaddle's strengths in diverse deployment scenarios, optimizing your models for different hardware and software environments. For further details on usage, visit the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html) -Want to explore more ways to integrate your Ultralytics YOLOv8 models? Our [integration guide page](index.md) explores various options, equipping you with valuable resources and insights. +Want to explore more ways to integrate your Ultralytics YOLO11 models? Our [integration guide page](index.md) explores various options, equipping you with valuable resources and insights. ## FAQ -### How do I export Ultralytics YOLOv8 models to PaddlePaddle format? +### How do I export Ultralytics YOLO11 models to PaddlePaddle format? -Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. You can use the `export` method of the YOLO class to perform this exportation. Here is an example using Python: +Exporting Ultralytics YOLO11 models to PaddlePaddle format is straightforward. 
You can use the `export` method of the YOLO class to perform this exportation. Here is an example using Python: !!! example "Usage" @@ -145,14 +145,14 @@ Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. Y ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to PaddlePaddle format - model.export(format="paddle") # creates '/yolov8n_paddle_model' + model.export(format="paddle") # creates '/yolo11n_paddle_model' # Load the exported PaddlePaddle model - paddle_model = YOLO("./yolov8n_paddle_model") + paddle_model = YOLO("./yolo11n_paddle_model") # Run inference results = paddle_model("https://ultralytics.com/images/bus.jpg") @@ -161,11 +161,11 @@ Exporting Ultralytics YOLOv8 models to PaddlePaddle format is straightforward. Y === "CLI" ```bash - # Export a YOLOv8n PyTorch model to PaddlePaddle format - yolo export model=yolov8n.pt format=paddle # creates '/yolov8n_paddle_model' + # Export a YOLO11n PyTorch model to PaddlePaddle format + yolo export model=yolo11n.pt format=paddle # creates '/yolo11n_paddle_model' # Run inference with the exported model - yolo predict model='./yolov8n_paddle_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_paddle_model' source='https://ultralytics.com/images/bus.jpg' ``` For more detailed setup and troubleshooting, check the [Ultralytics Installation Guide](../quickstart.md) and [Common Issues Guide](../guides/yolo-common-issues.md). @@ -179,17 +179,17 @@ PaddlePaddle offers several key advantages for model deployment: - **Operator Fusion**: By merging compatible operations, it reduces computational overhead. - **Quantization Techniques**: Supports both post-training and quantization-aware training, enabling lower-[precision](https://www.ultralytics.com/glossary/precision) data representations for improved performance. 
-You can achieve enhanced results by exporting your Ultralytics YOLOv8 models to PaddlePaddle, ensuring flexibility and high performance across various applications and hardware platforms. Learn more about PaddlePaddle's features [here](https://www.paddlepaddle.org.cn/en). +You can achieve enhanced results by exporting your Ultralytics YOLO11 models to PaddlePaddle, ensuring flexibility and high performance across various applications and hardware platforms. Learn more about PaddlePaddle's features [here](https://www.paddlepaddle.org.cn/en). -### Why should I choose PaddlePaddle for deploying my YOLOv8 models? +### Why should I choose PaddlePaddle for deploying my YOLO11 models? -PaddlePaddle, developed by Baidu, is optimized for industrial and commercial AI deployments. Its large developer community and robust framework provide extensive tools similar to TensorFlow and PyTorch. By exporting your YOLOv8 models to PaddlePaddle, you leverage: +PaddlePaddle, developed by Baidu, is optimized for industrial and commercial AI deployments. Its large developer community and robust framework provide extensive tools similar to TensorFlow and PyTorch. By exporting your YOLO11 models to PaddlePaddle, you leverage: - **Enhanced Performance**: Optimal execution speed and reduced memory footprint. - **Flexibility**: Wide compatibility with various devices from smartphones to cloud servers. - **Scalability**: Efficient parallel processing capabilities for distributed environments. -These features make PaddlePaddle a compelling choice for deploying YOLOv8 models in production settings. +These features make PaddlePaddle a compelling choice for deploying YOLO11 models in production settings. ### How does PaddlePaddle improve model performance over other frameworks? @@ -199,9 +199,9 @@ PaddlePaddle employs several advanced techniques to optimize model performance: - **Operator Fusion**: Combines compatible operations to minimize memory transfer and increase inference speed. 
- **Quantization**: Reduces model size and increases efficiency using lower-precision data while maintaining [accuracy](https://www.ultralytics.com/glossary/accuracy). -These techniques prioritize efficient model execution, making PaddlePaddle an excellent option for deploying high-performance YOLOv8 models. For more on optimization, see the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html). +These techniques prioritize efficient model execution, making PaddlePaddle an excellent option for deploying high-performance YOLO11 models. For more on optimization, see the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html). -### What deployment options does PaddlePaddle offer for YOLOv8 models? +### What deployment options does PaddlePaddle offer for YOLO11 models? PaddlePaddle provides flexible deployment options: diff --git a/docs/en/integrations/paperspace.md b/docs/en/integrations/paperspace.md index 7c67d9bbff1..f6f9117a6cf 100644 --- a/docs/en/integrations/paperspace.md +++ b/docs/en/integrations/paperspace.md @@ -1,14 +1,14 @@ --- comments: true -description: Simplify YOLOv8 training with Paperspace Gradient's all-in-one MLOps platform. Access GPUs, automate workflows, and deploy with ease. -keywords: YOLOv8, Paperspace Gradient, MLOps, machine learning, training, GPUs, Jupyter notebooks, model deployment, AI, cloud platform +description: Simplify YOLO11 training with Paperspace Gradient's all-in-one MLOps platform. Access GPUs, automate workflows, and deploy with ease. +keywords: YOLO11, Paperspace Gradient, MLOps, machine learning, training, GPUs, Jupyter notebooks, model deployment, AI, cloud platform --- -# YOLOv8 Model Training Made Simple with Paperspace Gradient +# YOLO11 Model Training Made Simple with Paperspace Gradient -Training computer vision models like [YOLOv8](https://github.com/ultralytics/ultralytics) can be complicated. 
It involves managing large datasets, using different types of computer hardware like GPUs, TPUs, and CPUs, and making sure data flows smoothly during the training process. Typically, developers end up spending a lot of time managing their computer systems and environments. It can be frustrating when you just want to focus on building the best model. +Training computer vision models like [YOLO11](https://github.com/ultralytics/ultralytics) can be complicated. It involves managing large datasets, using different types of computer hardware like GPUs, TPUs, and CPUs, and making sure data flows smoothly during the training process. Typically, developers end up spending a lot of time managing their computer systems and environments. It can be frustrating when you just want to focus on building the best model. -This is where a platform like Paperspace Gradient can make things simpler. Paperspace Gradient is a MLOps platform that lets you build, train, and deploy [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models all in one place. With Gradient, developers can focus on training their YOLOv8 models without the hassle of managing infrastructure and environments. +This is where a platform like Paperspace Gradient can make things simpler. Paperspace Gradient is a MLOps platform that lets you build, train, and deploy [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models all in one place. With Gradient, developers can focus on training their YOLO11 models without the hassle of managing infrastructure and environments. ## Paperspace @@ -28,15 +28,15 @@ Paperspace Gradient is a suite of tools designed to make working with AI and mac Within its toolkit, it includes support for Google's TPUs via a job runner, comprehensive support for Jupyter notebooks and containers, and new programming language integrations. 
Its focus on language integration particularly stands out, allowing users to easily adapt their existing Python projects to use the most advanced GPU infrastructure available. -## Training YOLOv8 Using Paperspace Gradient +## Training YOLO11 Using Paperspace Gradient -Paperspace Gradient makes training a YOLOv8 model possible with a few clicks. Thanks to the integration, you can access the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLOv8 Model Training guide](../modes/train.md). +Paperspace Gradient makes training a YOLO11 model possible with a few clicks. Thanks to the integration, you can access the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics) and start training your model immediately. For a detailed understanding of the model training process and best practices, refer to our [YOLO11 Model Training guide](../modes/train.md). Sign in and then click on the “Start Machine” button shown in the image below. In a few seconds, a managed GPU environment will start up, and then you can run the notebook's cells. -![Training YOLOv8 Using Paperspace Gradient](https://github.com/ultralytics/docs/releases/download/0/start-machine-button.avif) +![Training YOLO11 Using Paperspace Gradient](https://github.com/ultralytics/docs/releases/download/0/start-machine-button.avif) -Explore more capabilities of YOLOv8 and Paperspace Gradient in a discussion with Glenn Jocher, Ultralytics founder, and James Skelton from Paperspace. Watch the discussion below. +Explore more capabilities of YOLO11 and Paperspace Gradient in a discussion with Glenn Jocher, Ultralytics founder, and James Skelton from Paperspace. Watch the discussion below.


@@ -46,14 +46,14 @@ Explore more capabilities of YOLOv8 and Paperspace Gradient in a discussion with allowfullscreen>
- Watch: Ultralytics Live Session 7: It's All About the Environment: Optimizing YOLOv8 Training With Gradient + Watch: Ultralytics Live Session 7: It's All About the Environment: Optimizing YOLO11 Training With Gradient

## Key Features of Paperspace Gradient As you explore the Paperspace console, you'll see how each step of the machine-learning workflow is supported and enhanced. Here are some things to look out for: -- **One-Click Notebooks:** Gradient provides pre-configured Jupyter Notebooks specifically tailored for YOLOv8, eliminating the need for environment setup and dependency management. Simply choose the desired notebook and start experimenting immediately. +- **One-Click Notebooks:** Gradient provides pre-configured Jupyter Notebooks specifically tailored for YOLO11, eliminating the need for environment setup and dependency management. Simply choose the desired notebook and start experimenting immediately. - **Hardware Flexibility:** Choose from a range of machine types with varying CPU, GPU, and TPU configurations to suit your training needs and budget. Gradient handles all the backend setup, allowing you to focus on model development. @@ -61,13 +61,13 @@ As you explore the Paperspace console, you'll see how each step of the machine-l - **Dataset Management:** Efficiently manage your datasets directly within Gradient. Upload, version, and pre-process data with ease, streamlining the data preparation phase of your project. -- **Model Serving:** Deploy your trained YOLOv8 models as REST APIs with just a few clicks. Gradient handles the infrastructure, allowing you to easily integrate your [object detection](https://www.ultralytics.com/glossary/object-detection) models into your applications. +- **Model Serving:** Deploy your trained YOLO11 models as REST APIs with just a few clicks. Gradient handles the infrastructure, allowing you to easily integrate your [object detection](https://www.ultralytics.com/glossary/object-detection) models into your applications. - **Real-time Monitoring:** Monitor the performance and health of your deployed models through Gradient's intuitive dashboard. Gain insights into inference speed, resource utilization, and potential errors. 
-## Why Should You Use Gradient for Your YOLOv8 Projects? +## Why Should You Use Gradient for Your YOLO11 Projects? -While many options are available for training, deploying, and evaluating YOLOv8 models, the integration with Paperspace Gradient offers a unique set of advantages that separates it from other solutions. Let's explore what makes this integration unique: +While many options are available for training, deploying, and evaluating YOLO11 models, the integration with Paperspace Gradient offers a unique set of advantages that separates it from other solutions. Let's explore what makes this integration unique: - **Enhanced Collaboration:** Shared workspaces and version control facilitate seamless teamwork and ensure reproducibility, allowing your team to work together effectively and maintain a clear history of your project. @@ -79,37 +79,37 @@ While many options are available for training, deploying, and evaluating YOLOv8 ## Summary -This guide explored the Paperspace Gradient integration for training YOLOv8 models. Gradient provides the tools and infrastructure to accelerate your AI development journey from effortless model training and evaluation to streamlined deployment options. +This guide explored the Paperspace Gradient integration for training YOLO11 models. Gradient provides the tools and infrastructure to accelerate your AI development journey from effortless model training and evaluation to streamlined deployment options. For further exploration, visit [PaperSpace's official documentation](https://docs.digitalocean.com/products/paperspace/). -Also, visit the [Ultralytics integration guide page](index.md) to learn more about different YOLOv8 integrations. It's full of insights and tips to take your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects to the next level. +Also, visit the [Ultralytics integration guide page](index.md) to learn more about different YOLO11 integrations. 
It's full of insights and tips to take your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects to the next level. ## FAQ -### How do I train a YOLOv8 model using Paperspace Gradient? +### How do I train a YOLO11 model using Paperspace Gradient? -Training a YOLOv8 model with Paperspace Gradient is straightforward and efficient. First, sign in to the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics). Next, click the “Start Machine” button to initiate a managed GPU environment. Once the environment is ready, you can run the notebook's cells to start training your YOLOv8 model. For detailed instructions, refer to our [YOLOv8 Model Training guide](../modes/train.md). +Training a YOLO11 model with Paperspace Gradient is straightforward and efficient. First, sign in to the [Paperspace console](https://console.paperspace.com/github/ultralytics/ultralytics). Next, click the “Start Machine” button to initiate a managed GPU environment. Once the environment is ready, you can run the notebook's cells to start training your YOLO11 model. For detailed instructions, refer to our [YOLO11 Model Training guide](../modes/train.md). -### What are the advantages of using Paperspace Gradient for YOLOv8 projects? +### What are the advantages of using Paperspace Gradient for YOLO11 projects? -Paperspace Gradient offers several unique advantages for training and deploying YOLOv8 models: +Paperspace Gradient offers several unique advantages for training and deploying YOLO11 models: - **Hardware Flexibility:** Choose from various CPU, GPU, and TPU configurations. -- **One-Click Notebooks:** Use pre-configured Jupyter Notebooks for YOLOv8 without worrying about environment setup. +- **One-Click Notebooks:** Use pre-configured Jupyter Notebooks for YOLO11 without worrying about environment setup. - **Experiment Tracking:** Automatic tracking of hyperparameters, metrics, and code changes.
- **Dataset Management:** Efficiently manage your datasets within Gradient. - **Model Serving:** Deploy models as REST APIs easily. - **Real-time Monitoring:** Monitor model performance and resource utilization through a dashboard. -### Why should I choose Ultralytics YOLOv8 over other object detection models? +### Why should I choose Ultralytics YOLO11 over other object detection models? -Ultralytics YOLOv8 stands out for its real-time object detection capabilities and high [accuracy](https://www.ultralytics.com/glossary/accuracy). Its seamless integration with platforms like Paperspace Gradient enhances productivity by simplifying the training and deployment process. YOLOv8 supports various use cases, from security systems to retail inventory management. Explore more about YOLOv8's advantages [here](https://www.ultralytics.com/yolo). +Ultralytics YOLO11 stands out for its real-time object detection capabilities and high [accuracy](https://www.ultralytics.com/glossary/accuracy). Its seamless integration with platforms like Paperspace Gradient enhances productivity by simplifying the training and deployment process. YOLO11 supports various use cases, from security systems to retail inventory management. Explore more about YOLO11's advantages [here](https://www.ultralytics.com/yolo). -### Can I deploy my YOLOv8 model on edge devices using Paperspace Gradient? +### Can I deploy my YOLO11 model on edge devices using Paperspace Gradient? -Yes, you can deploy YOLOv8 models on edge devices using Paperspace Gradient. The platform supports various deployment formats like TFLite and Edge TPU, which are optimized for edge devices. After training your model on Gradient, refer to our [export guide](../modes/export.md) for instructions on converting your model to the desired format. +Yes, you can deploy YOLO11 models on edge devices using Paperspace Gradient. The platform supports various deployment formats like TFLite and Edge TPU, which are optimized for edge devices. 
After training your model on Gradient, refer to our [export guide](../modes/export.md) for instructions on converting your model to the desired format. -### How does experiment tracking in Paperspace Gradient help improve YOLOv8 training? +### How does experiment tracking in Paperspace Gradient help improve YOLO11 training? Experiment tracking in Paperspace Gradient streamlines the model development process by automatically logging hyperparameters, metrics, and code changes. This allows you to easily compare different training runs, identify optimal configurations, and reproduce successful experiments. diff --git a/docs/en/integrations/ray-tune.md b/docs/en/integrations/ray-tune.md index 3dec5efeb67..29eb3a5173f 100644 --- a/docs/en/integrations/ray-tune.md +++ b/docs/en/integrations/ray-tune.md @@ -1,16 +1,16 @@ --- comments: true -description: Optimize YOLOv8 model performance with Ray Tune. Learn efficient hyperparameter tuning using advanced search strategies, parallelism, and early stopping. -keywords: YOLOv8, Ray Tune, hyperparameter tuning, model optimization, machine learning, deep learning, AI, Ultralytics, Weights & Biases +description: Optimize YOLO11 model performance with Ray Tune. Learn efficient hyperparameter tuning using advanced search strategies, parallelism, and early stopping. +keywords: YOLO11, Ray Tune, hyperparameter tuning, model optimization, machine learning, deep learning, AI, Ultralytics, Weights & Biases --- -# Efficient [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) with Ray Tune and YOLOv8 +# Efficient [Hyperparameter Tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) with Ray Tune and YOLO11 Hyperparameter tuning is vital in achieving peak model performance by discovering the optimal set of hyperparameters. This involves running trials with different hyperparameters and evaluating each trial's performance. 
-## Accelerate Tuning with Ultralytics YOLOv8 and Ray Tune +## Accelerate Tuning with Ultralytics YOLO11 and Ray Tune -[Ultralytics YOLOv8](https://www.ultralytics.com/) incorporates Ray Tune for hyperparameter tuning, streamlining the optimization of YOLOv8 model hyperparameters. With Ray Tune, you can utilize advanced search strategies, parallelism, and early stopping to expedite the tuning process. +[Ultralytics YOLO11](https://www.ultralytics.com/) incorporates Ray Tune for hyperparameter tuning, streamlining the optimization of YOLO11 model hyperparameters. With Ray Tune, you can utilize advanced search strategies, parallelism, and early stopping to expedite the tuning process. ### Ray Tune @@ -18,11 +18,11 @@ Hyperparameter tuning is vital in achieving peak model performance by discoverin Ray Tune Overview

-[Ray Tune](https://docs.ray.io/en/latest/tune/index.html) is a hyperparameter tuning library designed for efficiency and flexibility. It supports various search strategies, parallelism, and early stopping strategies, and seamlessly integrates with popular [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) frameworks, including Ultralytics YOLOv8. +[Ray Tune](https://docs.ray.io/en/latest/tune/index.html) is a hyperparameter tuning library designed for efficiency and flexibility. It supports various search strategies, parallelism, and early stopping strategies, and seamlessly integrates with popular [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) frameworks, including Ultralytics YOLO11. ### Integration with Weights & Biases -YOLOv8 also allows optional integration with [Weights & Biases](https://wandb.ai/site) for monitoring the tuning process. +YOLO11 also allows optional integration with [Weights & Biases](https://wandb.ai/site) for monitoring the tuning process. ## Installation @@ -49,21 +49,21 @@ To install the required packages, run: ```python from ultralytics import YOLO - # Load a YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a YOLO11n model + model = YOLO("yolo11n.pt") - # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset + # Start tuning hyperparameters for YOLO11n training on the COCO8 dataset result_grid = model.tune(data="coco8.yaml", use_ray=True) ``` ## `tune()` Method Parameters -The `tune()` method in YOLOv8 provides an easy-to-use interface for hyperparameter tuning with Ray Tune. It accepts several arguments that allow you to customize the tuning process. Below is a detailed explanation of each parameter: +The `tune()` method in YOLO11 provides an easy-to-use interface for hyperparameter tuning with Ray Tune. It accepts several arguments that allow you to customize the tuning process. 
Below is a detailed explanation of each parameter: | Parameter | Type | Description | Default Value | | --------------- | ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | | `data` | `str` | The dataset configuration file (in YAML format) to run the tuner on. This file should specify the training and [validation data](https://www.ultralytics.com/glossary/validation-data) paths, as well as other dataset-specific settings. | | -| `space` | `dict, optional` | A dictionary defining the hyperparameter search space for Ray Tune. Each key corresponds to a hyperparameter name, and the value specifies the range of values to explore during tuning. If not provided, YOLOv8 uses a default search space with various hyperparameters. | | +| `space` | `dict, optional` | A dictionary defining the hyperparameter search space for Ray Tune. Each key corresponds to a hyperparameter name, and the value specifies the range of values to explore during tuning. If not provided, YOLO11 uses a default search space with various hyperparameters. | | | `grace_period` | `int, optional` | The grace period in [epochs](https://www.ultralytics.com/glossary/epoch) for the [ASHA scheduler](https://docs.ray.io/en/latest/tune/api/schedulers.html) in Ray Tune. The scheduler will not terminate any trial before this number of epochs, allowing the model to have some minimum training before making a decision on early stopping. | 10 | | `gpu_per_trial` | `int, optional` | The number of GPUs to allocate per trial during tuning. This helps manage GPU usage, particularly in multi-GPU environments. If not provided, the tuner will use all available GPUs. 
| None | | `iterations` | `int, optional` | The maximum number of trials to run during tuning. This parameter helps control the total number of hyperparameter combinations tested, ensuring the tuning process does not run indefinitely. | 10 | @@ -73,7 +73,7 @@ By customizing these parameters, you can fine-tune the hyperparameter optimizati ## Default Search Space Description -The following table lists the default search space parameters for hyperparameter tuning in YOLOv8 with Ray Tune. Each parameter has a specific value range defined by `tune.uniform()`. +The following table lists the default search space parameters for hyperparameter tuning in YOLO11 with Ray Tune. Each parameter has a specific value range defined by `tune.uniform()`. | Parameter | Value Range | Description | | ----------------- | -------------------------- | --------------------------------------------------------------------------- | @@ -101,15 +101,17 @@ The following table lists the default search space parameters for hyperparameter ## Custom Search Space Example -In this example, we demonstrate how to use a custom search space for hyperparameter tuning with Ray Tune and YOLOv8. By providing a custom search space, you can focus the tuning process on specific hyperparameters of interest. +In this example, we demonstrate how to use a custom search space for hyperparameter tuning with Ray Tune and YOLO11. By providing a custom search space, you can focus the tuning process on specific hyperparameters of interest. !!! example "Usage" ```python + from ray import tune + from ultralytics import YOLO # Define a YOLO model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Run Ray Tune on the model result_grid = model.tune( @@ -120,7 +122,7 @@ In this example, we demonstrate how to use a custom search space for hyperparame ) ``` -In the code snippet above, we create a YOLO model with the "yolov8n.pt" pretrained weights. 
Then, we call the `tune()` method, specifying the dataset configuration with "coco8.yaml". We provide a custom search space for the initial learning rate `lr0` using a dictionary with the key "lr0" and the value `tune.uniform(1e-5, 1e-1)`. Finally, we pass additional training arguments, such as the number of epochs directly to the tune method as `epochs=50`. +In the code snippet above, we create a YOLO model with the "yolo11n.pt" pretrained weights. Then, we call the `tune()` method, specifying the dataset configuration with "coco8.yaml". We provide a custom search space for the initial learning rate `lr0` using a dictionary with the key "lr0" and the value `tune.uniform(1e-5, 1e-1)`. Finally, we pass additional training arguments, such as the number of epochs directly to the tune method as `epochs=50`. ## Processing Ray Tune Results @@ -186,9 +188,9 @@ Explore further by looking into Ray Tune's [Analyze Results](https://docs.ray.io ## FAQ -### How do I tune the hyperparameters of my YOLOv8 model using Ray Tune? +### How do I tune the hyperparameters of my YOLO11 model using Ray Tune? -To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, follow these steps: +To tune the hyperparameters of your Ultralytics YOLO11 model using Ray Tune, follow these steps: 1. **Install the required packages:** @@ -197,13 +199,13 @@ To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, fol pip install wandb # optional for logging ``` -2. **Load your YOLOv8 model and start tuning:** +2. 
**Load your YOLO11 model and start tuning:** ```python from ultralytics import YOLO - # Load a YOLOv8 model - model = YOLO("yolov8n.pt") + # Load a YOLO11 model + model = YOLO("yolo11n.pt") # Start tuning with the COCO8 dataset result_grid = model.tune(data="coco8.yaml", use_ray=True) @@ -211,9 +213,9 @@ To tune the hyperparameters of your Ultralytics YOLOv8 model using Ray Tune, fol This utilizes Ray Tune's advanced search strategies and parallelism to efficiently optimize your model's hyperparameters. For more information, check out the [Ray Tune documentation](https://docs.ray.io/en/latest/tune/index.html). -### What are the default hyperparameters for YOLOv8 tuning with Ray Tune? +### What are the default hyperparameters for YOLO11 tuning with Ray Tune? -Ultralytics YOLOv8 uses the following default hyperparameters for tuning with Ray Tune: +Ultralytics YOLO11 uses the following default hyperparameters for tuning with Ray Tune: | Parameter | Value Range | Description | | --------------- | -------------------------- | ------------------------------ | @@ -229,9 +231,9 @@ Ultralytics YOLOv8 uses the following default hyperparameters for tuning with Ra These hyperparameters can be customized to suit your specific needs. For a complete list and more details, refer to the [Hyperparameter Tuning](../guides/hyperparameter-tuning.md) guide. -### How can I integrate Weights & Biases with my YOLOv8 model tuning? +### How can I integrate Weights & Biases with my YOLO11 model tuning? -To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process: +To integrate Weights & Biases (W&B) with your Ultralytics YOLO11 tuning process: 1. 
**Install W&B:** @@ -249,7 +251,7 @@ To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process: wandb.init(project="YOLO-Tuning", entity="your-entity") # Load YOLO model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Tune hyperparameters result_grid = model.tune(data="coco8.yaml", use_ray=True) @@ -257,7 +259,7 @@ To integrate Weights & Biases (W&B) with your Ultralytics YOLOv8 tuning process: This setup will allow you to monitor the tuning process, track hyperparameter configurations, and visualize results in W&B. -### Why should I use Ray Tune for hyperparameter optimization with YOLOv8? +### Why should I use Ray Tune for hyperparameter optimization with YOLO11? Ray Tune offers numerous advantages for hyperparameter optimization: @@ -265,18 +267,18 @@ Ray Tune offers numerous advantages for hyperparameter optimization: - **Parallelism:** Supports parallel execution of multiple trials, significantly speeding up the tuning process. - **Early Stopping:** Employs strategies like ASHA to terminate under-performing trials early, saving computational resources. -Ray Tune seamlessly integrates with Ultralytics YOLOv8, providing an easy-to-use interface for tuning hyperparameters effectively. To get started, check out the [Efficient Hyperparameter Tuning with Ray Tune and YOLOv8](../guides/hyperparameter-tuning.md) guide. +Ray Tune seamlessly integrates with Ultralytics YOLO11, providing an easy-to-use interface for tuning hyperparameters effectively. To get started, check out the [Efficient Hyperparameter Tuning with Ray Tune and YOLO11](../guides/hyperparameter-tuning.md) guide. -### How can I define a custom search space for YOLOv8 hyperparameter tuning? +### How can I define a custom search space for YOLO11 hyperparameter tuning? 
-To define a custom search space for your YOLOv8 hyperparameter tuning with Ray Tune: +To define a custom search space for your YOLO11 hyperparameter tuning with Ray Tune: ```python from ray import tune from ultralytics import YOLO -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") search_space = {"lr0": tune.uniform(1e-5, 1e-1), "momentum": tune.uniform(0.6, 0.98)} result_grid = model.tune(data="coco8.yaml", space=search_space, use_ray=True) ``` diff --git a/docs/en/integrations/roboflow.md b/docs/en/integrations/roboflow.md index 321e5601644..5a9d5e3180b 100644 --- a/docs/en/integrations/roboflow.md +++ b/docs/en/integrations/roboflow.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn how to gather, label, and deploy data for custom YOLOv8 models using Roboflow's powerful tools. Optimize your computer vision pipeline effortlessly. -keywords: Roboflow, YOLOv8, data labeling, computer vision, model training, model deployment, dataset management, automated image annotation, AI tools +description: Learn how to gather, label, and deploy data for custom YOLO11 models using Roboflow's powerful tools. Optimize your computer vision pipeline effortlessly. +keywords: Roboflow, YOLO11, data labeling, computer vision, model training, model deployment, dataset management, automated image annotation, AI tools --- # Roboflow @@ -17,17 +17,17 @@ keywords: Roboflow, YOLOv8, data labeling, computer vision, model training, mode For more details see [Ultralytics Licensing](https://www.ultralytics.com/license). -In this guide, we are going to showcase how to find, label, and organize data for use in training a custom Ultralytics YOLOv8 model. Use the table of contents below to jump directly to a specific section: +In this guide, we are going to showcase how to find, label, and organize data for use in training a custom Ultralytics YOLO11 model. 
Use the table of contents below to jump directly to a specific section: -- Gather data for training a custom YOLOv8 model -- Upload, convert and label data for YOLOv8 format +- Gather data for training a custom YOLO11 model +- Upload, convert and label data for YOLO11 format - Pre-process and augment data for model robustness -- Dataset management for [YOLOv8](../models/yolov8.md) +- Dataset management for [YOLO11](../models/yolo11.md) - Export data in 40+ formats for model training -- Upload custom YOLOv8 model weights for testing and deployment -- Gather Data for Training a Custom YOLOv8 Model +- Upload custom YOLO11 model weights for testing and deployment +- Gather Data for Training a Custom YOLO11 Model -Roboflow provides two services that can help you collect data for YOLOv8 models: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). +Roboflow provides two services that can help you collect data for YOLO11 models: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe is an online repository with over 250,000 vision datasets totalling over 100 million images. @@ -41,21 +41,21 @@ With a [free Roboflow account](https://app.roboflow.com/?ref=ultralytics), you c Roboflow Universe dataset export

-For YOLOv8, select "YOLOv8" as the export format: +For YOLO11, select "YOLO11" as the export format:

Roboflow Universe dataset export

-Universe also has a page that aggregates all [public fine-tuned YOLOv8 models uploaded to Roboflow](https://universe.roboflow.com/search?q=model%3Ayolov8&ref=ultralytics). You can use this page to explore pre-trained models you can use for testing or [for automated data labeling](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics) or to prototype with [Roboflow inference](https://github.com/roboflow/inference?ref=ultralytics). +Universe also has a page that aggregates all [public fine-tuned YOLO11 models uploaded to Roboflow](https://universe.roboflow.com/search?q=model%3Ayolov8&ref=ultralytics). You can use this page to explore pre-trained models you can use for testing or [for automated data labeling](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics) or to prototype with [Roboflow inference](https://github.com/roboflow/inference?ref=ultralytics). If you want to gather images yourself, try [Collect](https://github.com/roboflow/roboflow-collect), an open source project that allows you to automatically gather images using a webcam on the edge. You can use text or image prompts with Collect to instruct what data should be collected, allowing you to capture only the useful data you need to build your vision model. -## Upload, Convert and Label Data for YOLOv8 Format +## Upload, Convert and Label Data for YOLO11 Format [Roboflow Annotate](https://docs.roboflow.com/annotate/use-roboflow-annotate?ref=ultralytics) is an online annotation tool for use in labeling images for [object detection](https://www.ultralytics.com/glossary/object-detection), classification, and segmentation. -To label data for a YOLOv8 object detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), or classification model, first create a project in Roboflow. 
+To label data for a YOLO11 object detection, [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), or classification model, first create a project in Roboflow.

Create a Roboflow project @@ -95,7 +95,7 @@ You can also add tags to images from the Tags panel in the sidebar. You can appl Adding tags to an image in Roboflow

-Models hosted on Roboflow can be used with Label Assist, an automated annotation tool that uses your YOLOv8 model to recommend annotations. To use Label Assist, first upload a YOLOv8 model to Roboflow (see instructions later in the guide). Then, click the magic wand icon in the left sidebar and select your model for use in Label Assist. +Models hosted on Roboflow can be used with Label Assist, an automated annotation tool that uses your YOLO11 model to recommend annotations. To use Label Assist, first upload a YOLO11 model to Roboflow (see instructions later in the guide). Then, click the magic wand icon in the left sidebar and select your model for use in Label Assist. Choose a model, then click "Continue" to enable Label Assist: @@ -109,7 +109,7 @@ When you open new images for annotation, Label Assist will trigger and recommend ALabel Assist recommending an annotation

-## Dataset Management for YOLOv8 +## Dataset Management for YOLO11 Roboflow provides a suite of tools for understanding computer vision datasets. @@ -157,13 +157,13 @@ When your dataset version has been generated, you can export your data into a ra Exporting a dataset

-You are now ready to train YOLOv8 on a custom dataset. Follow this [written guide](https://blog.roboflow.com/how-to-train-yolov8-on-a-custom-dataset/?ref=ultralytics) and [YouTube video](https://www.youtube.com/watch?v=wuZtUMEiKWY) for step-by-step instructions or refer to the [Ultralytics documentation](../modes/train.md). +You are now ready to train YOLO11 on a custom dataset. Follow this [written guide](https://blog.roboflow.com/how-to-train-yolov8-on-a-custom-dataset/?ref=ultralytics) and [YouTube video](https://www.youtube.com/watch?v=wuZtUMEiKWY) for step-by-step instructions or refer to the [Ultralytics documentation](../modes/train.md). -## Upload Custom YOLOv8 Model Weights for Testing and Deployment +## Upload Custom YOLO11 Model Weights for Testing and Deployment -Roboflow offers an infinitely scalable API for deployed models and SDKs for use with NVIDIA Jetsons, Luxonis OAKs, Raspberry Pis, GPU-based devices, and more. +Roboflow offers a scalable API for deployed models and SDKs for use with NVIDIA Jetson, Luxonis OAK, Raspberry Pi, GPU-based devices, and more. -You can deploy YOLOv8 models by uploading YOLOv8 weights to Roboflow. You can do this in a few lines of Python code. Create a new Python file and add the following code: +You can deploy YOLO11 models by uploading YOLO11 weights to Roboflow. You can do this in a few lines of Python code. Create a new Python file and add the following code: ```python import roboflow # install with 'pip install roboflow' @@ -190,7 +190,7 @@ To test your model and find deployment instructions for supported SDKs, go to th You can also use your uploaded model as a [labeling assistant](https://docs.roboflow.com/annotate/use-roboflow-annotate/model-assisted-labeling?ref=ultralytics). This feature uses your trained model to recommend annotations on images uploaded to Roboflow. -## How to Evaluate YOLOv8 Models +## How to Evaluate YOLO11 Models Roboflow provides a range of features for use in evaluating models. 
@@ -224,17 +224,17 @@ You can use Vector Analysis to: ## Learning Resources -Want to learn more about using Roboflow for creating YOLOv8 models? The following resources may be helpful in your work. +Want to learn more about using Roboflow for creating YOLO11 models? The following resources may be helpful in your work. -- [Train YOLOv8 on a Custom Dataset](https://github.com/roboflow/notebooks/blob/main/notebooks/train-yolov8-object-detection-on-custom-dataset.ipynb): Follow our interactive notebook that shows you how to train a YOLOv8 model on a custom dataset. -- [Autodistill](https://docs.autodistill.com/): Use large foundation vision models to label data for specific models. You can label images for use in training YOLOv8 classification, detection, and segmentation models with Autodistill. +- [Train YOLO11 on a Custom Dataset](https://github.com/roboflow/notebooks/blob/main/notebooks/train-yolov8-object-detection-on-custom-dataset.ipynb): Follow our interactive notebook that shows you how to train a YOLO11 model on a custom dataset. +- [Autodistill](https://docs.autodistill.com/): Use large foundation vision models to label data for specific models. You can label images for use in training YOLO11 classification, detection, and segmentation models with Autodistill. - [Supervision](https://supervision.roboflow.com/?ref=ultralytics): A Python package with helpful utilities for use in working with computer vision models. You can use supervision to filter detections, compute confusion matrices, and more, all in a few lines of Python code. -- [Roboflow Blog](https://blog.roboflow.com/?ref=ultralytics): The Roboflow Blog features over 500 articles on computer vision, covering topics from how to train a YOLOv8 model to annotation best practices. -- [Roboflow YouTube channel](https://www.youtube.com/@Roboflow): Browse dozens of in-depth computer vision guides on our YouTube channel, covering topics from training YOLOv8 models to automated image labeling. 
+- [Roboflow Blog](https://blog.roboflow.com/?ref=ultralytics): The Roboflow Blog features over 500 articles on computer vision, covering topics from how to train a YOLO11 model to annotation best practices. +- [Roboflow YouTube channel](https://www.youtube.com/@Roboflow): Browse dozens of in-depth computer vision guides on our YouTube channel, covering topics from training YOLO11 models to automated image labeling. ## Project Showcase -Below are a few of the many pieces of feedback we have received for using YOLOv8 and Roboflow together to create computer vision models. +Below are a few of the many pieces of feedback we have received for using YOLO11 and Roboflow together to create computer vision models.

Showcase image @@ -244,26 +244,26 @@ Below are a few of the many pieces of feedback we have received for using YOLOv8 ## FAQ -### How do I label data for YOLOv8 models using Roboflow? +### How do I label data for YOLO11 models using Roboflow? -Labeling data for YOLOv8 models using Roboflow is straightforward with Roboflow Annotate. First, create a project on Roboflow and upload your images. After uploading, select the batch of images and click "Start Annotating." You can use the `B` key for bounding boxes or the `P` key for polygons. For faster annotation, use the SAM-based label assistant by clicking the cursor icon in the sidebar. Detailed steps can be found [here](#upload-convert-and-label-data-for-yolov8-format). +Labeling data for YOLO11 models using Roboflow is straightforward with Roboflow Annotate. First, create a project on Roboflow and upload your images. After uploading, select the batch of images and click "Start Annotating." You can use the `B` key for bounding boxes or the `P` key for polygons. For faster annotation, use the SAM-based label assistant by clicking the cursor icon in the sidebar. Detailed steps can be found [here](#upload-convert-and-label-data-for-yolo11-format). -### What services does Roboflow offer for collecting YOLOv8 [training data](https://www.ultralytics.com/glossary/training-data)? +### What services does Roboflow offer for collecting YOLO11 [training data](https://www.ultralytics.com/glossary/training-data)? -Roboflow provides two key services for collecting YOLOv8 training data: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe offers access to over 250,000 vision datasets, while Collect helps you gather images using a webcam and automated prompts. 
+Roboflow provides two key services for collecting YOLO11 training data: [Universe](https://universe.roboflow.com/?ref=ultralytics) and [Collect](https://github.com/roboflow/roboflow-collect?ref=ultralytics). Universe offers access to over 250,000 vision datasets, while Collect helps you gather images using a webcam and automated prompts. -### How can I manage and analyze my YOLOv8 dataset using Roboflow? +### How can I manage and analyze my YOLO11 dataset using Roboflow? -Roboflow offers robust dataset management tools, including dataset search, tagging, and Health Check. Use the search feature to find images based on text descriptions or tags. Health Check provides insights into dataset quality, showing class balance, image sizes, and annotation heatmaps. This helps optimize dataset performance before training YOLOv8 models. Detailed information can be found [here](#dataset-management-for-yolov8). +Roboflow offers robust dataset management tools, including dataset search, tagging, and Health Check. Use the search feature to find images based on text descriptions or tags. Health Check provides insights into dataset quality, showing class balance, image sizes, and annotation heatmaps. This helps optimize dataset performance before training YOLO11 models. Detailed information can be found [here](#dataset-management-for-yolo11). -### How do I export my YOLOv8 dataset from Roboflow? +### How do I export my YOLO11 dataset from Roboflow? -To export your YOLOv8 dataset from Roboflow, you need to create a dataset version. Click "Versions" in the sidebar, then "Create New Version" and apply any desired augmentations. Once the version is generated, click "Export Dataset" and choose the YOLOv8 format. Follow this process [here](#export-data-in-40-formats-for-model-training). +To export your YOLO11 dataset from Roboflow, you need to create a dataset version. Click "Versions" in the sidebar, then "Create New Version" and apply any desired augmentations. 
Once the version is generated, click "Export Dataset" and choose the YOLO11 format. Follow this process [here](#export-data-in-40-formats-for-model-training). -### How can I integrate and deploy YOLOv8 models with Roboflow? +### How can I integrate and deploy YOLO11 models with Roboflow? -Integrate and deploy YOLOv8 models on Roboflow by uploading your YOLOv8 weights through a few lines of Python code. Use the provided script to authenticate and upload your model, which will create an API for deployment. For details on the script and further instructions, see [this section](#upload-custom-yolov8-model-weights-for-testing-and-deployment). +Integrate and deploy YOLO11 models on Roboflow by uploading your YOLO11 weights through a few lines of Python code. Use the provided script to authenticate and upload your model, which will create an API for deployment. For details on the script and further instructions, see [this section](#upload-custom-yolo11-model-weights-for-testing-and-deployment). -### What tools does Roboflow provide for evaluating YOLOv8 models? +### What tools does Roboflow provide for evaluating YOLO11 models? -Roboflow offers model evaluation tools, including a confusion matrix and vector analysis plots. Access these tools from the "View Detailed Evaluation" button on your model page. These features help identify model performance issues and find areas for improvement. For more information, refer to [this section](#how-to-evaluate-yolov8-models). +Roboflow offers model evaluation tools, including a confusion matrix and vector analysis plots. Access these tools from the "View Detailed Evaluation" button on your model page. These features help identify model performance issues and find areas for improvement. For more information, refer to [this section](#how-to-evaluate-yolo11-models). 
diff --git a/docs/en/integrations/rockchip-rknn.md b/docs/en/integrations/rockchip-rknn.md new file mode 100644 index 00000000000..087f942e089 --- /dev/null +++ b/docs/en/integrations/rockchip-rknn.md @@ -0,0 +1,206 @@ +--- +comments: true +description: Learn how to export YOLO11 models to RKNN format for efficient deployment on Rockchip platforms with enhanced performance. +keywords: YOLO11, RKNN, model export, Ultralytics, Rockchip, machine learning, model deployment, computer vision, deep learning +--- + +# Rockchip RKNN Export for Ultralytics YOLO11 Models + +When deploying computer vision models on embedded devices, especially those powered by Rockchip processors, having a compatible model format is essential. Exporting [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to RKNN format ensures optimized performance and compatibility with Rockchip's hardware. This guide will walk you through converting your YOLO11 models to RKNN format, enabling efficient deployment on Rockchip platforms. + +

+ RKNN +

+ +!!! note + + This guide has been tested with [Radxa Rock 5B](https://radxa.com/products/rock5/5b) which is based on Rockchip RK3588 and [Radxa Zero 3W](https://radxa.com/products/zeros/zero3w) which is based on Rockchip RK3566. It is expected to work across other Rockchip-based devices that support [rknn-toolkit2](https://github.com/airockchip/rknn-toolkit2) such as RK3576, RK3568, RK3562, RV1103, RV1106, RV1103B, RV1106B and RK2118. + +## What is Rockchip? + +Renowned for delivering versatile and power-efficient solutions, Rockchip designs advanced System-on-Chips (SoCs) that power a wide range of consumer electronics, industrial applications, and AI technologies. With ARM-based architecture, built-in Neural Processing Units (NPUs), and high-resolution multimedia support, Rockchip SoCs enable cutting-edge performance for devices like tablets, smart TVs, IoT systems, and edge AI applications. Companies like Radxa, ASUS, Pine64, Orange Pi, Odroid, Khadas, and Banana Pi offer a variety of products based on Rockchip SoCs, further extending their reach and impact across diverse markets. + +## RKNN Toolkit + +The [RKNN Toolkit](https://github.com/airockchip/rknn-toolkit2) is a set of tools and libraries provided by Rockchip to facilitate the deployment of deep learning models on their hardware platforms. RKNN, or Rockchip Neural Network, is the proprietary format used by these tools. RKNN models are designed to take full advantage of the hardware acceleration provided by Rockchip's NPU (Neural Processing Unit), ensuring high performance in AI tasks on devices like RK3588, RK3566, RV1103, RV1106, and other Rockchip-powered systems. + +## Key Features of RKNN Models + +RKNN models offer several advantages for deployment on Rockchip platforms: + +- **Optimized for NPU**: RKNN models are specifically optimized to run on Rockchip's NPUs, ensuring maximum performance and efficiency.
+- **Low Latency**: The RKNN format minimizes inference latency, which is critical for real-time applications on edge devices. +- **Platform-Specific Customization**: RKNN models can be tailored to specific Rockchip platforms, enabling better utilization of hardware resources. + +## Flash OS to Rockchip hardware + +The first step after getting your hands on a Rockchip-based device is to flash an OS so that the hardware can boot into a working environment. In this guide, we point to the getting started guides for the two devices we tested: Radxa Rock 5B and Radxa Zero 3W. + +- [Radxa Rock 5B Getting Started Guide](https://docs.radxa.com/en/rock5/rock5b) +- [Radxa Zero 3W Getting Started Guide](https://docs.radxa.com/en/zero/zero3) + +## Export to RKNN: Converting Your YOLO11 Model + +Export an Ultralytics YOLO11 model to RKNN format and run inference with the exported model. + +!!! note + + Make sure to use an x86-based Linux PC to export the model to RKNN because exporting on Rockchip-based devices (ARM64) is not supported. + +### Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install the required package for YOLO11 + pip install ultralytics + ``` + +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. + +### Usage + +!!! note + + Export is currently only supported for detection models. More model support will be coming in the future. + +!!! 
Example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the YOLO11 model + model = YOLO("yolo11n.pt") + + # Export the model to RKNN format + # 'name' can be one of rk3588, rk3576, rk3566, rk3568, rk3562, rv1103, rv1106, rv1103b, rv1106b, rk2118 + model.export(format="rknn", name="rk3588") # creates '/yolo11n_rknn_model' + ``` + + === "CLI" + + ```bash + # Export a YOLO11n PyTorch model to RKNN format + # 'name' can be one of rk3588, rk3576, rk3566, rk3568, rk3562, rv1103, rv1106, rv1103b, rv1106b, rk2118 + yolo export model=yolo11n.pt format=rknn name=rk3588 # creates '/yolo11n_rknn_model' + ``` + +For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). + +## Deploying Exported YOLO11 RKNN Models + +Once you've successfully exported your Ultralytics YOLO11 models to RKNN format, the next step is deploying these models on Rockchip-based devices. + +### Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + # Install the required package for YOLO11 + pip install ultralytics + ``` + +### Usage + +!!! Example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the exported RKNN model + rknn_model = YOLO("./yolo11n_rknn_model") + + # Run inference + results = rknn_model("https://ultralytics.com/images/bus.jpg") + ``` + + === "CLI" + + ```bash + # Run inference with the exported model + yolo predict model='./yolo11n_rknn_model' source='https://ultralytics.com/images/bus.jpg' + ``` + +!!! note + + If you encounter a log message indicating that the RKNN runtime version does not match the RKNN Toolkit version and the inference fails, please replace `/usr/lib/librknnrt.so` with official [librknnrt.so file](https://github.com/airockchip/rknn-toolkit2/blob/master/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so). 
+ + ![RKNN export screenshot](https://github.com/ultralytics/assets/releases/download/v0.0.0/rknn-npu-log.avif) + +## Benchmarks + +YOLO11 benchmarks below were run by the Ultralytics team on Radxa Rock 5B based on Rockchip RK3588 with `rknn` model format measuring speed and accuracy. + +| Model | Format | Status | Size (MB) | mAP50-95(B) | Inference time (ms/im) | +| ------- | ------ | ------ | --------- | ----------- | ---------------------- | +| YOLO11n | `rknn` | ✅ | 7.4 | 0.61 | 99.5 | +| YOLO11s | `rknn` | ✅ | 20.7 | 0.741 | 122.3 | +| YOLO11m | `rknn` | ✅ | 41.9 | 0.764 | 298.0 | +| YOLO11l | `rknn` | ✅ | 53.3 | 0.72 | 319.6 | +| YOLO11x | `rknn` | ✅ | 114.6 | 0.828 | 632.1 | + +!!! note + + Validation for the above benchmark was done using the coco8 dataset. + +## Summary + +In this guide, you've learned how to export Ultralytics YOLO11 models to RKNN format to enhance their deployment on Rockchip platforms. You were also introduced to the RKNN Toolkit and the specific advantages of using RKNN models for edge AI applications. + +For further details on usage, visit the [RKNN official documentation](https://github.com/airockchip/rknn-toolkit2). + +Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there. + +## FAQ + +### How do I export my Ultralytics YOLO model to RKNN format? + +You can easily export your Ultralytics YOLO model to RKNN format using the `export()` method in the Ultralytics Python package or via the command-line interface (CLI). Ensure you are using an x86-based Linux PC for the export process, as ARM64 devices like Rockchip are not supported for this operation. You can specify the target Rockchip platform using the `name` argument, such as `rk3588`, `rk3566`, or others. 
This process generates an optimized RKNN model ready for deployment on your Rockchip device, taking advantage of its Neural Processing Unit (NPU) for accelerated inference. + +!!! Example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load your YOLO model + model = YOLO("yolo11n.pt") + + # Export to RKNN format for a specific Rockchip platform + model.export(format="rknn", name="rk3588") + ``` + + === "CLI" + + ```bash + yolo export model=yolo11n.pt format=rknn name=rk3588 + ``` + +### What are the benefits of using RKNN models on Rockchip devices? + +RKNN models are specifically designed to leverage the hardware acceleration capabilities of Rockchip's Neural Processing Units (NPUs). This optimization results in significantly faster inference speeds and reduced latency compared to running generic model formats like ONNX or TensorFlow Lite on the same hardware. Using RKNN models allows for more efficient use of the device's resources, leading to lower power consumption and better overall performance, especially critical for real-time applications on edge devices. By converting your Ultralytics YOLO models to RKNN, you can achieve optimal performance on devices powered by Rockchip SoCs like the RK3588, RK3566, and others. + +### Can I deploy RKNN models on devices from other manufacturers like NVIDIA or Google? + +RKNN models are specifically optimized for Rockchip platforms and their integrated NPUs. While you can technically run an RKNN model on other platforms using software emulation, you will not benefit from the hardware acceleration provided by Rockchip devices. For optimal performance on other platforms, it's recommended to export your Ultralytics YOLO models to formats specifically designed for those platforms, such as TensorRT for NVIDIA GPUs or [TensorFlow Lite](https://docs.ultralytics.com/integrations/tflite/) for Google's Edge TPU. 
Ultralytics supports exporting to a wide range of formats, ensuring compatibility with various hardware accelerators. + +### What Rockchip platforms are supported for RKNN model deployment? + +The Ultralytics YOLO export to RKNN format supports a wide range of Rockchip platforms, including the popular RK3588, RK3576, RK3566, RK3568, RK3562, RV1103, RV1106, RV1103B, RV1106B, and RK2118. These platforms are commonly found in devices from manufacturers like Radxa, ASUS, Pine64, Orange Pi, Odroid, Khadas, and Banana Pi. This broad support ensures that you can deploy your optimized RKNN models on various Rockchip-powered devices, from single-board computers to industrial systems, taking full advantage of their AI acceleration capabilities for enhanced performance in your computer vision applications. + +### How does the performance of RKNN models compare to other formats on Rockchip devices? + +RKNN models generally outperform other formats like ONNX or TensorFlow Lite on Rockchip devices due to their optimization for Rockchip's NPUs. For instance, benchmarks on the Radxa Rock 5B (RK3588) show that [YOLO11n](https://www.ultralytics.com/blog/all-you-need-to-know-about-ultralytics-yolo11-and-its-applications) in RKNN format achieves an inference time of 99.5 ms/image, significantly faster than other formats. This performance advantage is consistent across various YOLO11 model sizes, as demonstrated in the [benchmarks section](#benchmarks). By leveraging the dedicated NPU hardware, RKNN models minimize latency and maximize throughput, making them ideal for real-time applications on Rockchip-based edge devices. 
diff --git a/docs/en/integrations/seeedstudio-recamera.md b/docs/en/integrations/seeedstudio-recamera.md new file mode 100644 index 00000000000..dcad49351d6 --- /dev/null +++ b/docs/en/integrations/seeedstudio-recamera.md @@ -0,0 +1,110 @@ +--- +comments: true +description: Discover how to get started with Seeed Studio reCamera for edge AI applications using Ultralytics YOLO11. Learn about its powerful features, real-world applications, and how to export YOLO11 models to ONNX format for seamless integration. +keywords: Seeed Studio reCamera, YOLO11, ONNX export, edge AI, computer vision, real-time detection, personal protective equipment detection, fire detection, waste detection, fall detection, modular AI devices, Ultralytics +--- + +# Quick Start Guide: Seeed Studio reCamera with Ultralytics YOLO11 + +[reCamera](https://www.seeedstudio.com/recamera) was introduced for the AI community at [YOLO Vision 2024 (YV24)](https://www.youtube.com/watch?v=rfI5vOo3-_A), [Ultralytics](https://ultralytics.com/) annual hybrid event. It is mainly designed for edge AI applications, offering powerful processing capabilities and effortless deployment. + +With support for diverse hardware configurations and open-source resources, it serves as an ideal platform for prototyping and deploying innovative [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) [solutions](https://docs.ultralytics.com/solutions/#solutions) at the edge. + +![Seeed Studio reCamera](https://github.com/ultralytics/docs/releases/download/0/saeed-studio-recamera.avif) + +## Why Choose reCamera? + +reCamera series is purpose-built for edge AI applications, tailored to meet the needs of developers and innovators. Here's why it stands out: + +- **RISC-V Powered Performance**: At its core is the SG200X processor, built on the RISC-V architecture, delivering exceptional performance for edge AI tasks while maintaining energy efficiency. 
With the ability to execute 1 trillion operations per second (1 TOPS), it handles demanding tasks like real-time object detection easily. + +- **Optimized Video Technologies**: Supports advanced video compression standards, including H.264 and H.265, to reduce storage and bandwidth requirements without sacrificing quality. Features like HDR imaging, 3D noise reduction, and lens correction ensure professional visuals, even in challenging environments. + +- **Energy-Efficient Dual Processing**: While the SG200X handles complex AI tasks, a smaller 8-bit microcontroller manages simpler operations to conserve power, making the reCamera ideal for battery-operated or low-power setups. + +- **Modular and Upgradable Design**: The reCamera is built with a modular structure, consisting of three main components: the core board, sensor board, and baseboard. This design allows developers to easily swap or upgrade components, ensuring flexibility and future-proofing for evolving projects. + +## Quick Hardware Setup of reCamera + +Please follow [reCamera Quick Start Guide](https://wiki.seeedstudio.com/recamera_getting_started) for initial onboarding of the device such as connecting the device to a WiFi network and accessing the [Node-RED](https://nodered.org) web UI for quick previewing of detection results with the pre-installed Ultralytics YOLO models. + +## Export to cvimodel: Converting Your YOLO11 Model + +Here we will first convert the `PyTorch` model to `ONNX` and then convert it to `MLIR` model format. Finally `MLIR` will be converted to `cvimodel` in order to run inference on-device. + +

+ reCamera Toolchain +

+ +### Export to ONNX + +Export an Ultralytics YOLO11 model to ONNX model format. + +#### Installation + +To install the required packages, run: + +!!! Tip "Installation" + + === "CLI" + + ```bash + pip install ultralytics + ``` + +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. + +#### Usage + +!!! Example "Usage" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load the YOLO11 model + model = YOLO("yolo11n.pt") + + # Export the model to ONNX format + model.export(format="onnx") # creates 'yolo11n.onnx' + ``` + + === "CLI" + + ```bash + # Export a YOLO11n PyTorch model to ONNX format + yolo export model=yolo11n.pt format=onnx # creates 'yolo11n.onnx' + ``` + +For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). + +### Export ONNX to MLIR and cvimodel + +After obtaining an ONNX model, refer to the [Convert and Quantize AI Models](https://wiki.seeedstudio.com/recamera_model_conversion) page to convert the ONNX model to MLIR and then to cvimodel. + +!!! note + + We're actively working on adding reCamera support directly into the Ultralytics package, and it will be available soon. In the meantime, check out our blog on [Integrating Ultralytics YOLO Models with Seeed Studio's reCamera](https://www.ultralytics.com/blog/integrating-ultralytics-yolo-models-on-seeed-studios-recamera) for more insights. + +## Benchmarks + +Coming soon. + +## Real-World Applications of reCamera + +reCamera's advanced computer vision capabilities and modular design make it suitable for a wide range of real-world scenarios, helping developers and businesses tackle unique challenges with ease.
+ +- **Fall Detection**: Designed for safety and healthcare applications, the reCamera can detect falls in real-time, making it ideal for elderly care, hospitals, and industrial settings where rapid response is critical. + +- **Personal Protective Equipment Detection**: The reCamera can be used to ensure workplace safety by detecting PPE compliance in real-time. It helps identify whether workers are wearing helmets, gloves, or other safety gear, reducing risks in industrial environments. + +![Personal protective equipment detection](https://github.com/ultralytics/docs/releases/download/0/personal-protective-equipment-detection.avif) + +- **Fire Detection**: The reCamera's real-time processing capabilities make it an excellent choice for fire detection in industrial and residential areas, providing early warnings to prevent potential disasters. + +- **Waste Detection**: It can also be utilized for waste detection applications, making it an excellent tool for environmental monitoring and waste management. + +- **Car Parts Detection**: In manufacturing and automotive industries, it aids in detecting and analyzing car parts for quality control, assembly line monitoring, and inventory management. + +![Car parts detection](https://github.com/ultralytics/docs/releases/download/0/carparts-detection.avif) diff --git a/docs/en/integrations/sony-imx500.md b/docs/en/integrations/sony-imx500.md new file mode 100644 index 00000000000..ea0d70de986 --- /dev/null +++ b/docs/en/integrations/sony-imx500.md @@ -0,0 +1,330 @@ +--- +comments: true +description: Learn to export Ultralytics YOLOv8 models to Sony's IMX500 format to optimize your models for efficient deployment. 
+keywords: Sony, IMX500, IMX 500, Atrios, MCT, model export, quantization, pruning, deep learning optimization, Raspberry Pi AI Camera, edge AI, PyTorch, IMX +--- + +# Sony IMX500 Export for Ultralytics YOLOv8 + +This guide covers exporting and deploying Ultralytics YOLOv8 models to Raspberry Pi AI Cameras that feature the Sony IMX500 sensor. + +Deploying computer vision models on devices with limited computational power, such as [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/), can be tricky. Using a model format optimized for faster performance makes a huge difference. + +The IMX500 model format is designed to use minimal power while delivering fast performance for neural networks. It allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through exporting and deploying your models to the IMX500 format while making it easier for your models to perform well on the [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/). + +

+ Raspberry Pi AI Camera +

+ +## Why Should You Export to IMX500 + +Sony's [IMX500 Intelligent Vision Sensor](https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera) is a game-changing piece of hardware in edge AI processing. It's the world's first intelligent vision sensor with on-chip AI capabilities. This sensor helps overcome many challenges in edge AI, including data processing bottlenecks, privacy concerns, and performance limitations. +While other sensors merely pass along images and frames, the IMX500 tells a whole story. It processes data directly on the sensor, allowing devices to generate insights in real-time. + +## Sony's IMX500 Export for YOLOv8 Models + +The IMX500 is designed to transform how devices handle data directly on the sensor, without needing to send it off to the cloud for processing. + +The IMX500 works with quantized models. Quantization makes models smaller and faster without losing much [accuracy](https://www.ultralytics.com/glossary/accuracy). It is ideal for the limited resources of edge computing, allowing applications to respond quickly by reducing latency and allowing for quick data processing locally, without cloud dependency. Local processing also keeps user data private and secure since it's not sent to a remote server. + +**IMX500 Key Features:** + +- **Metadata Output:** Instead of transmitting images only, the IMX500 can output both image and metadata (inference result), and can output metadata only for minimizing data size, reducing bandwidth, and lowering costs. +- **Addresses Privacy Concerns:** By processing data on the device, the IMX500 addresses privacy concerns, ideal for human-centric applications like person counting and occupancy tracking. +- **Real-time Processing:** Fast, on-sensor processing supports real-time decisions, perfect for edge AI applications such as autonomous systems. 
+ +**Before You Begin:** For best results, ensure your YOLOv8 model is well-prepared for export by following our [Model Training Guide](https://docs.ultralytics.com/modes/train/), [Data Preparation Guide](https://docs.ultralytics.com/datasets/), and [Hyperparameter Tuning Guide](https://docs.ultralytics.com/guides/hyperparameter-tuning/). + +## Usage Examples + +Export an Ultralytics YOLOv8 model to IMX500 format and run inference with the exported model. + +!!! note + + IMX export is currently only supported for the YOLOv8n model. Here we perform inference just to make sure the model works as expected. However, for deployment and inference on the Raspberry Pi AI Camera, please jump to [Using IMX500 Export in Deployment](#using-imx500-export-in-deployment) section. + +!!! example + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a YOLOv8n PyTorch model + model = YOLO("yolov8n.pt") + + # Export the model + model.export(format="imx") # exports with PTQ quantization by default + + # Load the exported model + imx_model = YOLO("yolov8n_imx_model") + + # Run inference + results = imx_model("https://ultralytics.com/images/bus.jpg") + ``` + + === "CLI" + + ```bash + # Export a YOLOv8n PyTorch model to imx format with Post-Training Quantization (PTQ) + yolo export model=yolov8n.pt format=imx + + # Run inference with the exported model + yolo predict model=yolov8n_imx_model source='https://ultralytics.com/images/bus.jpg' + ``` + +The export process will create an ONNX model for quantization validation, along with a directory named `_imx_model`. This directory will include the `packerOut.zip` file, which is essential for packaging the model for deployment on the IMX500 hardware. Additionally, the `_imx_model` folder will contain a text file (`labels.txt`) listing all the labels associated with the model. 
+ +```bash +yolov8n_imx_model +├── dnnParams.xml +├── labels.txt +├── packerOut.zip +├── yolov8n_imx.onnx +├── yolov8n_imx500_model_MemoryReport.json +└── yolov8n_imx500_model.pbtxt +``` + +## Arguments + +When exporting a model to IMX500 format, you can specify various arguments: + +| Key | Value | Description | +| -------- | ------------ | -------------------------------------------------------------- | +| `format` | `imx` | Format to export to (imx) | +| `int8` | `True` | Enable INT8 quantization for the model (default: `True`) | +| `imgsz` | `640` | Image size for the model input (default: `640`) | +| `data` | `coco8.yaml` | Path to the dataset configuration file (default: `coco8.yaml`) | + +!!! note + + When using `data` argument for quantization, please check [Dataset Guide](https://docs.ultralytics.com/datasets/detect) to learn more about the dataset format. + +## Using IMX500 Export in Deployment + +After exporting Ultralytics YOLOv8n model to IMX500 format, it can be deployed to Raspberry Pi AI Camera for inference. + +### Hardware Prerequisites + +Make sure you have the below hardware: + +1. Raspberry Pi 5 or Raspberry Pi 4 Model B +2. Raspberry Pi AI Camera + +Connect the Raspberry Pi AI camera to the 15-pin MIPI CSI connector on the Raspberry Pi and power on the Raspberry Pi + +### Software Prerequisites + +!!! note + + This guide has been tested with Raspberry Pi OS Bookworm running on a Raspberry Pi 5 + +Step 1: Open a terminal window and execute the following commands to update the Raspberry Pi software to the latest version. + +```bash +sudo apt update && sudo apt full-upgrade +``` + +Step 2: Install IMX500 firmware which is required to operate the IMX500 sensor along with a packager tool. + +```bash +sudo apt install imx500-all imx500-tools +``` + +Step 3: Install prerequisites to run `picamera2` application. We will use this application later for the deployment process.
+ +```bash +sudo apt install python3-opencv python3-munkres +``` + +Step 4: Reboot Raspberry Pi for the changes to take effect + +```bash +sudo reboot +``` + +### Package Model and Deploy to AI Camera + +After obtaining `packerOut.zip` from the IMX500 conversion process, you can pass this file into the packager tool to obtain an RPK file. This file can then be deployed directly to the AI Camera using `picamera2`. + +Step 1: Package the model into RPK file + +```bash +imx500-package -i -o +``` + +The above will generate a `network.rpk` file inside the specified output folder. + +Step 2: Clone `picamera2` repository, install it and navigate to the imx500 examples + +```bash +git clone https://github.com/raspberrypi/picamera2 +cd picamera2 +pip install -e . --break-system-packages +cd examples/imx500 +``` + +Step 3: Run YOLOv8 object detection, using the labels.txt file that has been generated during the IMX500 export. + +```bash +python imx500_object_detection_demo.py --model --fps 25 --bbox-normalization --ignore-dash-labels --bbox-order xy --labels +``` + +Then you will be able to see live inference output as follows + +

+ Inference on Raspberry Pi AI Camera +

+ +## Benchmarks + +YOLOv8 benchmarks below were run by the Ultralytics team on Raspberry Pi AI Camera with `imx` model format measuring speed and accuracy. + +| Model | Format | Status | Size (MB) | mAP50-95(B) | Inference time (ms/im) | +| ------- | ------ | ------ | --------- | ----------- | ---------------------- | +| YOLOv8n | imx | ✅ | 2.9 | 0.522 | 66.66 | + +!!! note + + Validation for the above benchmark was done using the coco8 dataset. + +## What's Under the Hood? + +

+ IMX500 deployment +

+ +### Sony Model Compression Toolkit (MCT) + +[Sony's Model Compression Toolkit (MCT)](https://github.com/sony/model_optimization) is a powerful tool for optimizing deep learning models through quantization and pruning. It supports various quantization methods and provides advanced algorithms to reduce model size and computational complexity without significantly sacrificing accuracy. MCT is particularly useful for deploying models on resource-constrained devices, ensuring efficient inference and reduced latency. + +### Supported Features of MCT + +Sony's MCT offers a range of features designed to optimize neural network models: + +1. **Graph Optimizations**: Transforms models into more efficient versions by folding layers like batch normalization into preceding layers. +2. **Quantization Parameter Search**: Minimizes quantization noise using metrics like Mean-Square-Error, No-Clipping, and Mean-Average-Error. +3. **Advanced Quantization Algorithms**: + - **Shift Negative Correction**: Addresses performance issues from symmetric activation quantization. + - **Outliers Filtering**: Uses z-score to detect and remove outliers. + - **Clustering**: Utilizes non-uniform quantization grids for better distribution matching. + - **Mixed-Precision Search**: Assigns different quantization bit-widths per layer based on sensitivity. +4. **Visualization**: Use TensorBoard to observe model performance insights, quantization phases, and bit-width configurations. + +#### Quantization + +MCT supports several quantization methods to reduce model size and improve inference speed: + +1. **Post-Training Quantization (PTQ)**: + - Available via Keras and PyTorch APIs. + - Complexity: Low + - Computational Cost: Low (CPU minutes) +2. **Gradient-based Post-Training Quantization (GPTQ)**: + - Available via Keras and PyTorch APIs. + - Complexity: Medium + - Computational Cost: Moderate (2-3 GPU hours) +3. 
**Quantization-Aware Training (QAT)**: + - Complexity: High + - Computational Cost: High (12-36 GPU hours) + +MCT also supports various quantization schemes for weights and activations: + +1. Power-of-Two (hardware-friendly) +2. Symmetric +3. Uniform + +#### Structured Pruning + +MCT introduces structured, hardware-aware model pruning designed for specific hardware architectures. This technique leverages the target platform's Single Instruction, Multiple Data (SIMD) capabilities by pruning SIMD groups. This reduces model size and complexity while optimizing channel utilization, aligned with the SIMD architecture for targeted resource utilization of weights memory footprint. Available via Keras and PyTorch APIs. + +### IMX500 Converter Tool (Compiler) + +The IMX500 Converter Tool is integral to the IMX500 toolset, allowing the compilation of models for deployment on Sony's IMX500 sensor (for instance, Raspberry Pi AI Cameras). This tool facilitates the transition of Ultralytics YOLOv8 models processed through Ultralytics software, ensuring they are compatible and perform efficiently on the specified hardware. The export procedure following model quantization involves the generation of binary files that encapsulate essential data and device-specific configurations, streamlining the deployment process on the Raspberry Pi AI Camera. + +## Real-World Use Cases + +Export to IMX500 format has wide applicability across industries. Here are some examples: + +- **Edge AI and IoT**: Enable object detection on drones or security cameras, where real-time processing on low-power devices is essential. +- **Wearable Devices**: Deploy models optimized for small-scale AI processing on health-monitoring wearables. +- **Smart Cities**: Use IMX500-exported YOLOv8 models for traffic monitoring and safety analysis with faster processing and minimal latency. +- **Retail Analytics**: Enhance in-store monitoring by deploying optimized models in point-of-sale systems or smart shelves. 
+ +## Conclusion + +Exporting Ultralytics YOLOv8 models to Sony's IMX500 format allows you to deploy your models for efficient inference on IMX500-based cameras. By leveraging advanced quantization techniques, you can reduce model size and improve inference speed without significantly compromising accuracy. + +For more information and detailed guidelines, refer to Sony's [IMX500 website](https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera). + +## FAQ + +### How do I export a YOLOv8 model to IMX500 format for Raspberry Pi AI Camera? + +To export a YOLOv8 model to IMX500 format, use either the Python API or CLI command: + +```python +from ultralytics import YOLO + +model = YOLO("yolov8n.pt") +model.export(format="imx") # Exports with PTQ quantization by default +``` + +The export process will create a directory containing the necessary files for deployment, including `packerOut.zip` which can be used with the IMX500 packager tool on Raspberry Pi. + +### What are the key benefits of using the IMX500 format for edge AI deployment? + +The IMX500 format offers several important advantages for edge deployment: + +- On-chip AI processing reduces latency and power consumption +- Outputs both image and metadata (inference result) instead of images only +- Enhanced privacy by processing data locally without cloud dependency +- Real-time processing capabilities ideal for time-sensitive applications +- Optimized quantization for efficient model deployment on resource-constrained devices + +### What hardware and software prerequisites are needed for IMX500 deployment? 
+ +For deploying IMX500 models, you'll need: + +Hardware: + +- Raspberry Pi 5 or Raspberry Pi 4 Model B +- Raspberry Pi AI Camera with IMX500 sensor + +Software: + +- Raspberry Pi OS Bookworm +- IMX500 firmware and tools (`sudo apt install imx500-all imx500-tools`) +- Python packages for `picamera2` (`sudo apt install python3-opencv python3-munkres`) + +### What performance can I expect from YOLOv8 models on the IMX500? + +Based on Ultralytics benchmarks on Raspberry Pi AI Camera: + +- YOLOv8n achieves 66.66ms inference time per image +- mAP50-95 of 0.522 on COCO8 dataset +- Model size of only 2.9MB after quantization + +This demonstrates that IMX500 format provides efficient real-time inference while maintaining good accuracy for edge AI applications. + +### How do I package and deploy my exported model to the Raspberry Pi AI Camera? + +After exporting to IMX500 format: + +1. Use the packager tool to create an RPK file: + + ```bash + imx500-package -i -o + ``` + +2. Clone and install picamera2: + + ```bash + git clone https://github.com/raspberrypi/picamera2 + cd picamera2 && pip install -e . --break-system-packages + ``` + +3. Run inference using the generated RPK file: + + ```bash + python imx500_object_detection_demo.py --model --fps 25 --bbox-normalization --labels + ``` diff --git a/docs/en/integrations/tensorboard.md b/docs/en/integrations/tensorboard.md index d563aca12b2..c6cad0d50b5 100644 --- a/docs/en/integrations/tensorboard.md +++ b/docs/en/integrations/tensorboard.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to integrate YOLOv8 with TensorBoard for real-time visual insights into your model's training metrics, performance graphs, and debugging workflows. 
-keywords: YOLOv8, TensorBoard, model training, visualization, machine learning, deep learning, Ultralytics, training metrics, performance analysis +description: Learn how to integrate YOLO11 with TensorBoard for real-time visual insights into your model's training metrics, performance graphs, and debugging workflows. +keywords: YOLO11, TensorBoard, model training, visualization, machine learning, deep learning, Ultralytics, training metrics, performance analysis --- -# Gain Visual Insights with YOLOv8's Integration with TensorBoard +# Gain Visual Insights with YOLO11's Integration with TensorBoard -Understanding and fine-tuning [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLOv8](https://www.ultralytics.com/) becomes more straightforward when you take a closer look at their training processes. Model training visualization helps with getting insights into the model's learning patterns, performance metrics, and overall behavior. YOLOv8's integration with TensorBoard makes this process of visualization and analysis easier and enables more efficient and informed adjustments to the model. +Understanding and fine-tuning [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models like [Ultralytics' YOLO11](https://www.ultralytics.com/) becomes more straightforward when you take a closer look at their training processes. Model training visualization helps with getting insights into the model's learning patterns, performance metrics, and overall behavior. YOLO11's integration with TensorBoard makes this process of visualization and analysis easier and enables more efficient and informed adjustments to the model. -This guide covers how to use TensorBoard with YOLOv8. You'll learn about various visualizations, from tracking metrics to analyzing model graphs. These tools will help you understand your YOLOv8 model's performance better. +This guide covers how to use TensorBoard with YOLO11. 
You'll learn about various visualizations, from tracking metrics to analyzing model graphs. These tools will help you understand your YOLO11 model's performance better. ## TensorBoard @@ -18,9 +18,9 @@ This guide covers how to use TensorBoard with YOLOv8. You'll learn about various [TensorBoard](https://www.tensorflow.org/tensorboard), [TensorFlow](https://www.ultralytics.com/glossary/tensorflow)'s visualization toolkit, is essential for [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) experimentation. TensorBoard features a range of visualization tools, crucial for monitoring machine learning models. These tools include tracking key metrics like loss and accuracy, visualizing model graphs, and viewing histograms of weights and biases over time. It also provides capabilities for projecting [embeddings](https://www.ultralytics.com/glossary/embeddings) to lower-dimensional spaces and displaying multimedia data. -## YOLOv8 Training with TensorBoard +## YOLO11 Training with TensorBoard -Using TensorBoard while training YOLOv8 models is straightforward and offers significant benefits. +Using TensorBoard while training YOLO11 models is straightforward and offers significant benefits. ## Installation @@ -31,13 +31,13 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 and Tensorboard + # Install the required package for YOLO11 and Tensorboard pip install ultralytics ``` -TensorBoard is conveniently pre-installed with YOLOv8, eliminating the need for additional setup for visualization purposes. +TensorBoard is conveniently pre-installed with YOLO11, eliminating the need for additional setup for visualization purposes. -For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). 
While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ## Configuring TensorBoard for Google Colab @@ -54,7 +54,7 @@ When using Google Colab, it's important to set up TensorBoard before starting yo ## Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. !!! example "Usage" @@ -64,7 +64,7 @@ Before diving into the usage instructions, be sure to check out the range of [YO from ultralytics import YOLO # Load a pre-trained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -76,17 +76,17 @@ Upon running the usage code snippet above, you can expect the following output: TensorBoard: Start with 'tensorboard --logdir path_to_your_tensorboard_logs', view at http://localhost:6006/ ``` -This output indicates that TensorBoard is now actively monitoring your YOLOv8 training session. You can access the TensorBoard dashboard by visiting the provided URL (http://localhost:6006/) to view real-time training metrics and model performance. 
For users working in Google Colab, the TensorBoard will be displayed in the same cell where you executed the TensorBoard configuration commands. +This output indicates that TensorBoard is now actively monitoring your YOLO11 training session. You can access the TensorBoard dashboard by visiting the provided URL (http://localhost:6006/) to view real-time training metrics and model performance. For users working in Google Colab, the TensorBoard will be displayed in the same cell where you executed the TensorBoard configuration commands. -For more information related to the model training process, be sure to check our [YOLOv8 Model Training guide](../modes/train.md). If you are interested in learning more about logging, checkpoints, plotting, and file management, read our [usage guide on configuration](../usage/cfg.md). +For more information related to the model training process, be sure to check our [YOLO11 Model Training guide](../modes/train.md). If you are interested in learning more about logging, checkpoints, plotting, and file management, read our [usage guide on configuration](../usage/cfg.md). -## Understanding Your TensorBoard for YOLOv8 Training +## Understanding Your TensorBoard for YOLO11 Training -Now, let's focus on understanding the various features and components of TensorBoard in the context of YOLOv8 training. The three key sections of the TensorBoard are Time Series, Scalars, and Graphs. +Now, let's focus on understanding the various features and components of TensorBoard in the context of YOLO11 training. The three key sections of the TensorBoard are Time Series, Scalars, and Graphs. ### Time Series -The Time Series feature in the TensorBoard offers a dynamic and detailed perspective of various training metrics over time for YOLOv8 models. It focuses on the progression and trends of metrics across training epochs. Here's an example of what you can expect to see. 
+The Time Series feature in the TensorBoard offers a dynamic and detailed perspective of various training metrics over time for YOLO11 models. It focuses on the progression and trends of metrics across training epochs. Here's an example of what you can expect to see. ![image](https://github.com/ultralytics/docs/releases/download/0/time-series-tensorboard-yolov8.avif) @@ -100,13 +100,13 @@ The Time Series feature in the TensorBoard offers a dynamic and detailed perspec - **In-Depth Analysis**: Time Series provides an in-depth analysis of each metric. For instance, different learning rate segments are shown, offering insights into how adjustments in learning rate impact the model's learning curve. -#### Importance of Time Series in YOLOv8 Training +#### Importance of Time Series in YOLO11 Training -The Time Series section is essential for a thorough analysis of the YOLOv8 model's training progress. It lets you track the metrics in real time to promptly identify and solve issues. It also offers a detailed view of each metrics progression, which is crucial for fine-tuning the model and enhancing its performance. +The Time Series section is essential for a thorough analysis of the YOLO11 model's training progress. It lets you track the metrics in real time to promptly identify and solve issues. It also offers a detailed view of each metric's progression, which is crucial for fine-tuning the model and enhancing its performance. ### Scalars -Scalars in the TensorBoard are crucial for plotting and analyzing simple metrics like loss and accuracy during the training of YOLOv8 models. They offer a clear and concise view of how these metrics evolve with each training [epoch](https://www.ultralytics.com/glossary/epoch), providing insights into the model's learning effectiveness and stability. Here's an example of what you can expect to see.
+Scalars in the TensorBoard are crucial for plotting and analyzing simple metrics like loss and accuracy during the training of YOLO11 models. They offer a clear and concise view of how these metrics evolve with each training [epoch](https://www.ultralytics.com/glossary/epoch), providing insights into the model's learning effectiveness and stability. Here's an example of what you can expect to see. ![image](https://github.com/ultralytics/docs/releases/download/0/scalars-metrics-tensorboard.avif) @@ -130,7 +130,7 @@ Scalars in the TensorBoard are crucial for plotting and analyzing simple metrics #### Importance of Monitoring Scalars -Observing scalar metrics is crucial for fine-tuning the YOLOv8 model. Variations in these metrics, such as spikes or irregular patterns in loss graphs, can highlight potential issues such as [overfitting](https://www.ultralytics.com/glossary/overfitting), [underfitting](https://www.ultralytics.com/glossary/underfitting), or inappropriate learning rate settings. By closely monitoring these scalars, you can make informed decisions to optimize the training process, ensuring that the model learns effectively and achieves the desired performance. +Observing scalar metrics is crucial for fine-tuning the YOLO11 model. Variations in these metrics, such as spikes or irregular patterns in loss graphs, can highlight potential issues such as [overfitting](https://www.ultralytics.com/glossary/overfitting), [underfitting](https://www.ultralytics.com/glossary/underfitting), or inappropriate learning rate settings. By closely monitoring these scalars, you can make informed decisions to optimize the training process, ensuring that the model learns effectively and achieves the desired performance. 
### Difference Between Scalars and Time Series @@ -138,15 +138,15 @@ While both Scalars and Time Series in TensorBoard are used for tracking metrics, ### Graphs -The Graphs section of the TensorBoard visualizes the computational graph of the YOLOv8 model, showing how operations and data flow within the model. It's a powerful tool for understanding the model's structure, ensuring that all layers are connected correctly, and for identifying any potential bottlenecks in data flow. Here's an example of what you can expect to see. +The Graphs section of the TensorBoard visualizes the computational graph of the YOLO11 model, showing how operations and data flow within the model. It's a powerful tool for understanding the model's structure, ensuring that all layers are connected correctly, and for identifying any potential bottlenecks in data flow. Here's an example of what you can expect to see. ![image](https://github.com/ultralytics/docs/releases/download/0/tensorboard-yolov8-computational-graph.avif) -Graphs are particularly useful for debugging the model, especially in complex architectures typical in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like YOLOv8. They help in verifying layer connections and the overall design of the model. +Graphs are particularly useful for debugging the model, especially in complex architectures typical in [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) models like YOLO11. They help in verifying layer connections and the overall design of the model. ## Summary -This guide aims to help you use TensorBoard with YOLOv8 for visualization and analysis of machine learning model training. It focuses on explaining how key TensorBoard features can provide insights into training metrics and model performance during YOLOv8 training sessions. +This guide aims to help you use TensorBoard with YOLO11 for visualization and analysis of machine learning model training. 
It focuses on explaining how key TensorBoard features can provide insights into training metrics and model performance during YOLO11 training sessions. For a more detailed exploration of these features and effective utilization strategies, you can refer to TensorFlow's official [TensorBoard documentation](https://www.tensorflow.org/tensorboard/get_started) and their [GitHub repository](https://github.com/tensorflow/tensorboard). @@ -154,29 +154,29 @@ Want to learn more about the various integrations of Ultralytics? Check out the ## FAQ -### What benefits does using TensorBoard with YOLOv8 offer? +### What benefits does using TensorBoard with YOLO11 offer? -Using TensorBoard with YOLOv8 provides several visualization tools essential for efficient model training: +Using TensorBoard with YOLO11 provides several visualization tools essential for efficient model training: - **Real-Time Metrics Tracking:** Track key metrics such as loss, accuracy, precision, and recall live. - **Model Graph Visualization:** Understand and debug the model architecture by visualizing computational graphs. - **Embedding Visualization:** Project embeddings to lower-dimensional spaces for better insight. -These tools enable you to make informed adjustments to enhance your YOLOv8 model's performance. For more details on TensorBoard features, check out the TensorFlow [TensorBoard guide](https://www.tensorflow.org/tensorboard/get_started). +These tools enable you to make informed adjustments to enhance your YOLO11 model's performance. For more details on TensorBoard features, check out the TensorFlow [TensorBoard guide](https://www.tensorflow.org/tensorboard/get_started). -### How can I monitor training metrics using TensorBoard when training a YOLOv8 model? +### How can I monitor training metrics using TensorBoard when training a YOLO11 model? 
-To monitor training metrics while training a YOLOv8 model with TensorBoard, follow these steps: +To monitor training metrics while training a YOLO11 model with TensorBoard, follow these steps: -1. **Install TensorBoard and YOLOv8:** Run `pip install ultralytics` which includes TensorBoard. -2. **Configure TensorBoard Logging:** During the training process, YOLOv8 logs metrics to a specified log directory. +1. **Install TensorBoard and YOLO11:** Run `pip install ultralytics` which includes TensorBoard. +2. **Configure TensorBoard Logging:** During the training process, YOLO11 logs metrics to a specified log directory. 3. **Start TensorBoard:** Launch TensorBoard using the command `tensorboard --logdir path/to/your/tensorboard/logs`. -The TensorBoard dashboard, accessible via [http://localhost:6006/](http://localhost:6006/), provides real-time insights into various training metrics. For a deeper dive into training configurations, visit our [YOLOv8 Configuration guide](../usage/cfg.md). +The TensorBoard dashboard, accessible via [http://localhost:6006/](http://localhost:6006/), provides real-time insights into various training metrics. For a deeper dive into training configurations, visit our [YOLO11 Configuration guide](../usage/cfg.md). -### What kind of metrics can I visualize with TensorBoard when training YOLOv8 models? +### What kind of metrics can I visualize with TensorBoard when training YOLO11 models? -When training YOLOv8 models, TensorBoard allows you to visualize an array of important metrics including: +When training YOLO11 models, TensorBoard allows you to visualize an array of important metrics including: - **Loss (Training and Validation):** Indicates how well the model is performing during training and validation. - **Accuracy/Precision/[Recall](https://www.ultralytics.com/glossary/recall):** Key performance metrics to evaluate detection accuracy. 
@@ -185,9 +185,9 @@ When training YOLOv8 models, TensorBoard allows you to visualize an array of imp These visualizations are essential for tracking model performance and making necessary optimizations. For more information on these metrics, refer to our [Performance Metrics guide](../guides/yolo-performance-metrics.md). -### Can I use TensorBoard in a Google Colab environment for training YOLOv8? +### Can I use TensorBoard in a Google Colab environment for training YOLO11? -Yes, you can use TensorBoard in a Google Colab environment to train YOLOv8 models. Here's a quick setup: +Yes, you can use TensorBoard in a Google Colab environment to train YOLO11 models. Here's a quick setup: !!! example "Configure TensorBoard for Google Colab" @@ -198,16 +198,16 @@ Yes, you can use TensorBoard in a Google Colab environment to train YOLOv8 model %tensorboard --logdir path/to/runs ``` - Then, run the YOLOv8 training script: + Then, run the YOLO11 training script: ```python from ultralytics import YOLO # Load a pre-trained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) ``` -TensorBoard will visualize the training progress within Colab, providing real-time insights into metrics like loss and accuracy. For additional details on configuring YOLOv8 training, see our detailed [YOLOv8 Installation guide](../quickstart.md). +TensorBoard will visualize the training progress within Colab, providing real-time insights into metrics like loss and accuracy. For additional details on configuring YOLO11 training, see our detailed [YOLO11 Installation guide](../quickstart.md). 
diff --git a/docs/en/integrations/tensorrt.md b/docs/en/integrations/tensorrt.md index 0e401981133..59dbb280b61 100644 --- a/docs/en/integrations/tensorrt.md +++ b/docs/en/integrations/tensorrt.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn to convert YOLOv8 models to TensorRT for high-speed NVIDIA GPU inference. Boost efficiency and deploy optimized models with our step-by-step guide. -keywords: YOLOv8, TensorRT, NVIDIA, GPU, deep learning, model optimization, high-speed inference, model export +description: Learn to convert YOLO11 models to TensorRT for high-speed NVIDIA GPU inference. Boost efficiency and deploy optimized models with our step-by-step guide. +keywords: YOLOv8, YOLO11, TensorRT, NVIDIA, GPU, deep learning, model optimization, high-speed inference, model export --- -# TensorRT Export for YOLOv8 Models +# TensorRT Export for YOLO11 Models Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models in high-performance environments can require a format that maximizes speed and efficiency. This is especially true when you are deploying your model on NVIDIA GPUs. -By using the TensorRT export format, you can enhance your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for swift and efficient inference on NVIDIA hardware. This guide will give you easy-to-follow steps for the conversion process and help you make the most of NVIDIA's advanced technology in your [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) projects. +By using the TensorRT export format, you can enhance your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for swift and efficient inference on NVIDIA hardware. This guide will give you easy-to-follow steps for the conversion process and help you make the most of NVIDIA's advanced technology in your [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) projects. 
## TensorRT @@ -36,11 +36,11 @@ TensorRT models offer a range of key features that contribute to their efficienc - **Dynamic Tensor Memory Management**: TensorRT efficiently manages tensor memory usage during inference, reducing memory overhead and optimizing memory allocation. This results in more efficient GPU memory utilization. -- **Automatic Kernel Tuning**: TensorRT applies automatic kernel tuning to select the most optimized GPU kernel for each layer of the model. This adaptive approach ensures that the model takes full advantage of the GPU's computational power. +- **Automatic Kernel Tuning**: TensorRT applies automatic kernel tuning to select the most optimized GPU kernel for each layer of the model. This adaptive approach ensures that the model takes full advantage of the GPU's computational power. ## Deployment Options in TensorRT -Before we look at the code for exporting YOLOv8 models to the TensorRT format, let's understand where TensorRT models are normally used. +Before we look at the code for exporting YOLO11 models to the TensorRT format, let's understand where TensorRT models are normally used. TensorRT offers several deployment options, and each option balances ease of integration, performance optimization, and flexibility differently: @@ -54,9 +54,9 @@ TensorRT offers several deployment options, and each option balances ease of int - **NVIDIA Triton Inference Server**: An option that supports models from various frameworks. Particularly suited for cloud or edge inference, it provides features like concurrent model execution and model analysis. -## Exporting YOLOv8 Models to TensorRT +## Exporting YOLO11 Models to TensorRT -You can improve execution efficiency and optimize performance by converting YOLOv8 models to TensorRT format.
### Installation @@ -67,15 +67,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. +Before diving into the usage instructions, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. !!! 
example "Usage" @@ -84,14 +84,14 @@ Before diving into the usage instructions, be sure to check out the range of [YO ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TensorRT format - model.export(format="engine") # creates 'yolov8n.engine' + model.export(format="engine") # creates 'yolo11n.engine' # Load the exported TensorRT model - tensorrt_model = YOLO("yolov8n.engine") + tensorrt_model = YOLO("yolo11n.engine") # Run inference results = tensorrt_model("https://ultralytics.com/images/bus.jpg") @@ -100,11 +100,11 @@ Before diving into the usage instructions, be sure to check out the range of [YO === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TensorRT format - yolo export model=yolov8n.pt format=engine # creates 'yolov8n.engine'' + # Export a YOLO11n PyTorch model to TensorRT format + yolo export model=yolo11n.pt format=engine # creates 'yolo11n.engine' # Run inference with the exported model - yolo predict model=yolov8n.engine source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). @@ -127,11 +127,11 @@ The arguments provided when using [export](../modes/export.md) for an Ultralytic - Adjust the `workspace` value according to your calibration needs and resource availability. While a larger `workspace` may increase calibration time, it allows TensorRT to explore a wider range of optimization tactics, potentially enhancing model performance and [accuracy](https://www.ultralytics.com/glossary/accuracy). Conversely, a smaller `workspace` can reduce calibration time but may limit the optimization strategies, affecting the quality of the quantized model.
- - Default is `workspace=4` (GiB), this value may need to be increased if calibration crashes (exits without warning). + - Default is `workspace=None`, which allows TensorRT to automatically allocate memory. When configuring manually, this value may need to be increased if calibration crashes (exits without warning). - - TensorRT will report `UNSUPPORTED_STATE` during export if the value for `workspace` is larger than the memory available to the device, which means the value for `workspace` should be lowered. + - TensorRT will report `UNSUPPORTED_STATE` during export if the value for `workspace` is larger than the memory available to the device, which means the value for `workspace` should be lowered or set to `None`. - - If `workspace` is set to max value and calibration fails/crashes, consider reducing the values for `imgsz` and `batch` to reduce memory requirements. + - If `workspace` is set to max value and calibration fails/crashes, consider using `None` for auto-allocation or reducing the values for `imgsz` and `batch` to reduce memory requirements. - Remember calibration for INT8 is specific to each device, borrowing a "high-end" GPU for calibration, might result in poor performance when inference is run on another device.
@@ -176,8 +176,8 @@ Experimentation by NVIDIA led them to recommend using at least 500 calibration i === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TensorRT format with INT8 quantization - yolo export model=yolov8n.pt format=engine batch=8 workspace=4 int8=True data=coco.yaml # creates 'yolov8n.engine'' + # Export a YOLO11n PyTorch model to TensorRT format with INT8 quantization + yolo export model=yolo11n.pt format=engine batch=8 workspace=4 int8=True data=coco.yaml # creates 'yolo11n.engine' # Run inference with the exported TensorRT quantized model yolo predict model=yolov8n.engine source='https://ultralytics.com/images/bus.jpg' @@ -185,7 +185,7 @@ Experimentation by NVIDIA led them to recommend using at least 500 calibration i ???+ warning "Calibration Cache" - TensorRT will generate a calibration `.cache` which can be re-used to speed up export of future model weights using the same data, but this may result in poor calibration when the data is vastly different or if the `batch` value is changed drastically. In these circumstances, the existing `.cache` should be renamed and moved to a different directory or deleted entirely. + TensorRT will generate a calibration `.cache` which can be reused to speed up export of future model weights using the same data, but this may result in poor calibration when the data is vastly different or if the `batch` value is changed drastically. In these circumstances, the existing `.cache` should be renamed and moved to a different directory or deleted entirely. #### Advantages of using YOLO with TensorRT INT8 @@ -380,7 +380,7 @@ Expand sections below for information on how these models were exported and test See [export mode](../modes/export.md) for details regarding export configuration arguments.
- ```py + ```python from ultralytics import YOLO model = YOLO("yolov8n.pt") @@ -401,7 +401,7 @@ Expand sections below for information on how these models were exported and test See [predict mode](../modes/predict.md) for additional information. - ```py + ```python import cv2 from ultralytics import YOLO @@ -421,7 +421,7 @@ Expand sections below for information on how these models were exported and test See [`val` mode](../modes/val.md) to learn more about validation configuration arguments. - ```py + ```python from ultralytics import YOLO model = YOLO("yolov8n.engine") @@ -434,9 +434,9 @@ Expand sections below for information on how these models were exported and test ) ``` -## Deploying Exported YOLOv8 TensorRT Models +## Deploying Exported YOLO11 TensorRT Models -Having successfully exported your Ultralytics YOLOv8 models to TensorRT format, you're now ready to deploy them. For in-depth instructions on deploying your TensorRT models in various settings, take a look at the following resources: +Having successfully exported your Ultralytics YOLO11 models to TensorRT format, you're now ready to deploy them. For in-depth instructions on deploying your TensorRT models in various settings, take a look at the following resources: - **[Deploy Ultralytics with a Triton Server](../guides/triton-inference-server.md)**: Our guide on how to use NVIDIA's Triton Inference (formerly TensorRT Inference) Server specifically for use with Ultralytics YOLO models. @@ -448,17 +448,17 @@ Having successfully exported your Ultralytics YOLOv8 models to TensorRT format, ## Summary -In this guide, we focused on converting Ultralytics YOLOv8 models to NVIDIA's TensorRT model format. This conversion step is crucial for improving the efficiency and speed of YOLOv8 models, making them more effective and suitable for diverse deployment environments. +In this guide, we focused on converting Ultralytics YOLO11 models to NVIDIA's TensorRT model format. 
This conversion step is crucial for improving the efficiency and speed of YOLO11 models, making them more effective and suitable for diverse deployment environments. For more information on usage details, take a look at the [TensorRT official documentation](https://docs.nvidia.com/deeplearning/tensorrt/). -If you're curious about additional Ultralytics YOLOv8 integrations, our [integration guide page](../integrations/index.md) provides an extensive selection of informative resources and insights. +If you're curious about additional Ultralytics YOLO11 integrations, our [integration guide page](../integrations/index.md) provides an extensive selection of informative resources and insights. ## FAQ -### How do I convert YOLOv8 models to TensorRT format? +### How do I convert YOLO11 models to TensorRT format? -To convert your Ultralytics YOLOv8 models to TensorRT format for optimized NVIDIA GPU inference, follow these steps: +To convert your Ultralytics YOLO11 models to TensorRT format for optimized NVIDIA GPU inference, follow these steps: 1. **Install the required package**: @@ -466,24 +466,24 @@ To convert your Ultralytics YOLOv8 models to TensorRT format for optimized NVIDI pip install ultralytics ``` -2. **Export your YOLOv8 model**: +2. **Export your YOLO11 model**: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.export(format="engine") # creates 'yolov8n.engine' # Run inference - model = YOLO("yolov8n.engine") + model = YOLO("yolo11n.engine") results = model("https://ultralytics.com/images/bus.jpg") ``` -For more details, visit the [YOLOv8 Installation guide](../quickstart.md) and the [export documentation](../modes/export.md). +For more details, visit the [YOLO11 Installation guide](../quickstart.md) and the [export documentation](../modes/export.md). -### What are the benefits of using TensorRT for YOLOv8 models? +### What are the benefits of using TensorRT for YOLO11 models? 
-Using TensorRT to optimize YOLOv8 models offers several benefits: +Using TensorRT to optimize YOLO11 models offers several benefits: - **Faster Inference Speed**: TensorRT optimizes the model layers and uses precision calibration (INT8 and FP16) to speed up inference without significantly sacrificing accuracy. - **Memory Efficiency**: TensorRT manages tensor memory dynamically, reducing overhead and improving GPU memory utilization. @@ -492,9 +492,9 @@ Using TensorRT to optimize YOLOv8 models offers several benefits: For more information, explore the detailed features of TensorRT [here](https://developer.nvidia.com/tensorrt) and read our [TensorRT overview section](#tensorrt). -### Can I use INT8 quantization with TensorRT for YOLOv8 models? +### Can I use INT8 quantization with TensorRT for YOLO11 models? -Yes, you can export YOLOv8 models using TensorRT with INT8 quantization. This process involves post-training quantization (PTQ) and calibration: +Yes, you can export YOLO11 models using TensorRT with INT8 quantization. This process involves post-training quantization (PTQ) and calibration: 1. **Export with INT8**: @@ -516,9 +516,9 @@ Yes, you can export YOLOv8 models using TensorRT with INT8 quantization. This pr For more details, refer to the [exporting TensorRT with INT8 quantization section](#exporting-tensorrt-with-int8-quantization). -### How do I deploy YOLOv8 TensorRT models on an NVIDIA Triton Inference Server? +### How do I deploy YOLO11 TensorRT models on an NVIDIA Triton Inference Server? -Deploying YOLOv8 TensorRT models on an NVIDIA Triton Inference Server can be done using the following resources: +Deploying YOLO11 TensorRT models on an NVIDIA Triton Inference Server can be done using the following resources: - **[Deploy Ultralytics YOLOv8 with Triton Server](../guides/triton-inference-server.md)**: Step-by-step guidance on setting up and using Triton Inference Server. 
- **[NVIDIA Triton Inference Server Documentation](https://developer.nvidia.com/blog/deploying-deep-learning-nvidia-tensorrt/)**: Official NVIDIA documentation for detailed deployment options and configurations. diff --git a/docs/en/integrations/tf-graphdef.md b/docs/en/integrations/tf-graphdef.md index 15cbd48426d..fd6d86a32a5 100644 --- a/docs/en/integrations/tf-graphdef.md +++ b/docs/en/integrations/tf-graphdef.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to export YOLOv8 models to the TF GraphDef format for seamless deployment on various platforms, including mobile and web. -keywords: YOLOv8, export, TensorFlow, GraphDef, model deployment, TensorFlow Serving, TensorFlow Lite, TensorFlow.js, machine learning, AI, computer vision +description: Learn how to export YOLO11 models to the TF GraphDef format for seamless deployment on various platforms, including mobile and web. +keywords: YOLO11, export, TensorFlow, GraphDef, model deployment, TensorFlow Serving, TensorFlow Lite, TensorFlow.js, machine learning, AI, computer vision --- -# How to Export to TF GraphDef from YOLOv8 for Deployment +# How to Export to TF GraphDef from YOLO11 for Deployment -When you are deploying cutting-edge [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models, like YOLOv8, in different environments, you might run into compatibility issues. Google's [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) GraphDef, or TF GraphDef, offers a solution by providing a serialized, platform-independent representation of your model. Using the TF GraphDef model format, you can deploy your YOLOv8 model in environments where the complete TensorFlow ecosystem may not be available, such as mobile devices or specialized hardware. +When you are deploying cutting-edge [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models, like YOLO11, in different environments, you might run into compatibility issues. 
Google's [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) GraphDef, or TF GraphDef, offers a solution by providing a serialized, platform-independent representation of your model. Using the TF GraphDef model format, you can deploy your YOLO11 model in environments where the complete TensorFlow ecosystem may not be available, such as mobile devices or specialized hardware. -In this guide, we'll walk you step by step through how to export your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models to the TF GraphDef model format. By converting your model, you can streamline deployment and use YOLOv8's computer vision capabilities in a broader range of applications and platforms. +In this guide, we'll walk you step by step through how to export your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models to the TF GraphDef model format. By converting your model, you can streamline deployment and use YOLO11's computer vision capabilities in a broader range of applications and platforms.

TensorFlow GraphDef @@ -16,11 +16,11 @@ In this guide, we'll walk you step by step through how to export your [Ultralyti ## Why Should You Export to TF GraphDef? -TF GraphDef is a powerful component of the TensorFlow ecosystem that was developed by Google. It can be used to optimize and deploy models like YOLOv8. Exporting to TF GraphDef lets us move models from research to real-world applications. It allows models to run in environments without the full TensorFlow framework. +TF GraphDef is a powerful component of the TensorFlow ecosystem that was developed by Google. It can be used to optimize and deploy models like YOLO11. Exporting to TF GraphDef lets us move models from research to real-world applications. It allows models to run in environments without the full TensorFlow framework. The GraphDef format represents the model as a serialized computation graph. This enables various optimization techniques like constant folding, quantization, and graph transformations. These optimizations ensure efficient execution, reduced memory usage, and faster inference speeds. -GraphDef models can use hardware accelerators such as GPUs, TPUs, and AI chips, unlocking significant performance gains for the YOLOv8 inference pipeline. The TF GraphDef format creates a self-contained package with the model and its dependencies, simplifying deployment and integration into diverse systems. +GraphDef models can use hardware accelerators such as GPUs, TPUs, and AI chips, unlocking significant performance gains for the YOLO11 inference pipeline. The TF GraphDef format creates a self-contained package with the model and its dependencies, simplifying deployment and integration into diverse systems. ## Key Features of TF GraphDef Models @@ -38,7 +38,7 @@ Here's a look at its key characteristics: ## Deployment Options with TF GraphDef -Before we dive into the process of exporting YOLOv8 models to TF GraphDef, let's take a look at some typical deployment situations where this format is used. 
+Before we dive into the process of exporting YOLO11 models to TF GraphDef, let's take a look at some typical deployment situations where this format is used. Here's how you can deploy with TF GraphDef efficiently across various platforms. @@ -46,13 +46,13 @@ Here's how you can deploy with TF GraphDef efficiently across various platforms. - **Mobile and Embedded Devices:** With tools like TensorFlow Lite, you can convert TF GraphDef models into formats optimized for smartphones, tablets, and various embedded devices. Your models can then be used for on-device inference, where execution is done locally, often providing performance gains and offline capabilities. -- **Web Browsers:** TensorFlow.js enables the deployment of TF GraphDef models directly within web browsers. It paves the way for real-time object detection applications running on the client side, using the capabilities of YOLOv8 through JavaScript. +- **Web Browsers:** TensorFlow.js enables the deployment of TF GraphDef models directly within web browsers. It paves the way for real-time object detection applications running on the client side, using the capabilities of YOLO11 through JavaScript. - **Specialized Hardware:** TF GraphDef's platform-agnostic nature allows it to target custom hardware, such as accelerators and TPUs (Tensor Processing Units). These devices can provide performance advantages for computationally intensive models. -## Exporting YOLOv8 Models to TF GraphDef +## Exporting YOLO11 Models to TF GraphDef -You can convert your YOLOv8 object detection model to the TF GraphDef format, which is compatible with various systems, to improve its performance across platforms. +You can convert your YOLO11 object detection model to the TF GraphDef format, which is compatible with various systems, to improve its performance across platforms. 
### Installation @@ -63,15 +63,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! 
example "Usage" @@ -80,14 +80,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF GraphDef format - model.export(format="pb") # creates 'yolov8n.pb' + model.export(format="pb") # creates 'yolo11n.pb' # Load the exported TF GraphDef model - tf_graphdef_model = YOLO("yolov8n.pb") + tf_graphdef_model = YOLO("yolo11n.pb") # Run inference results = tf_graphdef_model("https://ultralytics.com/images/bus.jpg") @@ -96,18 +96,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TF GraphDef format - yolo export model=yolov8n.pt format=pb # creates 'yolov8n.pb' + # Export a YOLO11n PyTorch model to TF GraphDef format + yolo export model=yolo11n.pt format=pb # creates 'yolo11n.pb' # Run inference with the exported model - yolo predict model='yolov8n.pb' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='yolo11n.pb' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 TF GraphDef Models +## Deploying Exported YOLO11 TF GraphDef Models -Once you've exported your YOLOv8 model to the TF GraphDef format, the next step is deployment. The primary and recommended first step for running a TF GraphDef model is to use the YOLO("model.pb") method, as previously shown in the usage code snippet. +Once you've exported your YOLO11 model to the TF GraphDef format, the next step is deployment. The primary and recommended first step for running a TF GraphDef model is to use the YOLO("model.pb") method, as previously shown in the usage code snippet. 
However, for more information on deploying your TF GraphDef models, take a look at the following resources: @@ -119,17 +119,17 @@ However, for more information on deploying your TF GraphDef models, take a look ## Summary -In this guide, we explored how to export Ultralytics YOLOv8 models to the TF GraphDef format. By doing this, you can flexibly deploy your optimized YOLOv8 models in different environments. +In this guide, we explored how to export Ultralytics YOLO11 models to the TF GraphDef format. By doing this, you can flexibly deploy your optimized YOLO11 models in different environments. For further details on usage, visit the [TF GraphDef official documentation](https://www.tensorflow.org/api_docs/python/tf/Graph). -For more information on integrating Ultralytics YOLOv8 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It has great resources and insights to help you make the most of YOLOv8 in your projects. +For more information on integrating Ultralytics YOLO11 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It has great resources and insights to help you make the most of YOLO11 in your projects. ## FAQ -### How do I export a YOLOv8 model to TF GraphDef format? +### How do I export a YOLO11 model to TF GraphDef format? -Ultralytics YOLOv8 models can be exported to TensorFlow GraphDef (TF GraphDef) format seamlessly. This format provides a serialized, platform-independent representation of the model, ideal for deploying in varied environments like mobile and web. To export a YOLOv8 model to TF GraphDef, follow these steps: +Ultralytics YOLO11 models can be exported to TensorFlow GraphDef (TF GraphDef) format seamlessly. This format provides a serialized, platform-independent representation of the model, ideal for deploying in varied environments like mobile and web. To export a YOLO11 model to TF GraphDef, follow these steps: !!! 
example "Usage" @@ -138,14 +138,14 @@ Ultralytics YOLOv8 models can be exported to TensorFlow GraphDef (TF GraphDef) f ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF GraphDef format - model.export(format="pb") # creates 'yolov8n.pb' + model.export(format="pb") # creates 'yolo11n.pb' # Load the exported TF GraphDef model - tf_graphdef_model = YOLO("yolov8n.pb") + tf_graphdef_model = YOLO("yolo11n.pb") # Run inference results = tf_graphdef_model("https://ultralytics.com/images/bus.jpg") @@ -154,18 +154,18 @@ Ultralytics YOLOv8 models can be exported to TensorFlow GraphDef (TF GraphDef) f === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TF GraphDef format - yolo export model="yolov8n.pt" format="pb" # creates 'yolov8n.pb' + # Export a YOLO11n PyTorch model to TF GraphDef format + yolo export model="yolo11n.pt" format="pb" # creates 'yolo11n.pb' # Run inference with the exported model - yolo predict model="yolov8n.pb" source="https://ultralytics.com/images/bus.jpg" + yolo predict model="yolo11n.pb" source="https://ultralytics.com/images/bus.jpg" ``` For more information on different export options, visit the [Ultralytics documentation on model export](../modes/export.md). -### What are the benefits of using TF GraphDef for YOLOv8 model deployment? +### What are the benefits of using TF GraphDef for YOLO11 model deployment? -Exporting YOLOv8 models to the TF GraphDef format offers multiple advantages, including: +Exporting YOLO11 models to the TF GraphDef format offers multiple advantages, including: 1. **Platform Independence**: TF GraphDef provides a platform-independent format, allowing models to be deployed across various environments including mobile and web browsers. 2. 
**Optimizations**: The format enables several optimizations, such as constant folding, quantization, and graph transformations, which enhance execution efficiency and reduce memory usage. @@ -173,19 +173,19 @@ Exporting YOLOv8 models to the TF GraphDef format offers multiple advantages, in Read more about the benefits in the [TF GraphDef section](#why-should-you-export-to-tf-graphdef) of our documentation. -### Why should I use Ultralytics YOLOv8 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models? +### Why should I use Ultralytics YOLO11 over other [object detection](https://www.ultralytics.com/glossary/object-detection) models? -Ultralytics YOLOv8 offers numerous advantages compared to other models like YOLOv5 and YOLOv7. Some key benefits include: +Ultralytics YOLO11 offers numerous advantages compared to other models like YOLOv5 and YOLOv7. Some key benefits include: -1. **State-of-the-Art Performance**: YOLOv8 provides exceptional speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for real-time object detection, segmentation, and classification. +1. **State-of-the-Art Performance**: YOLO11 provides exceptional speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) for real-time object detection, segmentation, and classification. 2. **Ease of Use**: Features a user-friendly API for model training, validation, prediction, and export, making it accessible for both beginners and experts. 3. **Broad Compatibility**: Supports multiple export formats including ONNX, TensorRT, CoreML, and TensorFlow, for versatile deployment options. -Explore further details in our [introduction to YOLOv8](https://docs.ultralytics.com/models/yolov8/). +Explore further details in our [introduction to YOLO11](https://docs.ultralytics.com/models/yolo11/). -### How can I deploy a YOLOv8 model on specialized hardware using TF GraphDef? +### How can I deploy a YOLO11 model on specialized hardware using TF GraphDef? 
-Once a YOLOv8 model is exported to TF GraphDef format, you can deploy it across various specialized hardware platforms. Typical deployment scenarios include: +Once a YOLO11 model is exported to TF GraphDef format, you can deploy it across various specialized hardware platforms. Typical deployment scenarios include: - **TensorFlow Serving**: Use TensorFlow Serving for scalable model deployment in production environments. It supports model management and efficient serving. - **Mobile Devices**: Convert TF GraphDef models to TensorFlow Lite, optimized for mobile and embedded devices, enabling on-device inference. @@ -194,11 +194,11 @@ Once a YOLOv8 model is exported to TF GraphDef format, you can deploy it across Check the [deployment options](#deployment-options-with-tf-graphdef) section for detailed information. -### Where can I find solutions for common issues while exporting YOLOv8 models? +### Where can I find solutions for common issues while exporting YOLO11 models? -For troubleshooting common issues with exporting YOLOv8 models, Ultralytics provides comprehensive guides and resources. If you encounter problems during installation or model export, refer to: +For troubleshooting common issues with exporting YOLO11 models, Ultralytics provides comprehensive guides and resources. If you encounter problems during installation or model export, refer to: - **[Common Issues Guide](../guides/yolo-common-issues.md)**: Offers solutions to frequently faced problems. - **[Installation Guide](../quickstart.md)**: Step-by-step instructions for setting up the required packages. -These resources should help you resolve most issues related to YOLOv8 model export and deployment. +These resources should help you resolve most issues related to YOLO11 model export and deployment. 
diff --git a/docs/en/integrations/tf-savedmodel.md b/docs/en/integrations/tf-savedmodel.md index 9f04dc7893c..682743b1675 100644 --- a/docs/en/integrations/tf-savedmodel.md +++ b/docs/en/integrations/tf-savedmodel.md @@ -1,14 +1,14 @@ --- comments: true -description: Learn how to export Ultralytics YOLOv8 models to TensorFlow SavedModel format for easy deployment across various platforms and environments. -keywords: YOLOv8, TF SavedModel, Ultralytics, TensorFlow, model export, model deployment, machine learning, AI +description: Learn how to export Ultralytics YOLO11 models to TensorFlow SavedModel format for easy deployment across various platforms and environments. +keywords: YOLO11, TF SavedModel, Ultralytics, TensorFlow, model export, model deployment, machine learning, AI --- -# Understand How to Export to TF SavedModel Format From YOLOv8 +# Understand How to Export to TF SavedModel Format From YOLO11 Deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models can be challenging. However, using an efficient and flexible model format can make your job easier. TF SavedModel is an open-source machine-learning framework used by TensorFlow to load machine-learning models in a consistent way. It is like a suitcase for TensorFlow models, making them easy to carry and use on different devices and systems. -Learning how to export to TF SavedModel from [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models can help you deploy models easily across different platforms and environments. In this guide, we'll walk through how to convert your models to the TF SavedModel format, simplifying the process of running inferences with your models on different devices. +Learning how to export to TF SavedModel from [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models can help you deploy models easily across different platforms and environments. 
In this guide, we'll walk through how to convert your models to the TF SavedModel format, simplifying the process of running inferences with your models on different devices. ## Why Should You Export to TF SavedModel? @@ -32,7 +32,7 @@ Here are the key features that make TF SavedModel a great option for AI develope ## Deployment Options with TF SavedModel -Before we dive into the process of exporting YOLOv8 models to the TF SavedModel format, let's explore some typical deployment scenarios where this format is used. +Before we dive into the process of exporting YOLO11 models to the TF SavedModel format, let's explore some typical deployment scenarios where this format is used. TF SavedModel provides a range of options to deploy your machine learning models: @@ -44,9 +44,9 @@ TF SavedModel provides a range of options to deploy your machine learning models - **TensorFlow Runtime:** TensorFlow Runtime (`tfrt`) is a high-performance runtime for executing [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) graphs. It provides lower-level APIs for loading and running TF SavedModels in C++ environments. TensorFlow Runtime offers better performance compared to the standard TensorFlow runtime. It is suitable for deployment scenarios that require low-latency inference and tight integration with existing C++ codebases. -## Exporting YOLOv8 Models to TF SavedModel +## Exporting YOLO11 Models to TF SavedModel -By exporting YOLOv8 models to the TF SavedModel format, you enhance their adaptability and ease of deployment across various platforms. +By exporting YOLO11 models to the TF SavedModel format, you enhance their adaptability and ease of deployment across various platforms. 
### Installation @@ -57,15 +57,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! 
example "Usage" @@ -74,14 +74,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF SavedModel format - model.export(format="saved_model") # creates '/yolov8n_saved_model' + model.export(format="saved_model") # creates '/yolo11n_saved_model' # Load the exported TF SavedModel model - tf_savedmodel_model = YOLO("./yolov8n_saved_model") + tf_savedmodel_model = YOLO("./yolo11n_saved_model") # Run inference results = tf_savedmodel_model("https://ultralytics.com/images/bus.jpg") @@ -90,18 +90,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TF SavedModel format - yolo export model=yolov8n.pt format=saved_model # creates '/yolov8n_saved_model' + # Export a YOLO11n PyTorch model to TF SavedModel format + yolo export model=yolo11n.pt format=saved_model # creates '/yolo11n_saved_model' # Run inference with the exported model - yolo predict model='./yolov8n_saved_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_saved_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 TF SavedModel Models +## Deploying Exported YOLO11 TF SavedModel Models -Now that you have exported your YOLOv8 model to the TF SavedModel format, the next step is to deploy it. The primary and recommended first step for running a TF GraphDef model is to use the YOLO("./yolov8n_saved_model") method, as previously shown in the usage code snippet. +Now that you have exported your YOLO11 model to the TF SavedModel format, the next step is to deploy it. 
The primary and recommended first step for running a TF SavedModel model is to use the `YOLO("./yolo11n_saved_model")` method, as previously shown in the usage code snippet. However, for in-depth instructions on deploying your TF SavedModel models, take a look at the following resources: @@ -113,11 +113,11 @@ However, for in-depth instructions on deploying your TF SavedModel models, take ## Summary -In this guide, we explored how to export Ultralytics YOLOv8 models to the TF SavedModel format. By exporting to TF SavedModel, you gain the flexibility to optimize, deploy, and scale your YOLOv8 models on a wide range of platforms. +In this guide, we explored how to export Ultralytics YOLO11 models to the TF SavedModel format. By exporting to TF SavedModel, you gain the flexibility to optimize, deploy, and scale your YOLO11 models on a wide range of platforms. For further details on usage, visit the [TF SavedModel official documentation](https://www.tensorflow.org/guide/saved_model). -For more information on integrating Ultralytics YOLOv8 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It's packed with great resources to help you make the most of YOLOv8 in your projects. +For more information on integrating Ultralytics YOLO11 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It's packed with great resources to help you make the most of YOLO11 in your projects. ## FAQ @@ -125,32 +125,32 @@ For more information on integrating Ultralytics YOLOv8 with other platforms and Exporting an Ultralytics YOLO model to the TensorFlow SavedModel format is straightforward. You can use either Python or CLI to achieve this: -!!! example "Exporting YOLOv8 to TF SavedModel" +!!! 
example "Exporting YOLO11 to TF SavedModel" === "Python" ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF SavedModel format - model.export(format="saved_model") # creates '/yolov8n_saved_model' + model.export(format="saved_model") # creates '/yolo11n_saved_model' # Load the exported TF SavedModel for inference - tf_savedmodel_model = YOLO("./yolov8n_saved_model") + tf_savedmodel_model = YOLO("./yolo11n_saved_model") results = tf_savedmodel_model("https://ultralytics.com/images/bus.jpg") ``` === "CLI" ```bash - # Export the YOLOv8 model to TF SavedModel format - yolo export model=yolov8n.pt format=saved_model # creates '/yolov8n_saved_model' + # Export the YOLO11 model to TF SavedModel format + yolo export model=yolo11n.pt format=saved_model # creates '/yolo11n_saved_model' # Run inference with the exported model - yolo predict model='./yolov8n_saved_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_saved_model' source='https://ultralytics.com/images/bus.jpg' ``` Refer to the [Ultralytics Export documentation](../modes/export.md) for more details. @@ -176,9 +176,9 @@ TF SavedModel can be deployed in various environments, including: For detailed deployment options, visit the official guides on [deploying TensorFlow models](https://www.tensorflow.org/tfx/guide/serving). -### How can I install the necessary packages to export YOLOv8 models? +### How can I install the necessary packages to export YOLO11 models? -To export YOLOv8 models, you need to install the `ultralytics` package. Run the following command in your terminal: +To export YOLO11 models, you need to install the `ultralytics` package. 
Run the following command in your terminal: ```bash pip install ultralytics diff --git a/docs/en/integrations/tfjs.md b/docs/en/integrations/tfjs.md index ea2d613c62f..a8168215b63 100644 --- a/docs/en/integrations/tfjs.md +++ b/docs/en/integrations/tfjs.md @@ -1,14 +1,14 @@ --- comments: true -description: Convert your Ultralytics YOLOv8 models to TensorFlow.js for high-speed, local object detection. Learn how to optimize ML models for browser and Node.js apps. -keywords: YOLOv8, TensorFlow.js, TF.js, model export, machine learning, object detection, browser ML, Node.js, Ultralytics, YOLO, export models +description: Convert your Ultralytics YOLO11 models to TensorFlow.js for high-speed, local object detection. Learn how to optimize ML models for browser and Node.js apps. +keywords: YOLO11, TensorFlow.js, TF.js, model export, machine learning, object detection, browser ML, Node.js, Ultralytics, YOLO, export models --- -# Export to TF.js Model Format From a YOLOv8 Model Format +# Export to TF.js Model Format From a YOLO11 Model Format Deploying [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models directly in the browser or on Node.js can be tricky. You'll need to make sure your model format is optimized for faster performance so that the model can be used to run interactive applications locally on the user's device. The TensorFlow.js, or TF.js, model format is designed to use minimal power while delivering fast performance. -The 'export to TF.js model format' feature allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and locally-run [object detection](https://www.ultralytics.com/glossary/object-detection) inference. In this guide, we'll walk you through converting your models to the TF.js format, making it easier for your models to perform well on various local browsers and Node.js applications. 
+The 'export to TF.js model format' feature allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for high-speed and locally-run [object detection](https://www.ultralytics.com/glossary/object-detection) inference. In this guide, we'll walk you through converting your models to the TF.js format, making it easier for your models to perform well on various local browsers and Node.js applications. ## Why Should You Export to TF.js? @@ -32,7 +32,7 @@ Here are the key features that make TF.js a powerful tool for developers: ## Deployment Options with TensorFlow.js -Before we dive into the process of exporting YOLOv8 models to the TF.js format, let's explore some typical deployment scenarios where this format is used. +Before we dive into the process of exporting YOLO11 models to the TF.js format, let's explore some typical deployment scenarios where this format is used. TF.js provides a range of options to deploy your machine learning models: @@ -42,9 +42,9 @@ TF.js provides a range of options to deploy your machine learning models: - **Chrome Extensions:** An interesting deployment scenario is the creation of Chrome extensions with TensorFlow.js. For instance, you can develop an extension that allows users to right-click on an image within any webpage to classify it using a pre-trained ML model. TensorFlow.js can be integrated into everyday web browsing experiences to provide immediate insights or augmentations based on machine learning. -## Exporting YOLOv8 Models to TensorFlow.js +## Exporting YOLO11 Models to TensorFlow.js -You can expand model compatibility and deployment flexibility by converting YOLOv8 models to TF.js. +You can expand model compatibility and deployment flexibility by converting YOLO11 models to TF.js. 
### Installation @@ -55,15 +55,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! 
example "Usage" @@ -72,14 +72,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF.js format - model.export(format="tfjs") # creates '/yolov8n_web_model' + model.export(format="tfjs") # creates '/yolo11n_web_model' # Load the exported TF.js model - tfjs_model = YOLO("./yolov8n_web_model") + tfjs_model = YOLO("./yolo11n_web_model") # Run inference results = tfjs_model("https://ultralytics.com/images/bus.jpg") @@ -88,18 +88,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TF.js format - yolo export model=yolov8n.pt format=tfjs # creates '/yolov8n_web_model' + # Export a YOLO11n PyTorch model to TF.js format + yolo export model=yolo11n.pt format=tfjs # creates '/yolo11n_web_model' # Run inference with the exported model - yolo predict model='./yolov8n_web_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_web_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -## Deploying Exported YOLOv8 TensorFlow.js Models +## Deploying Exported YOLO11 TensorFlow.js Models -Now that you have exported your YOLOv8 model to the TF.js format, the next step is to deploy it. The primary and recommended first step for running a TF.js is to use the YOLO("./yolov8n_web_model") method, as previously shown in the usage code snippet. +Now that you have exported your YOLO11 model to the TF.js format, the next step is to deploy it. The primary and recommended first step for running a TF.js model is to use the `YOLO("./yolo11n_web_model")` method, as previously shown in the usage code snippet. 
However, for in-depth instructions on deploying your TF.js models, take a look at the following resources: @@ -111,17 +111,17 @@ However, for in-depth instructions on deploying your TF.js models, take a look a ## Summary -In this guide, we learned how to export Ultralytics YOLOv8 models to the TensorFlow.js format. By exporting to TF.js, you gain the flexibility to optimize, deploy, and scale your YOLOv8 models on a wide range of platforms. +In this guide, we learned how to export Ultralytics YOLO11 models to the TensorFlow.js format. By exporting to TF.js, you gain the flexibility to optimize, deploy, and scale your YOLO11 models on a wide range of platforms. For further details on usage, visit the [TensorFlow.js official documentation](https://www.tensorflow.org/js/guide). -For more information on integrating Ultralytics YOLOv8 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It's packed with great resources to help you make the most of YOLOv8 in your projects. +For more information on integrating Ultralytics YOLO11 with other platforms and frameworks, don't forget to check out our [integration guide page](index.md). It's packed with great resources to help you make the most of YOLO11 in your projects. ## FAQ -### How do I export Ultralytics YOLOv8 models to TensorFlow.js format? +### How do I export Ultralytics YOLO11 models to TensorFlow.js format? -Exporting Ultralytics YOLOv8 models to TensorFlow.js (TF.js) format is straightforward. You can follow these steps: +Exporting Ultralytics YOLO11 models to TensorFlow.js (TF.js) format is straightforward. You can follow these steps: !!! 
example "Usage" @@ -130,14 +130,14 @@ Exporting Ultralytics YOLOv8 models to TensorFlow.js (TF.js) format is straightf ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TF.js format - model.export(format="tfjs") # creates '/yolov8n_web_model' + model.export(format="tfjs") # creates '/yolo11n_web_model' # Load the exported TF.js model - tfjs_model = YOLO("./yolov8n_web_model") + tfjs_model = YOLO("./yolo11n_web_model") # Run inference results = tfjs_model("https://ultralytics.com/images/bus.jpg") @@ -146,18 +146,18 @@ Exporting Ultralytics YOLOv8 models to TensorFlow.js (TF.js) format is straightf === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TF.js format - yolo export model=yolov8n.pt format=tfjs # creates '/yolov8n_web_model' + # Export a YOLO11n PyTorch model to TF.js format + yolo export model=yolo11n.pt format=tfjs # creates '/yolo11n_web_model' # Run inference with the exported model - yolo predict model='./yolov8n_web_model' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='./yolo11n_web_model' source='https://ultralytics.com/images/bus.jpg' ``` For more details about supported export options, visit the [Ultralytics documentation page on deployment options](../guides/model-deployment-options.md). -### Why should I export my YOLOv8 models to TensorFlow.js? +### Why should I export my YOLO11 models to TensorFlow.js? -Exporting YOLOv8 models to TensorFlow.js offers several advantages, including: +Exporting YOLO11 models to TensorFlow.js offers several advantages, including: 1. **Local Execution:** Models can run directly in the browser or Node.js, reducing latency and enhancing user experience. 2. **Cross-Platform Support:** TF.js supports multiple environments, allowing flexibility in deployment. 
@@ -177,7 +177,7 @@ TensorFlow.js is specifically designed for efficient execution of ML models in b Interested in learning more about TF.js? Check out the [official TensorFlow.js guide](https://www.tensorflow.org/js/guide). -### What are the key features of TensorFlow.js for deploying YOLOv8 models? +### What are the key features of TensorFlow.js for deploying YOLO11 models? Key features of TensorFlow.js include: @@ -185,10 +185,10 @@ Key features of TensorFlow.js include: - **Multiple Backends:** Supports CPU, WebGL for GPU acceleration, WebAssembly (WASM), and WebGPU for advanced operations. - **Offline Capabilities:** Models can run directly in the browser without internet connectivity, making it ideal for developing responsive web applications. -For deployment scenarios and more in-depth information, see our section on [Deployment Options with TensorFlow.js](#deploying-exported-yolov8-tensorflowjs-models). +For deployment scenarios and more in-depth information, see our section on [Deployment Options with TensorFlow.js](#deploying-exported-yolo11-tensorflowjs-models). -### Can I deploy a YOLOv8 model on server-side Node.js applications using TensorFlow.js? +### Can I deploy a YOLO11 model on server-side Node.js applications using TensorFlow.js? -Yes, TensorFlow.js allows the deployment of YOLOv8 models on Node.js environments. This enables server-side machine learning applications that benefit from the processing power of a server and access to server-side data. Typical use cases include real-time data processing and machine learning pipelines on backend servers. +Yes, TensorFlow.js allows the deployment of YOLO11 models on Node.js environments. This enables server-side machine learning applications that benefit from the processing power of a server and access to server-side data. Typical use cases include real-time data processing and machine learning pipelines on backend servers. 
To get started with Node.js deployment, refer to the [Run TensorFlow.js in Node.js](https://www.tensorflow.org/js/guide/nodejs) guide from TensorFlow. diff --git a/docs/en/integrations/tflite.md b/docs/en/integrations/tflite.md index 028675eabbc..9f0ebad1566 100644 --- a/docs/en/integrations/tflite.md +++ b/docs/en/integrations/tflite.md @@ -1,10 +1,10 @@ --- comments: true -description: Learn how to convert YOLOv8 models to TFLite for edge device deployment. Optimize performance and ensure seamless execution on various platforms. -keywords: YOLOv8, TFLite, model export, TensorFlow Lite, edge devices, deployment, Ultralytics, machine learning, on-device inference, model optimization +description: Learn how to convert YOLO11 models to TFLite for edge device deployment. Optimize performance and ensure seamless execution on various platforms. +keywords: YOLO11, TFLite, model export, TensorFlow Lite, edge devices, deployment, Ultralytics, machine learning, on-device inference, model optimization --- -# A Guide on YOLOv8 Model Export to TFLite for Deployment +# A Guide on YOLO11 Model Export to TFLite for Deployment

TFLite Logo @@ -12,7 +12,7 @@ keywords: YOLOv8, TFLite, model export, TensorFlow Lite, edge devices, deploymen Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models on edge devices or embedded devices requires a format that can ensure seamless performance. -The TensorFlow Lite or TFLite export format allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and [image classification](https://www.ultralytics.com/glossary/image-classification) in edge device-based applications. In this guide, we'll walk through the steps for converting your models to the TFLite format, making it easier for your models to perform well on various edge devices. +The TensorFlow Lite or TFLite export format allows you to optimize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for tasks like [object detection](https://www.ultralytics.com/glossary/object-detection) and [image classification](https://www.ultralytics.com/glossary/image-classification) in edge device-based applications. In this guide, we'll walk through the steps for converting your models to the TFLite format, making it easier for your models to perform well on various edge devices. ## Why should you export to TFLite? @@ -34,7 +34,7 @@ TFLite models offer a wide range of key features that enable on-device machine l ## Deployment Options in TFLite -Before we look at the code for exporting YOLOv8 models to the TFLite format, let's understand how TFLite models are normally used. +Before we look at the code for exporting YOLO11 models to the TFLite format, let's understand how TFLite models are normally used. 
TFLite offers various on-device deployment options for machine learning models, including: @@ -48,7 +48,7 @@ TFLite offers various on-device deployment options for machine learning models, - **Deploying with Microcontrollers**: TFLite models can also be deployed on microcontrollers and other devices with only a few kilobytes of memory. The core runtime just fits in 16 KB on an Arm Cortex M3 and can run many basic models. It doesn't require operating system support, any standard C or C++ libraries, or dynamic memory allocation. -## Export to TFLite: Converting Your YOLOv8 Model +## Export to TFLite: Converting Your YOLO11 Model You can improve on-device model execution efficiency and optimize performance by converting them to TFLite format. @@ -61,15 +61,15 @@ To install the required packages, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). 
+Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! example "Usage" @@ -78,14 +78,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TFLite format - model.export(format="tflite") # creates 'yolov8n_float32.tflite' + model.export(format="tflite") # creates 'yolo11n_float32.tflite' # Load the exported TFLite model - tflite_model = YOLO("yolov8n_float32.tflite") + tflite_model = YOLO("yolo11n_float32.tflite") # Run inference results = tflite_model("https://ultralytics.com/images/bus.jpg") @@ -94,18 +94,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TFLite format - yolo export model=yolov8n.pt format=tflite # creates 'yolov8n_float32.tflite' + # Export a YOLO11n PyTorch model to TFLite format + yolo export model=yolo11n.pt format=tflite # creates 'yolo11n_float32.tflite' # Run inference with the exported model - yolo predict model='yolov8n_float32.tflite' source='https://ultralytics.com/images/bus.jpg' + yolo predict model='yolo11n_float32.tflite' source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). -## Deploying Exported YOLOv8 TFLite Models +## Deploying Exported YOLO11 TFLite Models -After successfully exporting your Ultralytics YOLOv8 models to TFLite format, you can now deploy them. The primary and recommended first step for running a TFLite model is to utilize the YOLO("model.tflite") method, as outlined in the previous usage code snippet. 
However, for in-depth instructions on deploying your TFLite models in various other settings, take a look at the following resources: +After successfully exporting your Ultralytics YOLO11 models to TFLite format, you can now deploy them. The primary and recommended first step for running a TFLite model is to utilize the YOLO("model.tflite") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your TFLite models in various other settings, take a look at the following resources: - **[Android](https://ai.google.dev/edge/litert/android)**: A quick start guide for integrating [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) Lite into Android applications, providing easy-to-follow steps for setting up and running [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models. @@ -115,17 +115,17 @@ After successfully exporting your Ultralytics YOLOv8 models to TFLite format, yo ## Summary -In this guide, we focused on how to export to TFLite format. By converting your Ultralytics YOLOv8 models to TFLite model format, you can improve the efficiency and speed of YOLOv8 models, making them more effective and suitable for [edge computing](https://www.ultralytics.com/glossary/edge-computing) environments. +In this guide, we focused on how to export to TFLite format. By converting your Ultralytics YOLO11 models to TFLite model format, you can improve the efficiency and speed of YOLO11 models, making them more effective and suitable for [edge computing](https://www.ultralytics.com/glossary/edge-computing) environments. For further details on usage, visit the [TFLite official documentation](https://ai.google.dev/edge/litert). -Also, if you're curious about other Ultralytics YOLOv8 integrations, make sure to check out our [integration guide page](../integrations/index.md). You'll find tons of helpful info and insights waiting for you there. 
+Also, if you're curious about other Ultralytics YOLO11 integrations, make sure to check out our [integration guide page](../integrations/index.md). You'll find tons of helpful info and insights waiting for you there. ## FAQ -### How do I export a YOLOv8 model to TFLite format? +### How do I export a YOLO11 model to TFLite format? -To export a YOLOv8 model to TFLite format, you can use the Ultralytics library. First, install the required package using: +To export a YOLO11 model to TFLite format, you can use the Ultralytics library. First, install the required package using: ```bash pip install ultralytics @@ -136,24 +136,24 @@ Then, use the following code snippet to export your model: ```python from ultralytics import YOLO -# Load the YOLOv8 model -model = YOLO("yolov8n.pt") +# Load the YOLO11 model +model = YOLO("yolo11n.pt") # Export the model to TFLite format -model.export(format="tflite") # creates 'yolov8n_float32.tflite' +model.export(format="tflite") # creates 'yolo11n_float32.tflite' ``` For CLI users, you can achieve this with: ```bash -yolo export model=yolov8n.pt format=tflite # creates 'yolov8n_float32.tflite' +yolo export model=yolo11n.pt format=tflite # creates 'yolo11n_float32.tflite' ``` For more details, visit the [Ultralytics export guide](../modes/export.md). -### What are the benefits of using TensorFlow Lite for YOLOv8 [model deployment](https://www.ultralytics.com/glossary/model-deployment)? +### What are the benefits of using TensorFlow Lite for YOLO11 [model deployment](https://www.ultralytics.com/glossary/model-deployment)? -TensorFlow Lite (TFLite) is an open-source [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework designed for on-device inference, making it ideal for deploying YOLOv8 models on mobile, embedded, and IoT devices. 
Key benefits include: +TensorFlow Lite (TFLite) is an open-source [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework designed for on-device inference, making it ideal for deploying YOLO11 models on mobile, embedded, and IoT devices. Key benefits include: - **On-device optimization**: Minimize latency and enhance privacy by processing data locally. - **Platform compatibility**: Supports Android, iOS, embedded Linux, and MCU. @@ -161,33 +161,33 @@ TensorFlow Lite (TFLite) is an open-source [deep learning](https://www.ultralyti To learn more, check out the [TFLite guide](https://ai.google.dev/edge/litert). -### Is it possible to run YOLOv8 TFLite models on Raspberry Pi? +### Is it possible to run YOLO11 TFLite models on Raspberry Pi? -Yes, you can run YOLOv8 TFLite models on Raspberry Pi to improve inference speeds. First, export your model to TFLite format as explained [here](#how-do-i-export-a-yolov8-model-to-tflite-format). Then, use a tool like TensorFlow Lite Interpreter to execute the model on your Raspberry Pi. +Yes, you can run YOLO11 TFLite models on Raspberry Pi to improve inference speeds. First, export your model to TFLite format as explained [here](#how-do-i-export-a-yolo11-model-to-tflite-format). Then, use a tool like TensorFlow Lite Interpreter to execute the model on your Raspberry Pi. For further optimizations, you might consider using [Coral Edge TPU](https://coral.withgoogle.com/). For detailed steps, refer to our [Raspberry Pi deployment guide](../guides/raspberry-pi.md). -### Can I use TFLite models on microcontrollers for YOLOv8 predictions? +### Can I use TFLite models on microcontrollers for YOLO11 predictions? -Yes, TFLite supports deployment on microcontrollers with limited resources. TFLite's core runtime requires only 16 KB of memory on an Arm Cortex M3 and can run basic YOLOv8 models. This makes it suitable for deployment on devices with minimal computational power and memory. 
+Yes, TFLite supports deployment on microcontrollers with limited resources. TFLite's core runtime requires only 16 KB of memory on an Arm Cortex M3 and can run basic YOLO11 models. This makes it suitable for deployment on devices with minimal computational power and memory. To get started, visit the [TFLite Micro for Microcontrollers guide](https://ai.google.dev/edge/litert/microcontrollers/overview). -### What platforms are compatible with TFLite exported YOLOv8 models? +### What platforms are compatible with TFLite exported YOLO11 models? -TensorFlow Lite provides extensive platform compatibility, allowing you to deploy YOLOv8 models on a wide range of devices, including: +TensorFlow Lite provides extensive platform compatibility, allowing you to deploy YOLO11 models on a wide range of devices, including: - **Android and iOS**: Native support through TFLite Android and iOS libraries. - **Embedded Linux**: Ideal for single-board computers such as Raspberry Pi. - **Microcontrollers**: Suitable for MCUs with constrained resources. -For more information on deployment options, see our detailed [deployment guide](#deploying-exported-yolov8-tflite-models). +For more information on deployment options, see our detailed [deployment guide](#deploying-exported-yolo11-tflite-models). -### How do I troubleshoot common issues during YOLOv8 model export to TFLite? +### How do I troubleshoot common issues during YOLO11 model export to TFLite? -If you encounter errors while exporting YOLOv8 models to TFLite, common solutions include: +If you encounter errors while exporting YOLO11 models to TFLite, common solutions include: - **Check package compatibility**: Ensure you're using compatible versions of Ultralytics and TensorFlow. Refer to our [installation guide](../quickstart.md). -- **Model support**: Verify that the specific YOLOv8 model supports TFLite export by checking [here](../modes/export.md). 
+- **Model support**: Verify that the specific YOLO11 model supports TFLite export by checking [here](../modes/export.md). For additional troubleshooting tips, visit our [Common Issues guide](../guides/yolo-common-issues.md). diff --git a/docs/en/integrations/torchscript.md b/docs/en/integrations/torchscript.md index 839caff921a..1be1516c0b6 100644 --- a/docs/en/integrations/torchscript.md +++ b/docs/en/integrations/torchscript.md @@ -1,22 +1,22 @@ --- comments: true -description: Learn how to export Ultralytics YOLOv8 models to TorchScript for flexible, cross-platform deployment. Boost performance and utilize in various environments. -keywords: YOLOv8, TorchScript, model export, Ultralytics, PyTorch, deep learning, AI deployment, cross-platform, performance optimization +description: Learn how to export Ultralytics YOLO11 models to TorchScript for flexible, cross-platform deployment. Boost performance and utilize in various environments. +keywords: YOLO11, TorchScript, model export, Ultralytics, PyTorch, deep learning, AI deployment, cross-platform, performance optimization --- -# YOLOv8 Model Export to TorchScript for Quick Deployment +# YOLO11 Model Export to TorchScript for Quick Deployment Deploying [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) models across different environments, including embedded systems, web browsers, or platforms with limited Python support, requires a flexible and portable solution. TorchScript focuses on portability and the ability to run models in environments where the entire Python framework is unavailable. This makes it ideal for scenarios where you need to deploy your computer vision capabilities across various devices or platforms. -Export to Torchscript to serialize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for cross-platform compatibility and streamlined deployment. 
In this guide, we'll show you how to export your YOLOv8 models to the TorchScript format, making it easier for you to use them across a wider range of applications. +Export to Torchscript to serialize your [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) models for cross-platform compatibility and streamlined deployment. In this guide, we'll show you how to export your YOLO11 models to the TorchScript format, making it easier for you to use them across a wider range of applications. ## Why should you export to TorchScript? ![Torchscript Overview](https://github.com/ultralytics/docs/releases/download/0/torchscript-overview.avif) -Developed by the creators of PyTorch, TorchScript is a powerful tool for optimizing and deploying PyTorch models across a variety of platforms. Exporting YOLOv8 models to [TorchScript](https://pytorch.org/docs/stable/jit.html) is crucial for moving from research to real-world applications. TorchScript, part of the PyTorch framework, helps make this transition smoother by allowing PyTorch models to be used in environments that don't support Python. +Developed by the creators of PyTorch, TorchScript is a powerful tool for optimizing and deploying PyTorch models across a variety of platforms. Exporting YOLO11 models to [TorchScript](https://pytorch.org/docs/stable/jit.html) is crucial for moving from research to real-world applications. TorchScript, part of the PyTorch framework, helps make this transition smoother by allowing PyTorch models to be used in environments that don't support Python. -The process involves two techniques: tracing and scripting. Tracing records operations during model execution, while scripting allows for the definition of models using a subset of Python. These techniques ensure that models like YOLOv8 can still work their magic even outside their usual Python environment. +The process involves two techniques: tracing and scripting. 
Tracing records operations during model execution, while scripting allows for the definition of models using a subset of Python. These techniques ensure that models like YOLO11 can still work their magic even outside their usual Python environment. ![TorchScript Script and Trace](https://github.com/ultralytics/docs/releases/download/0/torchscript-script-and-trace.avif) @@ -42,7 +42,7 @@ Here are the key features that make TorchScript a valuable tool for developers: ## Deployment Options in TorchScript -Before we look at the code for exporting YOLOv8 models to the TorchScript format, let's understand where TorchScript models are normally used. +Before we look at the code for exporting YOLO11 models to the TorchScript format, let's understand where TorchScript models are normally used. TorchScript offers various deployment options for [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models, such as: @@ -52,9 +52,9 @@ TorchScript offers various deployment options for [machine learning](https://www - **Cloud Deployment**: TorchScript models can be deployed to cloud-based servers using solutions like TorchServe. It provides features like model versioning, batching, and metrics monitoring for scalable deployment in production environments. Cloud deployment with TorchScript can make your models accessible via APIs or other web services. -## Export to TorchScript: Converting Your YOLOv8 Model +## Export to TorchScript: Converting Your YOLO11 Model -Exporting YOLOv8 models to TorchScript makes it easier to use them in different places and helps them run faster and more efficiently. This is great for anyone looking to use deep learning models more effectively in real-world applications. +Exporting YOLO11 models to TorchScript makes it easier to use them in different places and helps them run faster and more efficiently. This is great for anyone looking to use deep learning models more effectively in real-world applications. 
### Installation @@ -65,15 +65,15 @@ To install the required package, run: === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` -For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, check our [Ultralytics Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ### Usage -Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLOv8 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). +Before diving into the usage instructions, it's important to note that while all [Ultralytics YOLO11 models](../models/index.md) are available for exporting, you can ensure that the model you select supports export functionality [here](../modes/export.md). !!! 
example "Usage" @@ -82,14 +82,14 @@ Before diving into the usage instructions, it's important to note that while all ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TorchScript format - model.export(format="torchscript") # creates 'yolov8n.torchscript' + model.export(format="torchscript") # creates 'yolo11n.torchscript' # Load the exported TorchScript model - torchscript_model = YOLO("yolov8n.torchscript") + torchscript_model = YOLO("yolo11n.torchscript") # Run inference results = torchscript_model("https://ultralytics.com/images/bus.jpg") @@ -98,18 +98,18 @@ Before diving into the usage instructions, it's important to note that while all === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TorchScript format - yolo export model=yolov8n.pt format=torchscript # creates 'yolov8n.torchscript' + # Export a YOLO11n PyTorch model to TorchScript format + yolo export model=yolo11n.pt format=torchscript # creates 'yolo11n.torchscript' # Run inference with the exported model - yolo predict model=yolov8n.torchscript source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.torchscript source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, visit the [Ultralytics documentation page on exporting](../modes/export.md). -## Deploying Exported YOLOv8 TorchScript Models +## Deploying Exported YOLO11 TorchScript Models -After successfully exporting your Ultralytics YOLOv8 models to TorchScript format, you can now deploy them. The primary and recommended first step for running a TorchScript model is to utilize the YOLO("model.torchscript") method, as outlined in the previous usage code snippet. 
However, for in-depth instructions on deploying your TorchScript models in various other settings, take a look at the following resources: +After successfully exporting your Ultralytics YOLO11 models to TorchScript format, you can now deploy them. The primary and recommended first step for running a TorchScript model is to utilize the YOLO("model.torchscript") method, as outlined in the previous usage code snippet. However, for in-depth instructions on deploying your TorchScript models in various other settings, take a look at the following resources: - **[Explore Mobile Deployment](https://pytorch.org/mobile/home/)**: The [PyTorch](https://www.ultralytics.com/glossary/pytorch) Mobile Documentation provides comprehensive guidelines for deploying models on mobile devices, ensuring your applications are efficient and responsive. @@ -119,21 +119,21 @@ After successfully exporting your Ultralytics YOLOv8 models to TorchScript forma ## Summary -In this guide, we explored the process of exporting Ultralytics YOLOv8 models to the TorchScript format. By following the provided instructions, you can optimize YOLOv8 models for performance and gain the flexibility to deploy them across various platforms and environments. +In this guide, we explored the process of exporting Ultralytics YOLO11 models to the TorchScript format. By following the provided instructions, you can optimize YOLO11 models for performance and gain the flexibility to deploy them across various platforms and environments. For further details on usage, visit [TorchScript's official documentation](https://pytorch.org/docs/stable/jit.html). -Also, if you'd like to know more about other Ultralytics YOLOv8 integrations, visit our [integration guide page](../integrations/index.md). You'll find plenty of useful resources and insights there. +Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). 
You'll find plenty of useful resources and insights there. ## FAQ -### What is Ultralytics YOLOv8 model export to TorchScript? +### What is Ultralytics YOLO11 model export to TorchScript? -Exporting an Ultralytics YOLOv8 model to TorchScript allows for flexible, cross-platform deployment. TorchScript, a part of the PyTorch ecosystem, facilitates the serialization of models, which can then be executed in environments that lack Python support. This makes it ideal for deploying models on embedded systems, C++ environments, mobile applications, and even web browsers. Exporting to TorchScript enables efficient performance and wider applicability of your YOLOv8 models across diverse platforms. +Exporting an Ultralytics YOLO11 model to TorchScript allows for flexible, cross-platform deployment. TorchScript, a part of the PyTorch ecosystem, facilitates the serialization of models, which can then be executed in environments that lack Python support. This makes it ideal for deploying models on embedded systems, C++ environments, mobile applications, and even web browsers. Exporting to TorchScript enables efficient performance and wider applicability of your YOLO11 models across diverse platforms. -### How can I export my YOLOv8 model to TorchScript using Ultralytics? +### How can I export my YOLO11 model to TorchScript using Ultralytics? -To export a YOLOv8 model to TorchScript, you can use the following example code: +To export a YOLO11 model to TorchScript, you can use the following example code: !!! 
example "Usage" @@ -142,14 +142,14 @@ To export a YOLOv8 model to TorchScript, you can use the following example code: ```python from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Export the model to TorchScript format - model.export(format="torchscript") # creates 'yolov8n.torchscript' + model.export(format="torchscript") # creates 'yolo11n.torchscript' # Load the exported TorchScript model - torchscript_model = YOLO("yolov8n.torchscript") + torchscript_model = YOLO("yolo11n.torchscript") # Run inference results = torchscript_model("https://ultralytics.com/images/bus.jpg") @@ -158,18 +158,18 @@ To export a YOLOv8 model to TorchScript, you can use the following example code: === "CLI" ```bash - # Export a YOLOv8n PyTorch model to TorchScript format - yolo export model=yolov8n.pt format=torchscript # creates 'yolov8n.torchscript' + # Export a YOLO11n PyTorch model to TorchScript format + yolo export model=yolo11n.pt format=torchscript # creates 'yolo11n.torchscript' # Run inference with the exported model - yolo predict model=yolov8n.torchscript source='https://ultralytics.com/images/bus.jpg' + yolo predict model=yolo11n.torchscript source='https://ultralytics.com/images/bus.jpg' ``` For more details about the export process, refer to the [Ultralytics documentation on exporting](../modes/export.md). -### Why should I use TorchScript for deploying YOLOv8 models? +### Why should I use TorchScript for deploying YOLO11 models? -Using TorchScript for deploying YOLOv8 models offers several advantages: +Using TorchScript for deploying YOLO11 models offers several advantages: - **Portability**: Exported models can run in environments without the need for Python, such as C++ applications, embedded systems, or mobile devices. - **Optimization**: TorchScript supports static graph execution and Just-In-Time (JIT) compilation, which can optimize model performance. 
@@ -178,24 +178,24 @@ Using TorchScript for deploying YOLOv8 models offers several advantages: For more insights into deployment, visit the [PyTorch Mobile Documentation](https://pytorch.org/mobile/home/), [TorchServe Documentation](https://pytorch.org/serve/getting_started.html), and [C++ Deployment Guide](https://pytorch.org/tutorials/advanced/cpp_export.html). -### What are the installation steps for exporting YOLOv8 models to TorchScript? +### What are the installation steps for exporting YOLO11 models to TorchScript? -To install the required package for exporting YOLOv8 models, use the following command: +To install the required package for exporting YOLO11 models, use the following command: !!! tip "Installation" === "CLI" ```bash - # Install the required package for YOLOv8 + # Install the required package for YOLO11 pip install ultralytics ``` For detailed instructions, visit the [Ultralytics Installation guide](../quickstart.md). If any issues arise during installation, consult the [Common Issues guide](../guides/yolo-common-issues.md). -### How do I deploy my exported TorchScript YOLOv8 models? +### How do I deploy my exported TorchScript YOLO11 models? -After exporting YOLOv8 models to the TorchScript format, you can deploy them across a variety of platforms: +After exporting YOLO11 models to the TorchScript format, you can deploy them across a variety of platforms: - **C++ API**: Ideal for low-overhead, highly efficient production environments. - **Mobile Deployment**: Use [PyTorch Mobile](https://pytorch.org/mobile/home/) for iOS and Android applications. 
diff --git a/docs/en/integrations/vscode.md b/docs/en/integrations/vscode.md index b6785d15297..c1ce44e7015 100644 --- a/docs/en/integrations/vscode.md +++ b/docs/en/integrations/vscode.md @@ -125,7 +125,7 @@ These are the current snippet categories available to the Ultralytics-snippets e ### Learning with Examples -The `ultra.examples` snippets are to useful for anyone looking to learn how to get started with the basics of working with Ultralytics YOLO. Example snippets are intended to run once inserted (some have dropdown options as well). An example of this is shown at the animation at the [top] of this page, where after the snippet is inserted, all code is selected and run interactively using Shift ⇑+Enter ↵. +The `ultra.examples` snippets are very useful for anyone looking to learn how to get started with the basics of working with Ultralytics YOLO. Example snippets are intended to run once inserted (some have dropdown options as well). An example of this is shown in the animation at the [top] of this page, where after the snippet is inserted, all code is selected and run interactively using Shift ⇑+Enter ↵. !!! example @@ -134,7 +134,7 @@ The `ultra.examples` snippets are to useful for anyone looking to learn how to g ```python from ultralytics import ASSETS, YOLO - model = YOLO("yolov8n.pt", task="detect") + model = YOLO("yolo11n.pt", task="detect") results = model(source=ASSETS / "bus.jpg") for result in results: @@ -168,7 +168,7 @@ However, since Ultralytics supports numerous [tasks], when [working with inferen ### Keywords Arguments -There are over 💯 keyword arguments for all of the various Ultralytics [tasks] and [modes]! That's a lot to remember and it can be easy to forget if the argument is `save_frame` or `save_frames` (it's definitely `save_frames` by the way). This is where the `ultra.kwargs` snippets can help out! +There are over 💯 keyword arguments for all the various Ultralytics [tasks] and [modes]! 
That's a lot to remember, and it can be easy to forget if the argument is `save_frame` or `save_frames` (it's definitely `save_frames` by the way). This is where the `ultra.kwargs` snippets can help out! !!! example @@ -181,7 +181,7 @@ There are over 💯 keyword arguments for all of the various Ultralytics [tasks] conf=0.25, # (float) minimum confidence threshold iou=0.7, # (float) intersection over union (IoU) threshold for NMS vid_stride=1, # (int) video frame-rate stride - stream_buffer=False, # (bool) buffer all streaming frames (True) or return the most recent frame (False) + stream_buffer=False, # (bool) buffer incoming frames in a queue (True) or only keep the most recent frame (False) visualize=False, # (bool) visualize model features augment=False, # (bool) apply image augmentation to prediction sources agnostic_nms=False, # (bool) class-agnostic NMS @@ -229,7 +229,7 @@ If you use VS Code and have started to see a message prompting you to install th 1. Install Ultralytics-snippets and the message will no longer be shown 😆! -2. You can using `yolo settings vscode_msg False` to disable the message from showing without having to install the extension. You can learn more about the [Ultralytics Settings] on the [quickstart] page if you're unfamiliar. +2. You can use `yolo settings vscode_msg False` to disable the message from showing without having to install the extension. You can learn more about the [Ultralytics Settings] on the [quickstart] page if you're unfamiliar. ### I have an idea for a new Ultralytics code snippet, how can I get one added? 
@@ -259,7 +259,7 @@ Like any other VS Code extension, you can uninstall it by navigating to the Exte [working with inference results]: ../modes/predict.md#working-with-results [inference arguments]: ../modes/predict.md#inference-arguments [Simple Utilities page]: ../usage/simple-utilities.md -[Ultralytics Settings]: ../quickstart.md/#ultralytics-settings +[Ultralytics Settings]: ../quickstart.md#ultralytics-settings [quickstart]: ../quickstart.md [Discord]: https://ultralytics.com/discord [Discourse]: https://community.ultralytics.com diff --git a/docs/en/integrations/weights-biases.md b/docs/en/integrations/weights-biases.md index b831f0b72a7..45c74dc86ba 100644 --- a/docs/en/integrations/weights-biases.md +++ b/docs/en/integrations/weights-biases.md @@ -1,12 +1,12 @@ --- comments: true -description: Learn how to enhance YOLOv8 experiment tracking and visualization with Weights & Biases for better model performance and management. -keywords: YOLOv8, Weights & Biases, model training, experiment tracking, Ultralytics, machine learning, computer vision, model visualization +description: Learn how to enhance YOLO11 experiment tracking and visualization with Weights & Biases for better model performance and management. +keywords: YOLO11, Weights & Biases, model training, experiment tracking, Ultralytics, machine learning, computer vision, model visualization --- -# Enhancing YOLOv8 Experiment Tracking and Visualization with Weights & Biases +# Enhancing YOLO11 Experiment Tracking and Visualization with Weights & Biases -[Object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) have become integral to many [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. However, training, evaluating, and deploying these complex models introduces several challenges. 
Tracking key training metrics, comparing model variants, analyzing model behavior, and detecting issues require substantial instrumentation and experiment management. +[Object detection](https://www.ultralytics.com/glossary/object-detection) models like [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) have become integral to many [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) applications. However, training, evaluating, and deploying these complex models introduce several challenges. Tracking key training metrics, comparing model variants, analyzing model behavior, and detecting issues require significant instrumentation and experiment management.


@@ -16,10 +16,10 @@ keywords: YOLOv8, Weights & Biases, model training, experiment tracking, Ultraly allowfullscreen>
- Watch: How to use Ultralytics YOLOv8 with Weights and Biases + Watch: How to use Ultralytics YOLO11 with Weights and Biases

-This guide showcases Ultralytics YOLOv8 integration with Weights & Biases' for enhanced experiment tracking, model-checkpointing, and visualization of model performance. It also includes instructions for setting up the integration, training, fine-tuning, and visualizing results using Weights & Biases' interactive features. +This guide showcases Ultralytics YOLO11 integration with Weights & Biases for enhanced experiment tracking, model-checkpointing, and visualization of model performance. It also includes instructions for setting up the integration, training, fine-tuning, and visualizing results using Weights & Biases' interactive features. ## Weights & Biases @@ -29,9 +29,9 @@ This guide showcases Ultralytics YOLOv8 integration with Weights & Biases' for e [Weights & Biases](https://wandb.ai/site) is a cutting-edge MLOps platform designed for tracking, visualizing, and managing [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) experiments. It features automatic logging of training metrics for full experiment reproducibility, an interactive UI for streamlined data analysis, and efficient model management tools for deploying across various environments. -## YOLOv8 Training with Weights & Biases +## YOLO11 Training with Weights & Biases -You can use Weights & Biases to bring efficiency and automation to your YOLOv8 training process. +You can use Weights & Biases to bring efficiency and automation to your YOLO11 training process. 
## Installation @@ -42,11 +42,14 @@ To install the required packages, run: === "CLI" ```bash - # Install the required packages for YOLOv8 and Weights & Biases - pip install --upgrade ultralytics==8.0.186 wandb + # Install the required packages for Ultralytics YOLO and Weights & Biases + pip install -U ultralytics wandb + + # Enable W&B logging for Ultralytics + yolo settings wandb=True ``` -For detailed instructions and best practices related to the installation process, be sure to check our [YOLOv8 Installation guide](../quickstart.md). While installing the required packages for YOLOv8, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. +For detailed instructions and best practices related to the installation process, be sure to check our [YOLO11 Installation guide](../quickstart.md). While installing the required packages for YOLO11, if you encounter any difficulties, consult our [Common Issues guide](../guides/yolo-common-issues.md) for solutions and tips. ## Configuring Weights & Biases @@ -56,69 +59,69 @@ Start by initializing the Weights & Biases environment in your workspace. You ca !!! tip "Initial SDK Setup" + === "Python" + + ```python + import wandb + + # Initialize your Weights & Biases environment + wandb.login(key="<API_KEY>") + ``` + === "CLI" ```bash # Initialize your Weights & Biases environment - import wandb - wandb.login() + wandb login ``` Navigate to the Weights & Biases authorization page to create and retrieve your API key. Use this key to authenticate your environment with W&B. -## Usage: Training YOLOv8 with Weights & Biases +## Usage: Training YOLO11 with Weights & Biases -Before diving into the usage instructions for YOLOv8 model training with Weights & Biases, be sure to check out the range of [YOLOv8 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. 
+Before diving into the usage instructions for YOLO11 model training with Weights & Biases, be sure to check out the range of [YOLO11 models offered by Ultralytics](../models/index.md). This will help you choose the most appropriate model for your project requirements. -!!! example "Usage: Training YOLOv8 with Weights & Biases" +!!! example "Usage: Training YOLO11 with Weights & Biases" === "Python" ```python - import wandb - from wandb.integration.ultralytics import add_wandb_callback - from ultralytics import YOLO - # Initialize a Weights & Biases run - wandb.init(project="ultralytics", job_type="training") - # Load a YOLO model - model = YOLO("yolov8n.pt") - - # Add W&B Callback for Ultralytics - add_wandb_callback(model, enable_model_checkpointing=True) + model = YOLO("yolo11n.pt") # Train and Fine-Tune the Model - model.train(project="ultralytics", data="coco8.yaml", epochs=5, imgsz=640) - - # Validate the Model - model.val() - - # Perform Inference and Log Results - model(["path/to/image1", "path/to/image2"]) - - # Finalize the W&B Run - wandb.finish() + model.train(data="coco8.yaml", epochs=5, project="ultralytics", name="yolo11n") ``` -### Understanding the Code + === "CLI" -Let's understand the steps showcased in the usage code snippet above. + ```bash + # Train a YOLO11 model with Weights & Biases + yolo train data=coco8.yaml epochs=5 project=ultralytics name=yolo11n + ``` -- **Step 1: Initialize a Weights & Biases Run**: Start by initializing a Weights & Biases run, specifying the project name and the job type. This run will track and manage the training and validation processes of your model. +### W&B Arguments -- **Step 2: Define the YOLOv8 Model and Dataset**: Specify the model variant and the dataset you wish to use. The YOLO model is then initialized with the specified model file. 
+| Argument | Default | Description | +| -------- | ------- | ------------------------------------------------------------------------------------------------------------------ | +| project | `None` | Specifies the name of the project logged locally and in W&B. This way you can group multiple runs together. | +| name | `None` | The name of the training run. This determines the name used to create subfolders and the name used for W&B logging | -- **Step 3: Add Weights & Biases Callback for Ultralytics**: This step is crucial as it enables the automatic logging of training metrics and validation results to Weights & Biases, providing a detailed view of the model's performance. +!!! tip "Enable or Disable Weights & Biases" -- **Step 4: Train and Fine-Tune the Model**: Begin training the model with the specified dataset, number of epochs, and image size. The training process includes logging of metrics and predictions at the end of each [epoch](https://www.ultralytics.com/glossary/epoch), offering a comprehensive view of the model's learning progress. + If you want to enable or disable Weights & Biases logging in Ultralytics, you can use the `yolo settings` command. By default, Weights & Biases logging is disabled. -- **Step 5: Validate the Model**: After training, the model is validated. This step is crucial for assessing the model's performance on unseen data and ensuring its generalizability. + === "CLI" -- **Step 6: Perform Inference and Log Results**: The model performs predictions on specified images. These predictions, along with visual overlays and insights, are automatically logged in a W&B Table for interactive exploration. + ```bash + # Enable Weights & Biases logging + yolo settings wandb=True -- **Step 7: Finalize the W&B Run**: This step marks the end of data logging and saves the final state of your model's training and validation process in the W&B dashboard. 
+ # Disable Weights & Biases logging + yolo settings wandb=False + ``` ### Understanding the Output @@ -126,19 +129,19 @@ Upon running the usage code snippet above, you can expect the following key outp - The setup of a new run with its unique ID, indicating the start of the training process. - A concise summary of the model's structure, including the number of layers and parameters. -- Regular updates on important metrics such as box loss, cls loss, dfl loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP scores during each training epoch. +- Regular updates on important metrics such as box loss, cls loss, dfl loss, [precision](https://www.ultralytics.com/glossary/precision), [recall](https://www.ultralytics.com/glossary/recall), and mAP scores during each training [epoch](https://www.ultralytics.com/glossary/epoch). - At the end of training, detailed metrics including the model's inference speed, and overall [accuracy](https://www.ultralytics.com/glossary/accuracy) metrics are displayed. - Links to the Weights & Biases dashboard for in-depth analysis and visualization of the training process, along with information on local log file locations. ### Viewing the Weights & Biases Dashboard -After running the usage code snippet, you can access the Weights & Biases (W&B) dashboard through the provided link in the output. This dashboard offers a comprehensive view of your model's training process with YOLOv8. +After running the usage code snippet, you can access the Weights & Biases (W&B) dashboard through the provided link in the output. This dashboard offers a comprehensive view of your model's training process with YOLO11. ## Key Features of the Weights & Biases Dashboard - **Real-Time Metrics Tracking**: Observe metrics like loss, accuracy, and validation scores as they evolve during the training, offering immediate insights for model tuning. 
[See how experiments are tracked using Weights & Biases](https://imgur.com/D6NVnmN). -- **Hyperparameter Optimization**: Weights & Biases aids in fine-tuning critical parameters such as [learning rate](https://www.ultralytics.com/glossary/learning-rate), batch size, and more, enhancing the performance of YOLOv8. +- **Hyperparameter Optimization**: Weights & Biases aids in fine-tuning critical parameters such as [learning rate](https://www.ultralytics.com/glossary/learning-rate), [batch size](https://www.ultralytics.com/glossary/batch-size), and more, enhancing the performance of YOLO11. - **Comparative Analysis**: The platform allows side-by-side comparisons of different training runs, essential for assessing the impact of various model configurations. @@ -150,11 +153,11 @@ After running the usage code snippet, you can access the Weights & Biases (W&B) - **Viewing Inference Results with Image Overlay**: Visualize the prediction results on images using interactive overlays in Weights & Biases, providing a clear and detailed view of model performance on real-world data. For more detailed information on Weights & Biases' image overlay capabilities, check out this [link](https://docs.wandb.ai/guides/track/log/media/#image-overlays). [See how Weights & Biases' image overlays helps visualize model inferences](https://imgur.com/a/UTSiufs). -By using these features, you can effectively track, analyze, and optimize your YOLOv8 model's training, ensuring the best possible performance and efficiency. +By using these features, you can effectively track, analyze, and optimize your YOLO11 model's training, ensuring the best possible performance and efficiency. ## Summary -This guide helped you explore Ultralytics' YOLOv8 integration with Weights & Biases. It illustrates the ability of this integration to efficiently track and visualize model training and prediction results. +This guide helped you explore the Ultralytics YOLO integration with Weights & Biases. 
It illustrates the ability of this integration to efficiently track and visualize model training and prediction results. For further details on usage, visit [Weights & Biases' official documentation](https://docs.wandb.ai/guides/integrations/ultralytics/). @@ -162,83 +165,84 @@ Also, be sure to check out the [Ultralytics integration guide page](../integrati ## FAQ -### How do I install the required packages for YOLOv8 and Weights & Biases? +### How do I integrate Weights & Biases with Ultralytics YOLO11? -To install the required packages for YOLOv8 and Weights & Biases, open your command line interface and run: +To integrate Weights & Biases with Ultralytics YOLO11: -```bash -pip install --upgrade ultralytics==8.0.186 wandb -``` - -For further guidance on installation steps, refer to our [YOLOv8 Installation guide](../quickstart.md). If you encounter issues, consult the [Common Issues guide](../guides/yolo-common-issues.md) for troubleshooting tips. +1. Install the required packages: -### What are the benefits of integrating Ultralytics YOLOv8 with Weights & Biases? + ```bash + pip install -U ultralytics wandb + yolo settings wandb=True + ``` -Integrating Ultralytics YOLOv8 with Weights & Biases offers several benefits including: +2. Log in to your Weights & Biases account: -- **Real-Time Metrics Tracking:** Observe metric changes during training for immediate insights. -- **Hyperparameter Optimization:** Improve model performance by fine-tuning learning rate, [batch size](https://www.ultralytics.com/glossary/batch-size), etc. -- **Comparative Analysis:** Side-by-side comparison of different training runs. -- **Resource Monitoring:** Keep track of CPU, GPU, and memory usage. -- **Model Artifacts Management:** Easy access and sharing of model checkpoints. + ```python + import wandb -Explore these features in detail in the Weights & Biases Dashboard section above. + wandb.login(key="<API_KEY>") + ``` -### How can I configure Weights & Biases for YOLOv8 training? +3. 
Train your YOLO11 model with W&B logging enabled: -To configure Weights & Biases for YOLOv8 training, follow these steps: + ```python + from ultralytics import YOLO -1. Run the command to initialize Weights & Biases: - ```bash - import wandb - wandb.login() + model = YOLO("yolo11n.pt") + model.train(data="coco8.yaml", epochs=5, project="ultralytics", name="yolo11n") ``` -2. Retrieve your API key from the Weights & Biases website. -3. Use the API key to authenticate your development environment. -Detailed setup instructions can be found in the Configuring Weights & Biases section above. +This will automatically log metrics, hyperparameters, and model artifacts to your W&B project. -### How do I train a YOLOv8 model using Weights & Biases? +### What are the key features of Weights & Biases integration with YOLO11? -For training a YOLOv8 model using Weights & Biases, use the following steps in a Python script: +The key features include: -```python -import wandb -from wandb.integration.ultralytics import add_wandb_callback +- Real-time metrics tracking during training +- Hyperparameter optimization tools +- Comparative analysis of different training runs +- Visualization of training progress through graphs +- Resource monitoring (CPU, GPU, memory usage) +- Model artifacts management and sharing +- Viewing inference results with image overlays -from ultralytics import YOLO +These features help in tracking experiments, optimizing models, and collaborating more effectively on YOLO11 projects. -# Initialize a Weights & Biases run -wandb.init(project="ultralytics", job_type="training") +### How can I view the Weights & Biases dashboard for my YOLO11 training? -# Load a YOLO model -model = YOLO("yolov8n.pt") +After running your training script with W&B integration: -# Add W&B Callback for Ultralytics -add_wandb_callback(model, enable_model_checkpointing=True) +1. A link to your W&B dashboard will be provided in the console output. +2. 
Click on the link or go to [wandb.ai](https://wandb.ai/) and log in to your account. +3. Navigate to your project to view detailed metrics, visualizations, and model performance data. -# Train and Fine-Tune the Model -model.train(project="ultralytics", data="coco8.yaml", epochs=5, imgsz=640) +The dashboard offers insights into your model's training process, allowing you to analyze and improve your YOLO11 models effectively. -# Validate the Model -model.val() +### Can I disable Weights & Biases logging for YOLO11 training? -# Perform Inference and Log Results -model(["path/to/image1", "path/to/image2"]) +Yes, you can disable W&B logging using the following command: -# Finalize the W&B Run -wandb.finish() +```bash +yolo settings wandb=False +``` + +To re-enable logging, use: + +```bash +yolo settings wandb=True ``` -This script initializes Weights & Biases, sets up the model, trains it, and logs results. For more details, visit the Usage section above. +This allows you to control when you want to use W&B logging without modifying your training scripts. -### Why should I use Ultralytics YOLOv8 with Weights & Biases over other platforms? +### How does Weights & Biases help in optimizing YOLO11 models? -Ultralytics YOLOv8 integrated with Weights & Biases offers several unique advantages: +Weights & Biases helps optimize YOLO11 models by: -- **High Efficiency:** Real-time tracking of training metrics and performance optimization. -- **Scalability:** Easily manage large-scale training jobs with robust resource monitoring and utilization tools. -- **Interactivity:** A user-friendly interactive UI for [data visualization](https://www.ultralytics.com/glossary/data-visualization) and model management. -- **Community and Support:** Strong integration documentation and community support with flexible customization and enhancement options. +1. Providing detailed visualizations of training metrics +2. Enabling easy comparison between different model versions +3. 
Offering tools for [hyperparameter tuning](https://www.ultralytics.com/glossary/hyperparameter-tuning) +4. Allowing for collaborative analysis of model performance +5. Facilitating easy sharing of model artifacts and results -For comparisons with other platforms like Comet and ClearML, refer to [Ultralytics integrations](../integrations/index.md). +These features help researchers and developers iterate faster and make data-driven decisions to improve their YOLO11 models. diff --git a/docs/en/macros/augmentation-args.md b/docs/en/macros/augmentation-args.md index b4d6c9df6d4..bee27ddd0da 100644 --- a/docs/en/macros/augmentation-args.md +++ b/docs/en/macros/augmentation-args.md @@ -13,7 +13,7 @@ | `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. | | `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. | | `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. | -| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. | +| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies and pastes objects across images, useful for increasing object instances and learning object occlusion. Requires segmentation labels. | | `copy_paste_mode` | `str` | `flip` | - | Copy-Paste augmentation method selection among the options of (`"flip"`, `"mixup"`). 
| | `auto_augment` | `str` | `randaugment` | - | Automatically applies a predefined augmentation policy (`randaugment`, `autoaugment`, `augmix`), optimizing for classification tasks by diversifying the visual features. | | `erasing` | `float` | `0.4` | `0.0 - 0.9` | Randomly erases a portion of the image during classification training, encouraging the model to focus on less obvious features for recognition. | diff --git a/docs/en/macros/export-args.md b/docs/en/macros/export-args.md index ee88a56918d..4664efcd8e6 100644 --- a/docs/en/macros/export-args.md +++ b/docs/en/macros/export-args.md @@ -1,16 +1,18 @@ -| Argument | Type | Default | Description | -| ----------- | ---------------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `format` | `str` | `'torchscript'` | Target format for the exported model, such as `'onnx'`, `'torchscript'`, `'tensorflow'`, or others, defining compatibility with various deployment environments. | -| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | -| `keras` | `bool` | `False` | Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs. | -| `optimize` | `bool` | `False` | Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance. | -| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. | -| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices. 
| -| `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX and TensorRT exports, enhancing flexibility in handling varying image dimensions. | -| `simplify` | `bool` | `True` | Simplifies the model graph for ONNX exports with `onnxslim`, potentially improving performance and compatibility. | -| `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. | -| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance. | -| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. | +| Argument | Type | Default | Description | +| ----------- | ----------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `format` | `str` | `'torchscript'` | Target format for the exported model, such as `'onnx'`, `'torchscript'`, `'tensorflow'`, or others, defining compatibility with various deployment environments. | +| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. | +| `keras` | `bool` | `False` | Enables export to Keras format for [TensorFlow](https://www.ultralytics.com/glossary/tensorflow) SavedModel, providing compatibility with TensorFlow serving and APIs. | +| `optimize` | `bool` | `False` | Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance. | +| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. 
| +| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal [accuracy](https://www.ultralytics.com/glossary/accuracy) loss, primarily for edge devices. | +| `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX, TensorRT and OpenVINO exports, enhancing flexibility in handling varying image dimensions. | +| `simplify` | `bool` | `True` | Simplifies the model graph for ONNX exports with `onnxslim`, potentially improving performance and compatibility. | +| `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. | +| `workspace` | `float` or `None` | `None` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance; use `None` for auto-allocation by TensorRT up to device maximum. | +| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. | +| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. | +| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`) or DLA for NVIDIA Jetson (`device=dla:0` or `device=dla:1`). | +| `data` | `str` | `coco8.yaml` | Path to the dataset configuration file (default: `coco8.yaml`), essential for quantization. | | `separate_outputs` | `bool` | `False` | Separate outputs for better quantization performance. | | `export_hw_optimized` | `bool` | `False` | Optimize c2f block for faster inference on some hardware. | -| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. 
| diff --git a/docs/en/macros/export-table.md b/docs/en/macros/export-table.md index 924a8727c9b..509abd56d8a 100644 --- a/docs/en/macros/export-table.md +++ b/docs/en/macros/export-table.md @@ -1,15 +1,18 @@ -| Format | `format` Argument | Model | Metadata | Arguments | -| ------------------------------------------------- | ----------------- | ----------------------------------------------- | -------- | -------------------------------------------------------------------- | -| [PyTorch](https://pytorch.org/) | - | `{{ model_name or "yolov8n" }}.pt` | ✅ | - | -| [TorchScript](../integrations/torchscript.md) | `torchscript` | `{{ model_name or "yolov8n" }}.torchscript` | ✅ | `imgsz`, `optimize`, `batch` | -| [ONNX](../integrations/onnx.md) | `onnx` | `{{ model_name or "yolov8n" }}.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` | -| [OpenVINO](../integrations/openvino.md) | `openvino` | `{{ model_name or "yolov8n" }}_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TensorRT](../integrations/tensorrt.md) | `engine` | `{{ model_name or "yolov8n" }}.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` | -| [CoreML](../integrations/coreml.md) | `coreml` | `{{ model_name or "yolov8n" }}.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | -| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `{{ model_name or "yolov8n" }}_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` | -| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `{{ model_name or "yolov8n" }}.pb` | ❌ | `imgsz`, `batch` | -| [TF Lite](../integrations/tflite.md) | `tflite` | `{{ model_name or "yolov8n" }}.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` | -| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `{{ model_name or "yolov8n" }}_edgetpu.tflite` | ✅ | `imgsz` | -| [TF.js](../integrations/tfjs.md) | `tfjs` | `{{ model_name or "yolov8n" }}_web_model/` | ✅ | `imgsz`, `half`, `int8`, 
`batch` | -| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolov8n" }}_paddle_model/` | ✅ | `imgsz`, `batch` | -| [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolov8n" }}_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| Format | `format` Argument | Model | Metadata | Arguments | +| ------------------------------------------------- | ----------------- | ----------------------------------------------- | -------- | ----------------------------------------------------------------------------------- | +| [PyTorch](https://pytorch.org/) | - | `{{ model_name or "yolo11n" }}.pt` | ✅ | - | +| [TorchScript](../integrations/torchscript.md) | `torchscript` | `{{ model_name or "yolo11n" }}.torchscript` | ✅ | `imgsz`, `optimize`, `nms`, `batch` | +| [ONNX](../integrations/onnx.md) | `onnx` | `{{ model_name or "yolo11n" }}.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `nms`, `batch` | +| [OpenVINO](../integrations/openvino.md) | `openvino` | `{{ model_name or "yolo11n" }}_openvino_model/` | ✅ | `imgsz`, `half`, `dynamic`, `int8`, `nms`, `batch`, `data` | +| [TensorRT](../integrations/tensorrt.md) | `engine` | `{{ model_name or "yolo11n" }}.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `nms`, `batch`, `data` | +| [CoreML](../integrations/coreml.md) | `coreml` | `{{ model_name or "yolo11n" }}.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `{{ model_name or "yolo11n" }}_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `nms`, `batch` | +| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `{{ model_name or "yolo11n" }}.pb` | ❌ | `imgsz`, `batch` | +| [TF Lite](../integrations/tflite.md) | `tflite` | `{{ model_name or "yolo11n" }}.tflite` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch`, `data` | +| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `{{ model_name or "yolo11n" 
}}_edgetpu.tflite` | ✅ | `imgsz` | +| [TF.js](../integrations/tfjs.md) | `tfjs` | `{{ model_name or "yolo11n" }}_web_model/` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` | +| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolo11n" }}_paddle_model/` | ✅ | `imgsz`, `batch` | +| [MNN](../integrations/mnn.md) | `mnn` | `{{ model_name or "yolo11n" }}.mnn` | ✅ | `imgsz`, `batch`, `int8`, `half` | +| [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolo11n" }}_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` | +| [IMX500](../integrations/sony-imx500.md) | `imx` | `{{ model_name or "yolov8n" }}_imx_model/` | ✅ | `imgsz`, `int8`, `data` | +| [RKNN](../integrations/rockchip-rknn.md) | `rknn` | `{{ model_name or "yolo11n" }}_rknn_model/` | ✅ | `imgsz`, `batch`, `name` | diff --git a/docs/en/macros/predict-args.md b/docs/en/macros/predict-args.md index 4486d597de1..8edf2171c36 100644 --- a/docs/en/macros/predict-args.md +++ b/docs/en/macros/predict-args.md @@ -1,18 +1,21 @@ -| Argument | Type | Default | Description | -| --------------- | -------------- | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across different types of input. | -| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. 
| -| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | -| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | -| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | -| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. | -| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | -| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | -| `stream_buffer` | `bool` | `False` | Determines if all frames should be buffered when processing video streams (`True`), or if the model should return the most recent frame (`False`). Useful for real-time applications. | -| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | -| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. 
| -| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. | -| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | -| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. | -| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | +| Argument | Type | Default | Description | +| --------------- | ---------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across [different types of input](/modes/predict.md/#inference-sources). | +| `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | +| `iou` | `float` | `0.7` | [Intersection Over Union](https://www.ultralytics.com/glossary/intersection-over-union-iou) (IoU) threshold for Non-Maximum Suppression (NMS). 
Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | +| `imgsz` | `int` or `tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. | +| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | +| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. | +| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file or `.txt` file](/modes/predict.md/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. | +| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. | +| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. | +| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. 
| +| `visualize` | `bool` | `False` | Activates visualization of model features during inference, providing insights into what the model is "seeing". Useful for debugging and model interpretation. | +| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. | +| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. | +| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. | +| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. | +| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. | +| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. | +| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. 
| | `separate_outputs`| `bool` | `False` | Used for models exported with separate_outputs flag set to True | diff --git a/docs/en/macros/sam-auto-annotate.md b/docs/en/macros/sam-auto-annotate.md new file mode 100644 index 00000000000..6cadbf0e766 --- /dev/null +++ b/docs/en/macros/sam-auto-annotate.md @@ -0,0 +1,12 @@ +| Argument | Type | Default | Description | +| ------------ | ----------- | -------------- | ---------------------------------------------------------------------------------------------- | +| `data` | `str` | required | Path to directory containing target images/videos for annotation or segmentation. | +| `det_model` | `str` | `"yolo11x.pt"` | YOLO detection model path for initial object detection. | +| `sam_model` | `str` | `"sam2_b.pt"` | SAM2 model path for segmentation (supports t/s/b/l variants and SAM2.1) and mobile_sam models. | +| `device` | `str` | `""` | Computation device (e.g., 'cuda:0', 'cpu', or '' for automatic device detection). | +| `conf` | `float` | `0.25` | YOLO detection confidence threshold for filtering weak detections. | +| `iou` | `float` | `0.45` | IoU threshold for Non-Maximum Suppression to filter overlapping boxes. | +| `imgsz` | `int` | `640` | Input size for resizing images (must be multiple of 32). | +| `max_det` | `int` | `300` | Maximum number of detections per image for memory efficiency. | +| `classes` | `list[int]` | `None` | List of class indices to detect (e.g., `[0, 1]` for person & bicycle). | +| `output_dir` | `str` | `None` | Save directory for annotations (defaults to './labels' relative to data path). 
| diff --git a/docs/en/macros/solutions-args.md b/docs/en/macros/solutions-args.md new file mode 100644 index 00000000000..42ae58e605e --- /dev/null +++ b/docs/en/macros/solutions-args.md @@ -0,0 +1,12 @@ +| Argument | Type | Default | Description | +| ---------------- | -------------- | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `region` | `list` | `[(20, 400), (1080, 400), (1080, 360), (20, 360)]` | Defines the region points for object counting, queue monitoring, trackzone or speed estimation. The points are defined as coordinates forming a polygonal area for analysis. | +| `show_in` | `bool` | `True` | Indicates whether to display objects that are counted as entering the defined region. Essential for real-world analytics, such as monitoring ingress trends. | +| `show_out` | `bool` | `True` | Indicates whether to display objects that are counted as exiting the defined region. Useful for applications requiring egress tracking and analytics. | +| `colormap` | `int or tuple` | `COLORMAP_PARULA` | Specifies the OpenCV-supported colormap for heatmap visualization. Default is `COLORMAP_PARULA`, but other colormaps can be used for different visualization preferences. | +| `up_angle` | `float` | `145.0` | Angle threshold for detecting the "up" position in workouts monitoring. Can be adjusted based on the position of keypoints for different exercises. | +| `down_angle` | `float` | `90.0` | Angle threshold for detecting the "down" position in workouts monitoring. Adjust this based on keypoint positions for specific exercises. | +| `kpts` | `list` | `[6, 8, 10]` | List of keypoints used for monitoring workouts. These keypoints correspond to body joints or parts, such as shoulders, elbows, and wrists, for exercises like push-ups, pull-ups, squats, ab-workouts. 
| +| `analytics_type` | `str` | `line` | Specifies the type of analytics visualization to generate. Options include `"line"`, `"pie"`, `"bar"`, or `"area"`. The default is `"line"` for trend visualization. | +| `json_file` | `str` | `None` | Path to the JSON file defining regions for parking systems or similar applications. Enables flexible configuration of analysis areas. | +| `records` | `int` | `5` | Total detections count that triggers an automated email notification about unusual activity. | diff --git a/docs/en/macros/train-args.md b/docs/en/macros/train-args.md index cb72bdeced8..69f9ce926ca 100644 --- a/docs/en/macros/train-args.md +++ b/docs/en/macros/train-args.md @@ -1,50 +1,50 @@ -| Argument | Default | Description | -| ----------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `model` | `None` | Specifies the model file for training. Accepts a path to either a `.pt` pretrained model or a `.yaml` configuration file. Essential for defining the model structure or initializing weights. | -| `data` | `None` | Path to the dataset configuration file (e.g., `coco8.yaml`). This file contains dataset-specific parameters, including paths to training and [validation data](https://www.ultralytics.com/glossary/validation-data), class names, and number of classes. | -| `epochs` | `100` | Total number of training epochs. Each [epoch](https://www.ultralytics.com/glossary/epoch) represents a full pass over the entire dataset. Adjusting this value can affect training duration and model performance. | -| `time` | `None` | Maximum training time in hours. If set, this overrides the `epochs` argument, allowing training to automatically stop after the specified duration. Useful for time-constrained training scenarios. 
| -| `patience` | `100` | Number of epochs to wait without improvement in validation metrics before early stopping the training. Helps prevent [overfitting](https://www.ultralytics.com/glossary/overfitting) by stopping training when performance plateaus. | -| `batch` | `16` | [Batch size](https://www.ultralytics.com/glossary/batch-size), with three modes: set as an integer (e.g., `batch=16`), auto mode for 60% GPU memory utilization (`batch=-1`), or auto mode with specified utilization fraction (`batch=0.70`). | -| `imgsz` | `640` | Target image size for training. All images are resized to this dimension before being fed into the model. Affects model [accuracy](https://www.ultralytics.com/glossary/accuracy) and computational complexity. | -| `save` | `True` | Enables saving of training checkpoints and final model weights. Useful for resuming training or [model deployment](https://www.ultralytics.com/glossary/model-deployment). | -| `save_period` | `-1` | Frequency of saving model checkpoints, specified in epochs. A value of -1 disables this feature. Useful for saving interim models during long training sessions. | -| `cache` | `False` | Enables caching of dataset images in memory (`True`/`ram`), on disk (`disk`), or disables it (`False`). Improves training speed by reducing disk I/O at the cost of increased memory usage. | -| `device` | `None` | Specifies the computational device(s) for training: a single GPU (`device=0`), multiple GPUs (`device=0,1`), CPU (`device=cpu`), or MPS for Apple silicon (`device=mps`). | -| `workers` | `8` | Number of worker threads for data loading (per `RANK` if Multi-GPU training). Influences the speed of data preprocessing and feeding into the model, especially useful in multi-GPU setups. | -| `project` | `None` | Name of the project directory where training outputs are saved. Allows for organized storage of different experiments. | -| `name` | `None` | Name of the training run. 
Used for creating a subdirectory within the project folder, where training logs and outputs are stored. | -| `exist_ok` | `False` | If True, allows overwriting of an existing project/name directory. Useful for iterative experimentation without needing to manually clear previous outputs. | -| `pretrained` | `True` | Determines whether to start training from a pretrained model. Can be a boolean value or a string path to a specific model from which to load weights. Enhances training efficiency and model performance. | -| `optimizer` | `'auto'` | Choice of optimizer for training. Options include `SGD`, `Adam`, `AdamW`, `NAdam`, `RAdam`, `RMSProp` etc., or `auto` for automatic selection based on model configuration. Affects convergence speed and stability. | -| `verbose` | `False` | Enables verbose output during training, providing detailed logs and progress updates. Useful for debugging and closely monitoring the training process. | -| `seed` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. | -| `deterministic` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. | -| `single_cls` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. | -| `rect` | `False` | Enables rectangular training, optimizing batch composition for minimal padding. Can improve efficiency and speed but may affect model accuracy. | -| `cos_lr` | `False` | Utilizes a cosine [learning rate](https://www.ultralytics.com/glossary/learning-rate) scheduler, adjusting the learning rate following a cosine curve over epochs. Helps in managing learning rate for better convergence. 
| -| `close_mosaic` | `10` | Disables mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation) in the last N epochs to stabilize training before completion. Setting to 0 disables this feature. | -| `resume` | `False` | Resumes training from the last saved checkpoint. Automatically loads model weights, optimizer state, and epoch count, continuing training seamlessly. | -| `amp` | `True` | Enables Automatic [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) (AMP) training, reducing memory usage and possibly speeding up training with minimal impact on accuracy. | -| `fraction` | `1.0` | Specifies the fraction of the dataset to use for training. Allows for training on a subset of the full dataset, useful for experiments or when resources are limited. | -| `profile` | `False` | Enables profiling of ONNX and TensorRT speeds during training, useful for optimizing model deployment. | -| `freeze` | `None` | Freezes the first N layers of the model or specified layers by index, reducing the number of trainable parameters. Useful for fine-tuning or [transfer learning](https://www.ultralytics.com/glossary/transfer-learning). | -| `lr0` | `0.01` | Initial learning rate (i.e. `SGD=1E-2`, `Adam=1E-3`) . Adjusting this value is crucial for the optimization process, influencing how rapidly model weights are updated. | -| `lrf` | `0.01` | Final learning rate as a fraction of the initial rate = (`lr0 * lrf`), used in conjunction with schedulers to adjust the learning rate over time. | -| `momentum` | `0.937` | Momentum factor for SGD or beta1 for [Adam optimizers](https://www.ultralytics.com/glossary/adam-optimizer), influencing the incorporation of past gradients in the current update. | -| `weight_decay` | `0.0005` | L2 [regularization](https://www.ultralytics.com/glossary/regularization) term, penalizing large weights to prevent overfitting. 
| -| `warmup_epochs` | `3.0` | Number of epochs for learning rate warmup, gradually increasing the learning rate from a low value to the initial learning rate to stabilize training early on. | -| `warmup_momentum` | `0.8` | Initial momentum for warmup phase, gradually adjusting to the set momentum over the warmup period. | -| `warmup_bias_lr` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. | -| `box` | `7.5` | Weight of the box loss component in the [loss function](https://www.ultralytics.com/glossary/loss-function), influencing how much emphasis is placed on accurately predicting [bounding box](https://www.ultralytics.com/glossary/bounding-box) coordinates. | -| `cls` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. | -| `dfl` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. | -| `pose` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. | -| `kobj` | `2.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. | -| `label_smoothing` | `0.0` | Applies label smoothing, softening hard labels to a mix of the target label and a uniform distribution over labels, can improve generalization. | -| `nbs` | `64` | Nominal batch size for normalization of loss. | -| `overlap_mask` | `True` | Determines whether segmentation masks should overlap during training, applicable in [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) tasks. | -| `mask_ratio` | `4` | Downsample ratio for segmentation masks, affecting the resolution of masks used during training. 
| -| `dropout` | `0.0` | Dropout rate for regularization in classification tasks, preventing overfitting by randomly omitting units during training. | -| `val` | `True` | Enables validation during training, allowing for periodic evaluation of model performance on a separate dataset. | -| `plots` | `False` | Generates and saves plots of training and validation metrics, as well as prediction examples, providing visual insights into model performance and learning progression. | +| Argument | Type | Default | Description | +| ----------------- | ------------------------ | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `model` | `str` | `None` | Specifies the model file for training. Accepts a path to either a `.pt` pretrained model or a `.yaml` configuration file. Essential for defining the model structure or initializing weights. | +| `data` | `str` | `None` | Path to the dataset configuration file (e.g., `coco8.yaml`). This file contains dataset-specific parameters, including paths to training and [validation data](https://www.ultralytics.com/glossary/validation-data), class names, and number of classes. | +| `epochs` | `int` | `100` | Total number of training epochs. Each [epoch](https://www.ultralytics.com/glossary/epoch) represents a full pass over the entire dataset. Adjusting this value can affect training duration and model performance. | +| `time` | `float` | `None` | Maximum training time in hours. If set, this overrides the `epochs` argument, allowing training to automatically stop after the specified duration. Useful for time-constrained training scenarios. | +| `patience` | `int` | `100` | Number of epochs to wait without improvement in validation metrics before early stopping the training. 
Helps prevent [overfitting](https://www.ultralytics.com/glossary/overfitting) by stopping training when performance plateaus. | +| `batch` | `int` | `16` | [Batch size](https://www.ultralytics.com/glossary/batch-size), with three modes: set as an integer (e.g., `batch=16`), auto mode for 60% GPU memory utilization (`batch=-1`), or auto mode with specified utilization fraction (`batch=0.70`). | +| `imgsz` | `int` or `list` | `640` | Target image size for training. All images are resized to this dimension before being fed into the model. Affects model [accuracy](https://www.ultralytics.com/glossary/accuracy) and computational complexity. | +| `save` | `bool` | `True` | Enables saving of training checkpoints and final model weights. Useful for resuming training or [model deployment](https://www.ultralytics.com/glossary/model-deployment). | +| `save_period` | `int` | `-1` | Frequency of saving model checkpoints, specified in epochs. A value of -1 disables this feature. Useful for saving interim models during long training sessions. | +| `cache` | `bool` | `False` | Enables caching of dataset images in memory (`True`/`ram`), on disk (`disk`), or disables it (`False`). Improves training speed by reducing disk I/O at the cost of increased memory usage. | +| `device` | `int` or `str` or `list` | `None` | Specifies the computational device(s) for training: a single GPU (`device=0`), multiple GPUs (`device=0,1`), CPU (`device=cpu`), or MPS for Apple silicon (`device=mps`). | +| `workers` | `int` | `8` | Number of worker threads for data loading (per `RANK` if Multi-GPU training). Influences the speed of data preprocessing and feeding into the model, especially useful in multi-GPU setups. | +| `project` | `str` | `None` | Name of the project directory where training outputs are saved. Allows for organized storage of different experiments. | +| `name` | `str` | `None` | Name of the training run. 
Used for creating a subdirectory within the project folder, where training logs and outputs are stored. | +| `exist_ok` | `bool` | `False` | If True, allows overwriting of an existing project/name directory. Useful for iterative experimentation without needing to manually clear previous outputs. | +| `pretrained` | `bool` | `True` | Determines whether to start training from a pretrained model. Can be a boolean value or a string path to a specific model from which to load weights. Enhances training efficiency and model performance. | +| `optimizer` | `str` | `'auto'` | Choice of optimizer for training. Options include `SGD`, `Adam`, `AdamW`, `NAdam`, `RAdam`, `RMSProp` etc., or `auto` for automatic selection based on model configuration. Affects convergence speed and stability. | +| `seed` | `int` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. | +| `deterministic` | `bool` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. | +| `single_cls` | `bool` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. | +| `classes` | `list[int]` | `None` | Specifies a list of class IDs to train on. Useful for filtering out and focusing only on certain classes during training. | +| `rect` | `bool` | `False` | Enables rectangular training, optimizing batch composition for minimal padding. Can improve efficiency and speed but may affect model accuracy. | +| `multi_scale` | `bool` | `False` | Enables multi-scale training by increasing/decreasing `imgsz` by up to a factor of `0.5` during training. Trains the model to be more accurate with multiple `imgsz` during inference. 
| +| `cos_lr` | `bool` | `False` | Utilizes a cosine [learning rate](https://www.ultralytics.com/glossary/learning-rate) scheduler, adjusting the learning rate following a cosine curve over epochs. Helps in managing learning rate for better convergence. | +| `close_mosaic` | `int` | `10` | Disables mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation) in the last N epochs to stabilize training before completion. Setting to 0 disables this feature. | +| `resume` | `bool` | `False` | Resumes training from the last saved checkpoint. Automatically loads model weights, optimizer state, and epoch count, continuing training seamlessly. | +| `amp` | `bool` | `True` | Enables Automatic [Mixed Precision](https://www.ultralytics.com/glossary/mixed-precision) (AMP) training, reducing memory usage and possibly speeding up training with minimal impact on accuracy. | +| `fraction` | `float` | `1.0` | Specifies the fraction of the dataset to use for training. Allows for training on a subset of the full dataset, useful for experiments or when resources are limited. | +| `profile` | `bool` | `False` | Enables profiling of ONNX and TensorRT speeds during training, useful for optimizing model deployment. | +| `freeze` | `int` or `list` | `None` | Freezes the first N layers of the model or specified layers by index, reducing the number of trainable parameters. Useful for fine-tuning or [transfer learning](https://www.ultralytics.com/glossary/transfer-learning). | +| `lr0` | `float` | `0.01` | Initial learning rate (e.g., `SGD=1E-2`, `Adam=1E-3`). Adjusting this value is crucial for the optimization process, influencing how rapidly model weights are updated. | +| `lrf` | `float` | `0.01` | Final learning rate as a fraction of the initial rate = (`lr0 * lrf`), used in conjunction with schedulers to adjust the learning rate over time. 
| +| `momentum` | `float` | `0.937` | Momentum factor for SGD or beta1 for [Adam optimizers](https://www.ultralytics.com/glossary/adam-optimizer), influencing the incorporation of past gradients in the current update. | +| `weight_decay` | `float` | `0.0005` | L2 [regularization](https://www.ultralytics.com/glossary/regularization) term, penalizing large weights to prevent overfitting. | +| `warmup_epochs` | `float` | `3.0` | Number of epochs for learning rate warmup, gradually increasing the learning rate from a low value to the initial learning rate to stabilize training early on. | +| `warmup_momentum` | `float` | `0.8` | Initial momentum for warmup phase, gradually adjusting to the set momentum over the warmup period. | +| `warmup_bias_lr` | `float` | `0.1` | Learning rate for bias parameters during the warmup phase, helping stabilize model training in the initial epochs. | +| `box` | `float` | `7.5` | Weight of the box loss component in the [loss function](https://www.ultralytics.com/glossary/loss-function), influencing how much emphasis is placed on accurately predicting [bounding box](https://www.ultralytics.com/glossary/bounding-box) coordinates. | +| `cls` | `float` | `0.5` | Weight of the classification loss in the total loss function, affecting the importance of correct class prediction relative to other components. | +| `dfl` | `float` | `1.5` | Weight of the distribution focal loss, used in certain YOLO versions for fine-grained classification. | +| `pose` | `float` | `12.0` | Weight of the pose loss in models trained for pose estimation, influencing the emphasis on accurately predicting pose keypoints. | +| `kobj` | `float` | `2.0` | Weight of the keypoint objectness loss in pose estimation models, balancing detection confidence with pose accuracy. | +| `nbs` | `int` | `64` | Nominal batch size for normalization of loss. 
| +| `overlap_mask` | `bool` | `True` | Determines whether object masks should be merged into a single mask for training, or kept separate for each object. In case of overlap, the smaller mask is overlaid on top of the larger mask during merge. | +| `mask_ratio` | `int` | `4` | Downsample ratio for segmentation masks, affecting the resolution of masks used during training. | +| `dropout` | `float` | `0.0` | Dropout rate for regularization in classification tasks, preventing overfitting by randomly omitting units during training. | +| `val` | `bool` | `True` | Enables validation during training, allowing for periodic evaluation of model performance on a separate dataset. | +| `plots` | `bool` | `False` | Generates and saves plots of training and validation metrics, as well as prediction examples, providing visual insights into model performance and learning progression. | diff --git a/docs/en/macros/validation-args.md b/docs/en/macros/validation-args.md index 5c709f7bfcf..ab5014c08de 100644 --- a/docs/en/macros/validation-args.md +++ b/docs/en/macros/validation-args.md @@ -12,5 +12,7 @@ | `device` | `str` | `None` | Specifies the device for validation (`cpu`, `cuda:0`, etc.). Allows flexibility in utilizing CPU or GPU resources. | | `dnn` | `bool` | `False` | If `True`, uses the [OpenCV](https://www.ultralytics.com/glossary/opencv) DNN module for ONNX model inference, offering an alternative to [PyTorch](https://www.ultralytics.com/glossary/pytorch) inference methods. | | `plots` | `bool` | `False` | When set to `True`, generates and saves plots of predictions versus ground truth for visual evaluation of the model's performance. | -| `rect` | `bool` | `False` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. | +| `rect` | `bool` | `True` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. 
| | `split` | `str` | `val` | Determines the dataset split to use for validation (`val`, `test`, or `train`). Allows flexibility in choosing the data segment for performance evaluation. | +| `project` | `str` | `None` | Name of the project directory where validation outputs are saved. | +| `name` | `str` | `None` | Name of the validation run. Used for creating a subdirectory within the project folder, where validation logs and outputs are stored. | diff --git a/docs/en/macros/yolo-cls-perf.md b/docs/en/macros/yolo-cls-perf.md new file mode 100644 index 00000000000..855e2c0ffca --- /dev/null +++ b/docs/en/macros/yolo-cls-perf.md @@ -0,0 +1,7 @@ +| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) at 640 | +| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | +| [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.0 ± 0.3 | 1.1 ± 0.0 | 1.6 | 3.3 | +| [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.9 ± 0.2 | 1.3 ± 0.0 | 5.5 | 12.1 | +| [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.2 ± 0.4 | 2.0 ± 0.0 | 10.4 | 39.3 | +| [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.2 ± 0.3 | 2.8 ± 0.0 | 12.9 | 49.4 | +| [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.4 ± 0.9 | 3.8 ± 0.0 | 28.4 | 110.4 | diff --git a/docs/en/macros/yolo-det-perf.md b/docs/en/macros/yolo-det-perf.md new file mode 100644 index 00000000000..1b146cfc037 --- /dev/null +++ b/docs/en/macros/yolo-det-perf.md @@ -0,0 +1,7 @@ +| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | +| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.1 ± 0.8 | 1.5 ± 0.0 | 2.6 | 6.5 | +| [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.0 ± 1.2 | 2.5 ± 0.0 | 9.4 | 21.5 | +| [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.2 ± 2.0 | 4.7 ± 0.1 | 20.1 | 68.0 | +| [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.6 ± 1.4 | 6.2 ± 0.1 | 25.3 | 86.9 | +| [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.8 ± 6.7 | 11.3 ± 0.2 | 56.9 | 194.9 | diff --git a/docs/en/macros/yolo-obb-perf.md b/docs/en/macros/yolo-obb-perf.md new file mode 100644 index 00000000000..37a7d7b17b8 --- /dev/null +++ b/docs/en/macros/yolo-obb-perf.md @@ -0,0 +1,7 @@ +| Model | size
(pixels) | mAPtest
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | +| -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.6 ± 0.8 | 4.4 ± 0.0 | 2.7 | 17.2 | +| [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.4 ± 4.0 | 5.1 ± 0.0 | 9.7 | 57.5 | +| [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.8 ± 2.9 | 10.1 ± 0.4 | 20.9 | 183.5 | +| [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.5 ± 5.0 | 13.5 ± 0.6 | 26.2 | 232.0 | +| [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.6 ± 7.7 | 28.6 ± 1.0 | 58.8 | 520.2 | diff --git a/docs/en/macros/yolo-pose-perf.md b/docs/en/macros/yolo-pose-perf.md new file mode 100644 index 00000000000..d699017b836 --- /dev/null +++ b/docs/en/macros/yolo-pose-perf.md @@ -0,0 +1,7 @@ +| Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | +| ---------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.4 ± 0.5 | 1.7 ± 0.0 | 2.9 | 7.6 | +| [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.5 ± 0.6 | 2.6 ± 0.0 | 9.9 | 23.2 | +| [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.3 ± 0.8 | 4.9 ± 0.1 | 20.9 | 71.7 | +| [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.7 ± 1.1 | 6.4 ± 0.1 | 26.2 | 90.7 | +| [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 488.0 ± 13.9 | 12.1 ± 0.2 | 58.8 | 203.3 | diff --git a/docs/en/macros/yolo-seg-perf.md b/docs/en/macros/yolo-seg-perf.md new file mode 100644 index 00000000000..af97e6f4557 --- /dev/null +++ b/docs/en/macros/yolo-seg-perf.md @@ -0,0 +1,7 @@ +| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | +| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | +| [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.9 ± 1.1 | 1.8 ± 0.0 | 2.9 | 10.4 | +| [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.6 ± 4.9 | 2.9 ± 0.0 | 10.1 | 35.5 | +| [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.6 ± 1.2 | 6.3 ± 0.1 | 22.4 | 123.3 | +| [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ± 3.2 | 7.8 ± 0.2 | 27.6 | 142.2 | +| [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ± 3.2 | 15.8 ± 0.7 | 62.1 | 319.0 | diff --git a/docs/en/models/index.md b/docs/en/models/index.md index baa5c9b2600..8300c520655 100644 --- a/docs/en/models/index.md +++ b/docs/en/models/index.md @@ -8,6 +8,8 @@ keywords: Ultralytics, supported models, YOLOv3, YOLOv4, YOLOv5, YOLOv6, YOLOv7, Welcome to Ultralytics' model documentation! We offer support for a wide range of models, each tailored to specific tasks like [object detection](../tasks/detect.md), [instance segmentation](../tasks/segment.md), [image classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and [multi-object tracking](../modes/track.md). If you're interested in contributing your model architecture to Ultralytics, check out our [Contributing Guide](../help/contributing.md). 
+![Ultralytics YOLO11 Comparison Plots](https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png) + ## Featured Models Here are some of the key models supported: @@ -15,12 +17,12 @@ Here are some of the key models supported: 1. **[YOLOv3](yolov3.md)**: The third iteration of the YOLO model family, originally by Joseph Redmon, known for its efficient real-time object detection capabilities. 2. **[YOLOv4](yolov4.md)**: A darknet-native update to YOLOv3, released by Alexey Bochkovskiy in 2020. 3. **[YOLOv5](yolov5.md)**: An improved version of the YOLO architecture by Ultralytics, offering better performance and speed trade-offs compared to previous versions. -4. **[YOLOv6](yolov6.md)**: Released by [Meituan](https://about.meituan.com/) in 2022, and in use in many of the company's autonomous delivery robots. +4. **[YOLOv6](yolov6.md)**: Released by [Meituan](https://www.meituan.com/) in 2022, and in use in many of the company's autonomous delivery robots. 5. **[YOLOv7](yolov7.md)**: Updated YOLO models released in 2022 by the authors of YOLOv4. 6. **[YOLOv8](yolov8.md)**: The latest version of the YOLO family, featuring enhanced capabilities such as [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation), pose/keypoints estimation, and classification. 7. **[YOLOv9](yolov9.md)**: An experimental model trained on the Ultralytics [YOLOv5](yolov5.md) codebase implementing Programmable Gradient Information (PGI). 8. **[YOLOv10](yolov10.md)**: By Tsinghua University, featuring NMS-free training and efficiency-accuracy driven architecture, delivering state-of-the-art performance and latency. -9. **[YOLO11](yolo11.md) NEW 🚀**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks. +9. **[YOLO11](yolo11.md) 🚀 NEW**: Ultralytics' latest YOLO models delivering state-of-the-art (SOTA) performance across multiple tasks. 10. 
**[Segment Anything Model (SAM)](sam.md)**: Meta's original Segment Anything Model (SAM). 11. **[Segment Anything Model 2 (SAM2)](sam-2.md)**: The next generation of Meta's Segment Anything Model (SAM) for videos and images. 12. **[Mobile Segment Anything Model (MobileSAM)](mobile-sam.md)**: MobileSAM for mobile applications, by Kyung Hee University. diff --git a/docs/en/models/mobile-sam.md b/docs/en/models/mobile-sam.md index 26c92f68298..34740c6c07d 100644 --- a/docs/en/models/mobile-sam.md +++ b/docs/en/models/mobile-sam.md @@ -4,14 +4,25 @@ description: Discover MobileSAM, a lightweight and fast image segmentation model keywords: MobileSAM, image segmentation, lightweight model, fast segmentation, mobile applications, SAM, ViT encoder, Tiny-ViT, Ultralytics --- -![MobileSAM Logo](https://github.com/ChaoningZhang/MobileSAM/blob/master/assets/logo2.png) +![MobileSAM Logo](https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/assets/logo2.png) # Mobile Segment Anything (MobileSAM) -The MobileSAM paper is now available on [arXiv](https://arxiv.org/pdf/2306.14289.pdf). +The MobileSAM paper is now available on [arXiv](https://arxiv.org/pdf/2306.14289). A demonstration of MobileSAM running on a CPU can be accessed at this [demo link](https://huggingface.co/spaces/dhkim2810/MobileSAM). The performance on a Mac i5 CPU takes approximately 3 seconds. On the Hugging Face demo, the interface and lower-performance CPUs contribute to a slower response, but it continues to function effectively. +

+
+ +
+ Watch: How to Run Inference with MobileSAM using Ultralytics | Step-by-Step Guide 🎉

+ MobileSAM is implemented in various projects including [Grounding-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything), [AnyLabeling](https://github.com/vietanhdev/anylabeling), and [Segment Anything in 3D](https://github.com/Jumpat/SegmentAnythingin3D). MobileSAM is trained on a single GPU with a 100k dataset (1% of the original images) in less than a day. The code for this training will be made available in the future. @@ -79,8 +90,17 @@ You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blo # Load the model model = SAM("mobile_sam.pt") - # Predict a segment based on a point prompt + # Predict a segment based on a single point prompt model.predict("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + + # Predict multiple segments based on multiple points prompt + model.predict("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Predict a segment based on multiple points prompt per object + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Predict a segment using both positive and negative prompts. 
+ model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` ### Box Prompt @@ -95,12 +115,37 @@ You can download the model [here](https://github.com/ChaoningZhang/MobileSAM/blo # Load the model model = SAM("mobile_sam.pt") - # Predict a segment based on a box prompt - model.predict("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) + # Predict a segment based on a single point prompt + model.predict("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + + # Predict multiple segments based on multiple points prompt + model.predict("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Predict a segment based on multiple points prompt per object + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Predict a segment using both positive and negative prompts. + model.predict("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` We have implemented `MobileSAM` and `SAM` using the same API. For more usage information, please see the [SAM page](sam.md). +### Automatically Build Segmentation Datasets Leveraging a Detection Model + +To automatically annotate your dataset using the Ultralytics framework, utilize the `auto_annotate` function as demonstrated below: + +!!! 
example + + === "Python" + + ```python + from ultralytics.data.annotator import auto_annotate + + auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="mobile_sam.pt") + ``` + +{% include "macros/sam-auto-annotate.md" %} + ## Citations and Acknowledgements If you find MobileSAM useful in your research or development work, please consider citing our paper: diff --git a/docs/en/models/rtdetr.md b/docs/en/models/rtdetr.md index 47710e03209..382e79dbc9f 100644 --- a/docs/en/models/rtdetr.md +++ b/docs/en/models/rtdetr.md @@ -21,7 +21,7 @@ Real-Time Detection Transformer (RT-DETR), developed by Baidu, is a cutting-edge Watch: Real-Time Detection Transformer (RT-DETR)

-![Model example image](https://github.com/ultralytics/docs/releases/download/0/baidu-rtdetr-model-overview.avif) **Overview of Baidu's RT-DETR.** The RT-DETR model architecture diagram shows the last three stages of the backbone {S3, S4, S5} as the input to the encoder. The efficient hybrid encoder transforms multiscale features into a sequence of image features through intrascale feature interaction (AIFI) and cross-scale feature-fusion module (CCFM). The IoU-aware query selection is employed to select a fixed number of image features to serve as initial object queries for the decoder. Finally, the decoder with auxiliary prediction heads iteratively optimizes object queries to generate boxes and confidence scores ([source](https://arxiv.org/pdf/2304.08069.pdf)). +![Model example image](https://github.com/ultralytics/docs/releases/download/0/baidu-rtdetr-model-overview.avif) **Overview of Baidu's RT-DETR.** The RT-DETR model architecture diagram shows the last three stages of the backbone {S3, S4, S5} as the input to the encoder. The efficient hybrid encoder transforms multiscale features into a sequence of image features through intrascale feature interaction (AIFI) and cross-scale feature-fusion module (CCFM). The IoU-aware query selection is employed to select a fixed number of image features to serve as initial object queries for the decoder. Finally, the decoder with auxiliary prediction heads iteratively optimizes object queries to generate boxes and confidence scores ([source](https://arxiv.org/pdf/2304.08069)). ### Key Features @@ -36,6 +36,11 @@ The Ultralytics Python API provides pre-trained PaddlePaddle RT-DETR models with - RT-DETR-L: 53.0% AP on COCO val2017, 114 FPS on T4 GPU - RT-DETR-X: 54.8% AP on COCO val2017, 74 FPS on T4 GPU + + + + + ## Usage Examples This example provides simple RT-DETR training and inference examples. 
For full documentation on these and other [modes](../modes/index.md) see the [Predict](../modes/predict.md), [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md) docs pages. @@ -104,7 +109,7 @@ We would like to acknowledge Baidu and the [PaddlePaddle](https://github.com/Pad ### What is Baidu's RT-DETR model and how does it work? -Baidu's RT-DETR (Real-Time Detection Transformer) is an advanced real-time object detector built upon the Vision Transformer architecture. It efficiently processes multiscale features by decoupling intra-scale interaction and cross-scale fusion through its efficient hybrid encoder. By employing IoU-aware query selection, the model focuses on the most relevant objects, enhancing detection accuracy. Its adaptable inference speed, achieved by adjusting decoder layers without retraining, makes RT-DETR suitable for various real-time object detection scenarios. Learn more about RT-DETR features [here](https://arxiv.org/pdf/2304.08069.pdf). +Baidu's RT-DETR (Real-Time Detection Transformer) is an advanced real-time object detector built upon the Vision Transformer architecture. It efficiently processes multiscale features by decoupling intra-scale interaction and cross-scale fusion through its efficient hybrid encoder. By employing IoU-aware query selection, the model focuses on the most relevant objects, enhancing detection accuracy. Its adaptable inference speed, achieved by adjusting decoder layers without retraining, makes RT-DETR suitable for various real-time object detection scenarios. Learn more about RT-DETR features [here](https://arxiv.org/pdf/2304.08069). ### How can I use the pre-trained RT-DETR models provided by Ultralytics? 
diff --git a/docs/en/models/sam-2.md b/docs/en/models/sam-2.md index 025c18d2678..9dbea52627a 100644 --- a/docs/en/models/sam-2.md +++ b/docs/en/models/sam-2.md @@ -1,9 +1,13 @@ --- comments: true description: Discover SAM 2, the next generation of Meta's Segment Anything Model, supporting real-time promptable segmentation in both images and videos with state-of-the-art performance. Learn about its key features, datasets, and how to use it. -keywords: SAM 2, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning +keywords: SAM 2, SAM 2.1, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning --- +!!! tip "SAM 2.1" + + We have just supported the more accurate SAM2.1 model. Please give it a try! + # SAM 2: Segment Anything Model 2 SAM 2, the successor to Meta's [Segment Anything Model (SAM)](sam.md), is a cutting-edge tool designed for comprehensive object segmentation in both images and videos. It excels in handling complex visual data through a unified, promptable model architecture that supports real-time processing and zero-shot generalization. @@ -12,6 +16,17 @@ SAM 2, the successor to Meta's [Segment Anything Model (SAM)](sam.md), is a cutt ## Key Features +

+
+ +
+ Watch: How to Run Inference with Meta's SAM2 using Ultralytics | Step-by-Step Guide 🎉

+ ### Unified Model Architecture SAM 2 combines the capabilities of image and video segmentation in a single model. This unification simplifies deployment and allows for consistent performance across different media types. It leverages a flexible prompt-based interface, enabling users to specify objects of interest through various prompt types, such as points, bounding boxes, or masks. @@ -54,7 +69,7 @@ SAM 2 sets a new benchmark in the field, outperforming previous models on variou - **Memory Mechanism**: Includes a memory encoder, memory bank, and memory attention module. These components collectively store and utilize information from past frames, enabling the model to maintain consistent object tracking over time. - **Mask Decoder**: Generates the final segmentation masks based on the encoded image features and prompts. In video, it also uses memory context to ensure accurate tracking across frames. -![SAM 2 Architecture Diagram](https://github.com/facebookresearch/segment-anything-2/blob/main/assets/model_diagram.png) +![SAM 2 Architecture Diagram](https://raw.githubusercontent.com/facebookresearch/sam2/refs/heads/main/assets/model_diagram.png) ### Memory Mechanism and Occlusion Handling @@ -103,12 +118,16 @@ pip install ultralytics The following table details the available SAM 2 models, their pre-trained weights, supported tasks, and compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). 
-| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | -| ----------- | ------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | -| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | -| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | -| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | -| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | +| ------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | +| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | 
โŒ | +| SAM 2.1 tiny | [sam2.1_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_t.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2.1 small | [sam2.1_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_s.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2.1 base | [sam2.1_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_b.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | +| SAM 2.1 large | [sam2.1_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_l.pt) | [Instance Segmentation](../tasks/segment.md) | โœ… | โŒ | โŒ | โŒ | ### SAM 2 Prediction Examples @@ -126,16 +145,25 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide from ultralytics import SAM # Load a model - model = SAM("sam2_b.pt") + model = SAM("sam2.1_b.pt") # Display model information (optional) model.info() - # Segment with bounding box prompt + # Run inference with bboxes prompt results = model("path/to/image.jpg", bboxes=[100, 100, 200, 200]) - # Segment with point prompt - results = model("path/to/image.jpg", points=[150, 150], labels=[1]) + # Run inference with single point + results = model(points=[900, 370], labels=[1]) + + # Run inference with multiple points + results = model(points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Run inference with multiple points prompt per object + results = model(points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = model(points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` #### Segment Everything @@ -150,7 +178,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide from ultralytics import SAM # Load a model - model = SAM("sam2_b.pt") + model = SAM("sam2.1_b.pt") # Display model information (optional) model.info() @@ -163,7 +191,35 @@ SAM 
2 can be utilized across a broad spectrum of tasks, including real-time vide ```bash # Run inference with a SAM 2 model - yolo predict model=sam2_b.pt source=path/to/video.mp4 + yolo predict model=sam2.1_b.pt source=path/to/video.mp4 + ``` + +#### Segment Video and Track objects + +!!! example "Segment Video" + + Segment the entire video content with specific prompts and track objects. + + === "Python" + + ```python + from ultralytics.models.sam import SAM2VideoPredictor + + # Create SAM2VideoPredictor + overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2_b.pt") + predictor = SAM2VideoPredictor(overrides=overrides) + + # Run inference with single point + results = predictor(source="test.mp4", points=[920, 470], labels=1) + + # Run inference with multiple points + results = predictor(source="test.mp4", points=[[920, 470], [909, 138]], labels=[1, 1]) + + # Run inference with multiple points prompt per object + results = predictor(source="test.mp4", points=[[[920, 470], [909, 138]]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = predictor(source="test.mp4", points=[[[920, 470], [909, 138]]], labels=[[1, 0]]) ``` - This example demonstrates how SAM 2 can be used to segment the entire content of an image or video if no prompts (bboxes/points/masks) are provided. @@ -183,7 +239,7 @@ Here we compare Meta's smallest SAM 2 model, SAM2-t, with Ultralytics smallest s This comparison shows the order-of-magnitude differences in the model sizes and speeds between models. Whereas SAM presents unique capabilities for automatic segmenting, it is not a direct competitor to YOLOv8 segment models, which are smaller, faster and more efficient. -Tests run on a 2023 Apple M2 Macbook with 16GB of RAM using `torch==2.3.1` and `ultralytics==8.3.82`. To reproduce this test: +Tests run on a 2023 Apple M2 Macbook with 16GB of RAM using `torch==2.3.1` and `ultralytics==8.2.82`. To reproduce this test: !!! 
example @@ -215,6 +271,17 @@ Auto-annotation is a powerful feature of SAM 2, enabling users to generate segme ### How to Auto-Annotate with SAM 2 +

+
+ +
+ Watch: Auto Annotation with Meta's Segment Anything 2 Model using Ultralytics | Data Labeling +

+ To auto-annotate your dataset using SAM 2, follow this example: !!! example "Auto-Annotation Example" @@ -222,16 +289,10 @@ To auto-annotate your dataset using SAM 2, follow this example: ```python from ultralytics.data.annotator import auto_annotate - auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam2_b.pt") + auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam2_b.pt") ``` -| Argument | Type | Description | Default | -| ------------ | ----------------------- | ------------------------------------------------------------------------------------------------------- | -------------- | -| `data` | `str` | Path to a folder containing images to be annotated. | | -| `det_model` | `str`, optional | Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'. | `'yolov8x.pt'` | -| `sam_model` | `str`, optional | Pre-trained SAM 2 segmentation model. Defaults to 'sam2_b.pt'. | `'sam2_b.pt'` | -| `device` | `str`, optional | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | | -| `output_dir` | `str`, `None`, optional | Directory to save the annotated results. Defaults to a 'labels' folder in the same directory as 'data'. | `None` | +{% include "macros/sam-auto-annotate.md" %} This function facilitates the rapid creation of high-quality segmentation datasets, ideal for researchers and developers aiming to accelerate their projects. 
diff --git a/docs/en/models/sam.md b/docs/en/models/sam.md index 304fc002874..15b6b442752 100644 --- a/docs/en/models/sam.md +++ b/docs/en/models/sam.md @@ -58,8 +58,17 @@ The Segment Anything Model can be employed for a multitude of downstream tasks t # Run inference with bboxes prompt results = model("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) - # Run inference with points prompt - results = model("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + # Run inference with single point + results = model(points=[900, 370], labels=[1]) + + # Run inference with multiple points + results = model(points=[[400, 370], [900, 370]], labels=[1, 1]) + + # Run inference with multiple points prompt per object + results = model(points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = model(points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` !!! example "Segment everything" @@ -107,8 +116,16 @@ The Segment Anything Model can be employed for a multitude of downstream tasks t predictor.set_image("ultralytics/assets/zidane.jpg") # set with image file predictor.set_image(cv2.imread("ultralytics/assets/zidane.jpg")) # set with np.ndarray results = predictor(bboxes=[439, 437, 524, 709]) + + # Run inference with single point prompt results = predictor(points=[900, 370], labels=[1]) + # Run inference with multiple points prompt + results = predictor(points=[[400, 370], [900, 370]], labels=[[1, 1]]) + + # Run inference with negative points prompt + results = predictor(points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) + # Reset image predictor.reset_image() ``` @@ -188,16 +205,10 @@ To auto-annotate your dataset with the Ultralytics framework, use the `auto_anno ```python from ultralytics.data.annotator import auto_annotate - auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model="sam_b.pt") + auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam_b.pt") ``` -| 
Argument | Type | Description | Default | -| ------------ | --------------------- | ------------------------------------------------------------------------------------------------------- | -------------- | -| `data` | `str` | Path to a folder containing images to be annotated. | | -| `det_model` | `str`, optional | Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'. | `'yolov8x.pt'` | -| `sam_model` | `str`, optional | Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'. | `'sam_b.pt'` | -| `device` | `str`, optional | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | | -| `output_dir` | `str`, None, optional | Directory to save the annotated results. Defaults to a 'labels' folder in the same directory as 'data'. | `None` | +{% include "macros/sam-auto-annotate.md" %} The `auto_annotate` function takes the path to your images, with optional arguments for specifying the pre-trained detection and SAM segmentation models, the device to run the models on, and the output directory for saving the annotated results. @@ -245,6 +256,15 @@ model("ultralytics/assets/zidane.jpg", bboxes=[439, 437, 524, 709]) # Segment with points prompt model("ultralytics/assets/zidane.jpg", points=[900, 370], labels=[1]) + +# Segment with multiple points prompt +model("ultralytics/assets/zidane.jpg", points=[[400, 370], [900, 370]], labels=[[1, 1]]) + +# Segment with multiple points prompt per object +model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 1]]) + +# Segment with negative points prompt. 
+model("ultralytics/assets/zidane.jpg", points=[[[400, 370], [900, 370]]], labels=[[1, 0]]) ``` Alternatively, you can run inference with SAM in the command line interface (CLI): diff --git a/docs/en/models/yolo-nas.md b/docs/en/models/yolo-nas.md index 5523cb1b322..394bc831973 100644 --- a/docs/en/models/yolo-nas.md +++ b/docs/en/models/yolo-nas.md @@ -149,7 +149,8 @@ YOLO-NAS introduces several key features that make it a superior choice for obje - **Quantization-Friendly Basic Block:** Enhanced architecture that improves model performance with minimal [precision](https://www.ultralytics.com/glossary/precision) drop post quantization. - **Sophisticated Training and Quantization:** Employs advanced training schemes and post-training quantization techniques. - **AutoNAC Optimization and Pre-training:** Utilizes AutoNAC optimization and is pre-trained on prominent datasets like COCO, Objects365, and Roboflow 100. - These features contribute to its high accuracy, efficient performance, and suitability for deployment in production environments. Learn more in the [Key Features](#key-features) section. + +These features contribute to its high accuracy, efficient performance, and suitability for deployment in production environments. Learn more in the [Key Features](#key-features) section. ### Which tasks and modes are supported by YOLO-NAS models? 
diff --git a/docs/en/models/yolo-world.md b/docs/en/models/yolo-world.md index 96a6e0b6068..2831fff638b 100644 --- a/docs/en/models/yolo-world.md +++ b/docs/en/models/yolo-world.md @@ -320,7 +320,7 @@ This approach provides a powerful means of customizing state-of-the-art object d ## Citations and Acknowledgements -We extend our gratitude to the [Tencent AILab Computer Vision Center](https://ai.tencent.com/) for their pioneering work in real-time open-vocabulary object detection with YOLO-World: +We extend our gratitude to the [Tencent AILab Computer Vision Center](https://www.tencent.com/) for their pioneering work in real-time open-vocabulary object detection with YOLO-World: !!! quote "" @@ -335,7 +335,7 @@ We extend our gratitude to the [Tencent AILab Computer Vision Center](https://ai } ``` -For further reading, the original YOLO-World paper is available on [arXiv](https://arxiv.org/pdf/2401.17270v2.pdf). The project's source code and additional resources can be accessed via their [GitHub repository](https://github.com/AILab-CVC/YOLO-World). We appreciate their commitment to advancing the field and sharing their valuable insights with the community. +For further reading, the original YOLO-World paper is available on [arXiv](https://arxiv.org/pdf/2401.17270v2). The project's source code and additional resources can be accessed via their [GitHub repository](https://github.com/AILab-CVC/YOLO-World). We appreciate their commitment to advancing the field and sharing their valuable insights with the community. 
## FAQ diff --git a/docs/en/models/yolo11.md b/docs/en/models/yolo11.md index fcda8726c12..05761521751 100644 --- a/docs/en/models/yolo11.md +++ b/docs/en/models/yolo11.md @@ -8,19 +8,27 @@ keywords: YOLO11, state-of-the-art object detection, YOLO series, Ultralytics, c ## Overview -YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. +YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com/) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks. -![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845) +![Ultralytics YOLO11 Comparison Plots](https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png) + +
+ +

Ultralytics YOLO11 🚀 Podcast generated by NotebookLM

+


-
- Watch: Ultralytics YOLO11 Announcement at YOLO Vision 2024 + Watch: How to Use Ultralytics YOLO11 for Object Detection and Tracking | How to Benchmark | YOLO11 RELEASED 🚀

## Key Features @@ -47,67 +55,52 @@ This table provides an overview of the YOLO11 model variants, showcasing their a ## Performance Metrics + + + + + !!! performance === "Detection (COCO)" See [Detection Docs](../tasks/detect.md) for usage examples with these models trained on [COCO](../datasets/detect/coco.md), which include 80 pre-trained classes. - | Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | - | ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | --------------------------------------- | ------------------ | ----------------- | - | [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 640 | 39.5 | 56.12 ยฑ 0.82 ms | 1.55 ยฑ 0.01 ms | 2.6 | 6.5 | - | [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | 640 | 47.0 | 90.01 ยฑ 1.17 ms | 2.46 ยฑ 0.00 ms | 9.4 | 21.5 | - | [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | 640 | 51.5 | 183.20 ยฑ 2.04 ms | 4.70 ยฑ 0.06 ms | 20.1 | 68.0 | - | [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | 640 | 53.4 | 238.64 ยฑ 1.39 ms | 6.16 ยฑ 0.08 ms | 25.3 | 86.9 | - | [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | 640 | 54.7 | 462.78 ยฑ 6.66 ms | 11.31 ยฑ 0.24 ms | 56.9 | 194.9 | +{% filter indent(width=8, first=False, blank=True) %} +{% include "macros/yolo-det-perf.md" %} +{% endfilter %} === "Segmentation (COCO)" See [Segmentation Docs](../tasks/segment.md) for usage examples with these models trained on [COCO](../datasets/segment/coco.md), which include 80 pre-trained classes. - | Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | - | -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | --------------------------------------- | ------------------ | ----------------- | - | [YOLO11n-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-seg.pt) | 640 | 38.9 | 32.0 | 65.90 ยฑ 1.14 ms | 1.84 ยฑ 0.00 ms | 2.9 | 10.4 | - | [YOLO11s-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-seg.pt) | 640 | 46.6 | 37.8 | 117.56 ยฑ 4.89 ms | 2.94 ยฑ 0.01 ms | 10.1 | 35.5 | - | [YOLO11m-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-seg.pt) | 640 | 51.5 | 41.5 | 281.63 ยฑ 1.16 ms | 6.31 ยฑ 0.09 ms | 22.4 | 123.3 | - | [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.16 ยฑ 3.17 ms | 7.78 ยฑ 0.16 ms | 27.6 | 142.2 | - | [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.50 ยฑ 3.24 ms | 15.75 ยฑ 0.67 ms | 62.1 | 319.0 | +{% filter indent(width=8, first=False, blank=True) %} +{% include "macros/yolo-seg-perf.md" %} +{% endfilter %} === "Classification (ImageNet)" See [Classification Docs](../tasks/classify.md) for usage examples with these models trained on [ImageNet](../datasets/classify/imagenet.md), which include 1000 pre-trained classes. - | Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) at 640 | - | -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | --------------------------------------- | ------------------ | ------------------------ | - | [YOLO11n-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt) | 224 | 70.0 | 89.4 | 5.03 ยฑ 0.32 ms | 1.10 ยฑ 0.01 ms | 1.6 | 3.3 | - | [YOLO11s-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt) | 224 | 75.4 | 92.7 | 7.89 ยฑ 0.18 ms | 1.34 ยฑ 0.01 ms | 5.5 | 12.1 | - | [YOLO11m-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-cls.pt) | 224 | 77.3 | 93.9 | 17.17 ยฑ 0.40 ms | 1.95 ยฑ 0.00 ms | 10.4 | 39.3 | - | [YOLO11l-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-cls.pt) | 224 | 78.3 | 94.3 | 23.17 ยฑ 0.29 ms | 2.76 ยฑ 0.00 ms | 12.9 | 49.4 | - | [YOLO11x-cls](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-cls.pt) | 224 | 79.5 | 94.9 | 41.41 ยฑ 0.94 ms | 3.82 ยฑ 0.00 ms | 28.4 | 110.4 | +{% filter indent(width=8, first=False, blank=True) %} +{% include "macros/yolo-cls-perf.md" %} +{% endfilter %} === "Pose (COCO)" See [Pose Estimation Docs](../tasks/pose.md) for usage examples with these models trained on [COCO](../datasets/pose/coco.md), which include 1 pre-trained class, 'person'. - | Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | - | ---------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | --------------------------------------- | ------------------ | ----------------- | - | [YOLO11n-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt) | 640 | 50.0 | 81.0 | 52.40 ยฑ 0.51 ms | 1.72 ยฑ 0.01 ms | 2.9 | 7.6 | - | [YOLO11s-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-pose.pt) | 640 | 58.9 | 86.3 | 90.54 ยฑ 0.59 ms | 2.57 ยฑ 0.00 ms | 9.9 | 23.2 | - | [YOLO11m-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-pose.pt) | 640 | 64.9 | 89.4 | 187.28 ยฑ 0.77 ms | 4.94 ยฑ 0.05 ms | 20.9 | 71.7 | - | [YOLO11l-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-pose.pt) | 640 | 66.1 | 89.9 | 247.69 ยฑ 1.10 ms | 6.42 ยฑ 0.13 ms | 26.2 | 90.7 | - | [YOLO11x-pose](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-pose.pt) | 640 | 69.5 | 91.1 | 487.97 ยฑ 13.91 ms | 12.06 ยฑ 0.20 ms | 58.8 | 203.3 | +{% filter indent(width=8, first=False, blank=True) %} +{% include "macros/yolo-pose-perf.md" %} +{% endfilter %} === "OBB (DOTAv1)" See [Oriented Detection Docs](../tasks/obb.md) for usage examples with these models trained on [DOTAv1](../datasets/obb/dota-v2.md#dota-v10), which include 15 pre-trained classes. - | Model | size
(pixels) | mAPtest
50 | Speed
CPU ONNX
(ms) | Speed
T4 TensorRT10
(ms) | params
(M) | FLOPs
(B) | - | -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | --------------------------------------- | ------------------ | ----------------- | - | [YOLO11n-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-obb.pt) | 1024 | 78.4 | 117.56 ยฑ 0.80 ms | 4.43 ยฑ 0.01 ms | 2.7 | 17.2 | - | [YOLO11s-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-obb.pt) | 1024 | 79.5 | 219.41 ยฑ 4.00 ms | 5.13 ยฑ 0.02 ms | 9.7 | 57.5 | - | [YOLO11m-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m-obb.pt) | 1024 | 80.9 | 562.81 ยฑ 2.87 ms | 10.07 ยฑ 0.38 ms | 20.9 | 183.5 | - | [YOLO11l-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-obb.pt) | 1024 | 81.0 | 712.49 ยฑ 4.98 ms | 13.46 ยฑ 0.55 ms | 26.2 | 232.0 | - | [YOLO11x-obb](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-obb.pt) | 1024 | 81.3 | 1408.63 ยฑ 7.67 ms | 28.59 ยฑ 0.96 ms | 58.8 | 520.2 | +{% filter indent(width=8, first=False, blank=True) %} +{% include "macros/yolo-obb-perf.md" %} +{% endfilter %} ## Usage Examples @@ -148,6 +141,10 @@ Note that the example below is for YOLO11 [Detect](../tasks/detect.md) models fo ## Citations and Acknowledgements +!!! tip "Ultralytics YOLO11 Publication" + + Ultralytics has not published a formal research paper for YOLO11 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com/). + If you use YOLO11 or any other software from this repository in your work, please cite it using the following format: !!! 
quote "" diff --git a/docs/en/models/yolov10.md b/docs/en/models/yolov10.md index 4cfd0cf9028..2f6de949db0 100644 --- a/docs/en/models/yolov10.md +++ b/docs/en/models/yolov10.md @@ -53,6 +53,11 @@ YOLOv10 comes in various model scales to cater to different application needs: ## Performance + + + + + YOLOv10 outperforms previous YOLO versions and other state-of-the-art models in terms of accuracy and efficiency. For example, YOLOv10-S is 1.8x faster than RT-DETR-R18 with similar AP on the COCO dataset, and YOLOv10-B has 46% less latency and 25% fewer parameters than YOLOv9-C with the same performance. | Model | Input Size | APval | FLOPs (G) | Latency (ms) | diff --git a/docs/en/models/yolov3.md b/docs/en/models/yolov3.md index 33e3cd8256e..d2d06031f83 100644 --- a/docs/en/models/yolov3.md +++ b/docs/en/models/yolov3.md @@ -4,7 +4,7 @@ description: Discover YOLOv3 and its variants YOLOv3-Ultralytics and YOLOv3u. Le keywords: YOLOv3, YOLOv3-Ultralytics, YOLOv3u, object detection, Ultralytics, computer vision, AI models, deep learning --- -# YOLOv3, YOLOv3-Ultralytics, and YOLOv3u +# YOLOv3, and YOLOv3u ## Overview @@ -12,9 +12,7 @@ This document presents an overview of three closely related object detection mod 1. **YOLOv3:** This is the third version of the You Only Look Once (YOLO) object detection algorithm. Originally developed by Joseph Redmon, YOLOv3 improved on its predecessors by introducing features such as multiscale predictions and three different sizes of detection kernels. -2. **YOLOv3-Ultralytics:** This is Ultralytics' implementation of the YOLOv3 model. It reproduces the original YOLOv3 architecture and offers additional functionalities, such as support for more pre-trained models and easier customization options. - -3. **YOLOv3u:** This is an updated version of YOLOv3-Ultralytics that incorporates the anchor-free, objectness-free split head used in YOLOv8 models. 
YOLOv3u maintains the same backbone and neck architecture as YOLOv3 but with the updated detection head from YOLOv8. +2. **YOLOv3u:** This is an updated version of YOLOv3-Ultralytics that incorporates the anchor-free, objectness-free split head used in YOLOv8 models. YOLOv3u maintains the same backbone and neck architecture as YOLOv3 but with the updated detection head from YOLOv8. ![Ultralytics YOLOv3](https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov3-banner.avif) @@ -22,23 +20,21 @@ This document presents an overview of three closely related object detection mod - **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each [bounding box](https://www.ultralytics.com/glossary/bounding-box) and a better feature extractor network. -- **YOLOv3-Ultralytics:** Ultralytics' implementation of YOLOv3 provides the same performance as the original model but comes with added support for more pre-trained models, additional training methods, and easier customization options. This makes it more versatile and user-friendly for practical applications. - - **YOLOv3u:** This updated model incorporates the anchor-free, objectness-free split head from YOLOv8. By eliminating the need for pre-defined anchor boxes and objectness scores, this detection head design can improve the model's ability to detect objects of varying sizes and shapes. This makes YOLOv3u more robust and accurate for object detection tasks. ## Supported Tasks and Modes -The YOLOv3 series, including YOLOv3, YOLOv3-Ultralytics, and YOLOv3u, are designed specifically for object detection tasks. These models are renowned for their effectiveness in various real-world scenarios, balancing accuracy and speed. 
Each variant offers unique features and optimizations, making them suitable for a range of applications. +YOLOv3 is designed specifically for object detection tasks. Ultralytics supports three variants of YOLOv3: `yolov3u`, `yolov3-tinyu` and `yolov3-sppu`. The `u` in the name signifies that these utilize the anchor-free head of YOLOv8, unlike their original architecture which is anchor-based. These models are renowned for their effectiveness in various real-world scenarios, balancing accuracy and speed. Each variant offers unique features and optimizations, making them suitable for a range of applications. All three models support a comprehensive set of modes, ensuring versatility in various stages of [model deployment](https://www.ultralytics.com/glossary/model-deployment) and development. These modes include [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md), providing users with a complete toolkit for effective object detection. 
-| Model Type | Tasks Supported | Inference | Validation | Training | Export | -| ------------------ | -------------------------------------- | --------- | ---------- | -------- | ------ | -| YOLOv3 | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv3-Ultralytics | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv3u | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| Model Type | Pre-Trained Weights | Tasks Supported | Inference | Validation | Training | Export | +| -------------- | ------------------- | -------------------------------------- | --------- | ---------- | -------- | ------ | +| YOLOv3(u) | `yolov3u.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-Tiny(u) | `yolov3-tinyu.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv3-SPP(u) | `yolov3-sppu.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -This table provides an at-a-glance view of the capabilities of each YOLOv3 variant, highlighting their versatility and suitability for various tasks and operational modes in object detection workflows. +This table provides an at-a-glance view of the capabilities of each YOLOv3 variant, highlighting their versatility and suitability for various tasks and operational modes in object detection workflows. ## Usage Examples @@ -53,8 +49,8 @@ This example provides simple YOLOv3 training and inference examples. For full do ```python from ultralytics import YOLO - # Load a COCO-pretrained YOLOv3n model - model = YOLO("yolov3n.pt") + # Load a COCO-pretrained YOLOv3u model + model = YOLO("yolov3u.pt") # Display model information (optional) model.info() @@ -62,7 +58,7 @@ This example provides simple YOLOv3 training and inference examples. 
For full do # Train the model on the COCO8 example dataset for 100 epochs results = model.train(data="coco8.yaml", epochs=100, imgsz=640) - # Run inference with the YOLOv3n model on the 'bus.jpg' image + # Run inference with the YOLOv3u model on the 'bus.jpg' image results = model("path/to/bus.jpg") ``` @@ -71,11 +67,11 @@ This example provides simple YOLOv3 training and inference examples. For full do CLI commands are available to directly run the models: ```bash - # Load a COCO-pretrained YOLOv3n model and train it on the COCO8 example dataset for 100 epochs - yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + # Load a COCO-pretrained YOLOv3u model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov3u.pt data=coco8.yaml epochs=100 imgsz=640 - # Load a COCO-pretrained YOLOv3n model and run inference on the 'bus.jpg' image - yolo predict model=yolov3n.pt source=path/to/bus.jpg + # Load a COCO-pretrained YOLOv3u model and run inference on the 'bus.jpg' image + yolo predict model=yolov3u.pt source=path/to/bus.jpg ``` ## Citations and Acknowledgements @@ -114,8 +110,8 @@ Training a YOLOv3 model with Ultralytics is straightforward. You can train the m ```python from ultralytics import YOLO - # Load a COCO-pretrained YOLOv3n model - model = YOLO("yolov3n.pt") + # Load a COCO-pretrained YOLOv3u model + model = YOLO("yolov3u.pt") # Train the model on the COCO8 example dataset for 100 epochs results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -124,8 +120,8 @@ Training a YOLOv3 model with Ultralytics is straightforward. 
You can train the m === "CLI" ```bash - # Load a COCO-pretrained YOLOv3n model and train it on the COCO8 example dataset for 100 epochs - yolo train model=yolov3n.pt data=coco8.yaml epochs=100 imgsz=640 + # Load a COCO-pretrained YOLOv3u model and train it on the COCO8 example dataset for 100 epochs + yolo train model=yolov3u.pt data=coco8.yaml epochs=100 imgsz=640 ``` For more comprehensive training options and guidelines, visit our [Train mode documentation](../modes/train.md). @@ -145,25 +141,25 @@ You can perform inference using YOLOv3 models by either Python scripts or CLI co ```python from ultralytics import YOLO - # Load a COCO-pretrained YOLOv3n model - model = YOLO("yolov3n.pt") + # Load a COCO-pretrained YOLOv3u model + model = YOLO("yolov3u.pt") - # Run inference with the YOLOv3n model on the 'bus.jpg' image + # Run inference with the YOLOv3u model on the 'bus.jpg' image results = model("path/to/bus.jpg") ``` === "CLI" ```bash - # Load a COCO-pretrained YOLOv3n model and run inference on the 'bus.jpg' image - yolo predict model=yolov3n.pt source=path/to/bus.jpg + # Load a COCO-pretrained YOLOv3u model and run inference on the 'bus.jpg' image + yolo predict model=yolov3u.pt source=path/to/bus.jpg ``` Refer to the [Inference mode documentation](../modes/predict.md) for more details on running YOLO models. ### What tasks are supported by YOLOv3 and its variants? -YOLOv3, YOLOv3-Ultralytics, and YOLOv3u primarily support object detection tasks. These models can be used for various stages of model deployment and development, such as Inference, Validation, Training, and Export. For a comprehensive set of tasks supported and more in-depth details, visit our [Object Detection tasks documentation](../tasks/detect.md). +YOLOv3, YOLOv3-Tiny and YOLOv3-SPP primarily support object detection tasks. These models can be used for various stages of model deployment and development, such as Inference, Validation, Training, and Export. 
For a comprehensive set of tasks supported and more in-depth details, visit our [Object Detection tasks documentation](../tasks/detect.md). ### Where can I find resources to cite YOLOv3 in my research? diff --git a/docs/en/models/yolov5.md b/docs/en/models/yolov5.md index 8ff1c36ec05..2e4f80cd645 100644 --- a/docs/en/models/yolov5.md +++ b/docs/en/models/yolov5.md @@ -4,7 +4,7 @@ description: Explore YOLOv5u, an advanced object detection model with optimized keywords: YOLOv5, YOLOv5u, object detection, Ultralytics, anchor-free, pre-trained models, accuracy, speed, real-time detection --- -# YOLOv5 +# Ultralytics YOLOv5 ## Overview @@ -32,6 +32,11 @@ This table provides a detailed overview of the YOLOv5u model variants, highlight ## Performance Metrics + + + + + !!! performance === "Detection" @@ -92,6 +97,10 @@ This example provides simple YOLOv5 training and inference examples. For full do ## Citations and Acknowledgements +!!! tip "Ultralytics YOLOv5 Publication" + + Ultralytics has not published a formal research paper for YOLOv5 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com/). + If you use YOLOv5 or YOLOv5u in your research, please cite the Ultralytics YOLOv5 repository as follows: !!! 
quote "" diff --git a/docs/en/models/yolov6.md b/docs/en/models/yolov6.md index c41b40c8389..a4b5e132647 100644 --- a/docs/en/models/yolov6.md +++ b/docs/en/models/yolov6.md @@ -8,7 +8,7 @@ keywords: Meituan YOLOv6, object detection, real-time applications, BiC module, ## Overview -[Meituan](https://about.meituan.com/) YOLOv6 is a cutting-edge object detector that offers remarkable balance between speed and accuracy, making it a popular choice for real-time applications. This model introduces several notable enhancements on its architecture and training scheme, including the implementation of a Bi-directional Concatenation (BiC) module, an anchor-aided training (AAT) strategy, and an improved backbone and neck design for state-of-the-art accuracy on the COCO dataset. +[Meituan](https://www.meituan.com/) YOLOv6 is a cutting-edge object detector that offers remarkable balance between speed and accuracy, making it a popular choice for real-time applications. This model introduces several notable enhancements on its architecture and training scheme, including the implementation of a Bi-directional Concatenation (BiC) module, an anchor-aided training (AAT) strategy, and an improved backbone and neck design for state-of-the-art accuracy on the COCO dataset. ![Meituan YOLOv6](https://github.com/ultralytics/docs/releases/download/0/meituan-yolov6.avif) ![Model example image](https://github.com/ultralytics/docs/releases/download/0/yolov6-architecture-diagram.avif) **Overview of YOLOv6.** Model architecture diagram showing the redesigned network components and training strategies that have led to significant performance improvements. (a) The neck of YOLOv6 (N and S are shown). Note for M/L, RepBlocks is replaced with CSPStackRep. (b) The structure of a BiC module. (c) A SimCSPSPPF block. ([source](https://arxiv.org/pdf/2301.05586.pdf)). 
@@ -22,6 +22,11 @@ keywords: Meituan YOLOv6, object detection, real-time applications, BiC module, ## Performance Metrics + + + + + YOLOv6 provides various pre-trained models with different scales: - YOLOv6-N: 37.5% AP on COCO val2017 at 1187 FPS with NVIDIA T4 GPU. @@ -40,7 +45,7 @@ This example provides simple YOLOv6 training and inference examples. For full do === "Python" - [PyTorch](https://www.ultralytics.com/glossary/pytorch) pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: + YOLOv6 `*.yaml` files can be passed to the `YOLO()` class to build the corresponding model in Python: ```python from ultralytics import YOLO @@ -74,13 +79,13 @@ This example provides simple YOLOv6 training and inference examples. For full do The YOLOv6 series offers a range of models, each optimized for high-performance [Object Detection](../tasks/detect.md). These models cater to varying computational needs and [accuracy](https://www.ultralytics.com/glossary/accuracy) requirements, making them versatile for a wide array of applications. 
-| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | -| ---------- | ------------------- | -------------------------------------- | --------- | ---------- | -------- | ------ | -| YOLOv6-N | `yolov6-n.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv6-S | `yolov6-s.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv6-M | `yolov6-m.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv6-L | `yolov6-l.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | -| YOLOv6-L6 | `yolov6-l6.pt` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| Model | Filenames | Tasks | Inference | Validation | Training | Export | +| -------- | -------------- | -------------------------------------- | --------- | ---------- | -------- | ------ | +| YOLOv6-N | `yolov6n.yaml` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-S | `yolov6s.yaml` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-M | `yolov6m.yaml` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-L | `yolov6l.yaml` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | +| YOLOv6-X | `yolov6x.yaml` | [Object Detection](../tasks/detect.md) | ✅ | ✅ | ✅ | ✅ | This table provides a detailed overview of the YOLOv6 model variants, highlighting their capabilities in [object detection](https://www.ultralytics.com/glossary/object-detection) tasks and their compatibility with various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). This comprehensive support ensures that users can fully leverage the capabilities of YOLOv6 models in a broad range of object detection scenarios.
diff --git a/docs/en/models/yolov7.md b/docs/en/models/yolov7.md index 1ba9dc271b9..5eea0673d89 100644 --- a/docs/en/models/yolov7.md +++ b/docs/en/models/yolov7.md @@ -12,7 +12,14 @@ YOLOv7 is a state-of-the-art real-time object detector that surpasses all known ## Comparison of SOTA object detectors -From the results in the YOLO comparison table we know that the proposed method has the best speed-accuracy trade-off comprehensively. If we compare YOLOv7-tiny-SiLU with YOLOv5-N (r6.1), our method is 127 fps faster and 10.7% more accurate on AP. In addition, YOLOv7 has 51.4% AP at frame rate of 161 fps, while PPYOLOE-L with the same AP has only 78 fps frame rate. In terms of parameter usage, YOLOv7 is 41% less than PPYOLOE-L. If we compare YOLOv7-X with 114 fps inference speed to YOLOv5-L (r6.1) with 99 fps inference speed, YOLOv7-X can improve AP by 3.9%. If YOLOv7-X is compared with YOLOv5-X (r6.1) of similar scale, the inference speed of YOLOv7-X is 31 fps faster. In addition, in terms the amount of parameters and computation, YOLOv7-X reduces 22% of parameters and 8% of computation compared to YOLOv5-X (r6.1), but improves AP by 2.2% ([Source](https://arxiv.org/pdf/2207.02696.pdf)). +From the results in the YOLO comparison table we know that the proposed method has the best speed-accuracy trade-off comprehensively. If we compare YOLOv7-tiny-SiLU with YOLOv5-N (r6.1), our method is 127 fps faster and 10.7% more accurate on AP. In addition, YOLOv7 has 51.4% AP at frame rate of 161 fps, while PPYOLOE-L with the same AP has only 78 fps frame rate. In terms of parameter usage, YOLOv7 is 41% less than PPYOLOE-L. + + + + + + +If we compare YOLOv7-X with 114 fps inference speed to YOLOv5-L (r6.1) with 99 fps inference speed, YOLOv7-X can improve AP by 3.9%. If YOLOv7-X is compared with YOLOv5-X (r6.1) of similar scale, the inference speed of YOLOv7-X is 31 fps faster. 
In addition, in terms of the amount of parameters and computation, YOLOv7-X reduces 22% of parameters and 8% of computation compared to YOLOv5-X (r6.1), but improves AP by 2.2% ([Source](https://arxiv.org/pdf/2207.02696)). | Model | Params
(M) | FLOPs
(G) | Size
(pixels) | FPS | APtest / val
50-95 | APtest
50 | APtest
75 | APtest
S | APtest
M | APtest
L | | --------------------- | ------------------ | ----------------- | --------------------- | ------- | -------------------------- | ----------------- | ----------------- | ---------------- | ---------------- | ---------------- | @@ -111,13 +118,13 @@ We would like to acknowledge the YOLOv7 authors for their significant contributi } ``` -The original YOLOv7 paper can be found on [arXiv](https://arxiv.org/pdf/2207.02696.pdf). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/WongKinYiu/yolov7). We appreciate their efforts in advancing the field and making their work accessible to the broader community. +The original YOLOv7 paper can be found on [arXiv](https://arxiv.org/pdf/2207.02696). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/WongKinYiu/yolov7). We appreciate their efforts in advancing the field and making their work accessible to the broader community. ## FAQ ### What is YOLOv7 and why is it considered a breakthrough in real-time [object detection](https://www.ultralytics.com/glossary/object-detection)? -YOLOv7 is a cutting-edge real-time object detection model that achieves unparalleled speed and accuracy. It surpasses other models, such as YOLOX, YOLOv5, and PPYOLOE, in both parameters usage and inference speed. YOLOv7's distinguishing features include its model re-parameterization and dynamic label assignment, which optimize its performance without increasing inference costs. For more technical details about its architecture and comparison metrics with other state-of-the-art object detectors, refer to the [YOLOv7 paper](https://arxiv.org/pdf/2207.02696.pdf). +YOLOv7 is a cutting-edge real-time object detection model that achieves unparalleled speed and accuracy. It surpasses other models, such as YOLOX, YOLOv5, and PPYOLOE, in both parameters usage and inference speed. 
YOLOv7's distinguishing features include its model re-parameterization and dynamic label assignment, which optimize its performance without increasing inference costs. For more technical details about its architecture and comparison metrics with other state-of-the-art object detectors, refer to the [YOLOv7 paper](https://arxiv.org/pdf/2207.02696). ### How does YOLOv7 improve on previous YOLO models like YOLOv4 and YOLOv5? @@ -151,4 +158,5 @@ YOLOv7 offers several key features that revolutionize real-time object detection - **Dynamic Label Assignment**: Uses a coarse-to-fine lead guided method to assign dynamic targets for outputs across different branches, improving accuracy. - **Extended and Compound Scaling**: Efficiently utilizes parameters and computation to scale the model for various real-time applications. - **Efficiency**: Reduces parameter count by 40% and computation by 50% compared to other state-of-the-art models while achieving faster inference speeds. - For further details on these features, see the [YOLOv7 Overview](#overview) section. + +For further details on these features, see the [YOLOv7 Overview](#overview) section. diff --git a/docs/en/models/yolov8.md b/docs/en/models/yolov8.md index 036cd305a1e..bf1d0b1b86b 100644 --- a/docs/en/models/yolov8.md +++ b/docs/en/models/yolov8.md @@ -8,7 +8,7 @@ keywords: YOLOv8, real-time object detection, YOLO series, Ultralytics, computer ## Overview -YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduces new features and optimizations that make it an ideal choice for various [object detection](https://www.ultralytics.com/glossary/object-detection) tasks in a wide range of applications. +YOLOv8 was released by Ultralytics on January 10th, 2023, offering cutting-edge performance in terms of accuracy and speed.
Building upon the advancements of previous YOLO versions, YOLOv8 introduced new features and optimizations that make it an ideal choice for various [object detection](https://www.ultralytics.com/glossary/object-detection) tasks in a wide range of applications. ![Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/yolov8-comparison-plots.avif) @@ -48,6 +48,11 @@ This table provides an overview of the YOLOv8 model variants, highlighting their ## Performance Metrics + + + + + !!! performance === "Detection (COCO)" @@ -165,6 +170,10 @@ Note the below example is for YOLOv8 [Detect](../tasks/detect.md) models for obj ## Citations and Acknowledgements +!!! tip "Ultralytics YOLOv8 Publication" + + Ultralytics has not published a formal research paper for YOLOv8 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com/). + If you use the YOLOv8 model or any other software from this repository in your work, please cite it using the following format: !!! quote "" diff --git a/docs/en/models/yolov9.md b/docs/en/models/yolov9.md index 5c42a676767..bc109d5f5dc 100644 --- a/docs/en/models/yolov9.md +++ b/docs/en/models/yolov9.md @@ -86,6 +86,11 @@ By benchmarking, you can ensure that your model not only performs well in contro ## Performance on MS COCO Dataset + + + + + The performance of YOLOv9 on the [COCO dataset](../datasets/detect/coco.md) exemplifies its significant advancements in real-time object detection, setting new benchmarks across various model sizes. 
Table 1 presents a comprehensive comparison of state-of-the-art real-time object detectors, illustrating YOLOv9's superior efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy). **Table 1. Comparison of State-of-the-Art Real-Time Object Detectors** @@ -173,10 +178,10 @@ This example provides simple YOLOv9 training and inference examples. For full do The YOLOv9 series offers a range of models, each optimized for high-performance [Object Detection](../tasks/detect.md). These models cater to varying computational needs and accuracy requirements, making them versatile for a wide array of applications. -| Model | Filenames | Tasks | Inference | Validation | Training | Export | -| ---------- | ------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | -| YOLOv9 | `yolov9t` `yolov9s` `yolov9m` `yolov9c.pt` `yolov9e.pt` | [Object Detection](../tasks/detect.md) | โœ… | โœ… | โœ… | โœ… | -| YOLOv9-seg | `yolov9c-seg.pt` `yolov9e-seg.pt` | [Instance Segmentation](../tasks/segment.md) | โœ… | โœ… | โœ… | โœ… | +| Model | Filenames | Tasks | Inference | Validation | Training | Export | +| ---------- | ---------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ | +| YOLOv9 | `yolov9t.pt` `yolov9s.pt` `yolov9m.pt` `yolov9c.pt` `yolov9e.pt` | [Object Detection](../tasks/detect.md) | โœ… | โœ… | โœ… | โœ… | +| YOLOv9-seg | `yolov9c-seg.pt` `yolov9e-seg.pt` | [Instance Segmentation](../tasks/segment.md) | โœ… | โœ… | โœ… | โœ… | This table provides a detailed overview of the YOLOv9 model variants, highlighting their capabilities in object detection tasks and their compatibility with various operational modes such as [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). 
This comprehensive support ensures that users can fully leverage the capabilities of YOLOv9 models in a broad range of object detection scenarios. @@ -201,7 +206,7 @@ We would like to acknowledge the YOLOv9 authors for their significant contributi } ``` -The original YOLOv9 paper can be found on [arXiv](https://arxiv.org/pdf/2402.13616.pdf). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/WongKinYiu/yolov9). We appreciate their efforts in advancing the field and making their work accessible to the broader community. +The original YOLOv9 paper can be found on [arXiv](https://arxiv.org/pdf/2402.13616). The authors have made their work publicly available, and the codebase can be accessed on [GitHub](https://github.com/WongKinYiu/yolov9). We appreciate their efforts in advancing the field and making their work accessible to the broader community. ## FAQ diff --git a/docs/en/modes/benchmark.md b/docs/en/modes/benchmark.md index 209a0e03e07..14ebc540c1d 100644 --- a/docs/en/modes/benchmark.md +++ b/docs/en/modes/benchmark.md @@ -1,26 +1,37 @@ --- comments: true -description: Learn how to evaluate your YOLOv8 model's performance in real-world scenarios using benchmark mode. Optimize speed, accuracy, and resource allocation across export formats. -keywords: model benchmarking, YOLOv8, Ultralytics, performance evaluation, export formats, ONNX, TensorRT, OpenVINO, CoreML, TensorFlow, optimization, mAP50-95, inference time +description: Learn how to evaluate your YOLO11 model's performance in real-world scenarios using benchmark mode. Optimize speed, accuracy, and resource allocation across export formats. +keywords: model benchmarking, YOLO11, Ultralytics, performance evaluation, export formats, ONNX, TensorRT, OpenVINO, CoreML, TensorFlow, optimization, mAP50-95, inference time --- # Model Benchmarking with Ultralytics YOLO Ultralytics YOLO ecosystem and integrations +## Benchmark Visualization + +!!! 
tip "Refresh Browser" + + You may need to refresh the page to view the graphs correctly due to potential cookie issues. + + + + + + ## Introduction -Once your model is trained and validated, the next logical step is to evaluate its performance in various real-world scenarios. Benchmark mode in Ultralytics YOLOv8 serves this purpose by providing a robust framework for assessing the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of your model across a range of export formats. +Once your model is trained and validated, the next logical step is to evaluate its performance in various real-world scenarios. Benchmark mode in Ultralytics YOLO11 serves this purpose by providing a robust framework for assessing the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of your model across a range of export formats.


-
- Watch: Ultralytics Modes Tutorial: Benchmark + Watch: Benchmark Ultralytics YOLO11 Models | How to Compare Model Performance on Different Hardware?

## Why Is Benchmarking Crucial? @@ -50,7 +61,7 @@ Once your model is trained and validated, the next logical step is to evaluate i ## Usage Examples -Run YOLOv8n benchmarks on all supported export formats including ONNX, TensorRT etc. See Arguments section below for a full list of export arguments. +Run YOLO11n benchmarks on all supported export formats including ONNX, TensorRT etc. See Arguments section below for a full list of export arguments. !!! example @@ -60,32 +71,39 @@ Run YOLOv8n benchmarks on all supported export formats including ONNX, TensorRT from ultralytics.utils.benchmarks import benchmark # Benchmark on GPU - benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) + benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) + + # Benchmark specific export format + benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, format="onnx") ``` === "CLI" ```bash - yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 half=False device=0 + + # Benchmark specific export format + yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 format=onnx ``` ## Arguments -Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` provide users with the flexibility to fine-tune the benchmarks to their specific needs and compare the performance of different export formats with ease. +Arguments such as `model`, `data`, `imgsz`, `half`, `device`, `verbose` and `format` provide users with the flexibility to fine-tune the benchmarks to their specific needs and compare the performance of different export formats with ease. 
| Key | Default Value | Description | | --------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `model` | `None` | Specifies the path to the model file. Accepts both `.pt` and `.yaml` formats, e.g., `"yolov8n.pt"` for pre-trained models or configuration files. | +| `model` | `None` | Specifies the path to the model file. Accepts both `.pt` and `.yaml` formats, e.g., `"yolo11n.pt"` for pre-trained models or configuration files. | | `data` | `None` | Path to a YAML file defining the dataset for benchmarking, typically including paths and settings for [validation data](https://www.ultralytics.com/glossary/validation-data). Example: `"coco8.yaml"`. | | `imgsz` | `640` | The input image size for the model. Can be a single integer for square images or a tuple `(width, height)` for non-square, e.g., `(640, 480)`. | | `half` | `False` | Enables FP16 (half-precision) inference, reducing memory usage and possibly increasing speed on compatible hardware. Use `half=True` to enable. | | `int8` | `False` | Activates INT8 quantization for further optimized performance on supported devices, especially useful for edge devices. Set `int8=True` to use. | -| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"`, `"cuda:0"`, or a list of devices like `"cuda:0,1"` for multi-GPU setups. | +| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"` or `"cuda:0"`. | | `verbose` | `False` | Controls the level of detail in logging output. A boolean value; set `verbose=True` for detailed logs or a float for thresholding errors. | +| `format` | `''` | Benchmark the model on a single export format. i.e `format=onnx` | ## Export Formats -Benchmarks will attempt to run automatically on all possible export formats below. 
+Benchmarks will attempt to run automatically on all possible export formats listed below. Alternatively, you can run benchmarks for a specific format by using the `format` argument, which accepts any of the formats mentioned below. {% include "macros/export-table.md" %} @@ -93,9 +111,9 @@ See full `export` details in the [Export](../modes/export.md) page. ## FAQ -### How do I benchmark my YOLOv8 model's performance using Ultralytics? +### How do I benchmark my YOLO11 model's performance using Ultralytics? -Ultralytics YOLOv8 offers a Benchmark mode to assess your model's performance across different export formats. This mode provides insights into key metrics such as [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP50-95), accuracy, and inference time in milliseconds. To run benchmarks, you can use either Python or CLI commands. For example, to benchmark on a GPU: +Ultralytics YOLO11 offers a Benchmark mode to assess your model's performance across different export formats. This mode provides insights into key metrics such as [mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP50-95), accuracy, and inference time in milliseconds. To run benchmarks, you can use either Python or CLI commands. For example, to benchmark on a GPU: !!! example @@ -105,55 +123,59 @@ Ultralytics YOLOv8 offers a Benchmark mode to assess your model's performance ac from ultralytics.utils.benchmarks import benchmark # Benchmark on GPU - benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) + benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) ``` === "CLI" ```bash - yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 half=False device=0 ``` For more details on benchmark arguments, visit the [Arguments](#arguments) section. 
-### What are the benefits of exporting YOLOv8 models to different formats? +### What are the benefits of exporting YOLO11 models to different formats? -Exporting YOLOv8 models to different formats such as ONNX, TensorRT, and OpenVINO allows you to optimize performance based on your deployment environment. For instance: +Exporting YOLO11 models to different formats such as ONNX, TensorRT, and OpenVINO allows you to optimize performance based on your deployment environment. For instance: - **ONNX:** Provides up to 3x CPU speedup. - **TensorRT:** Offers up to 5x GPU speedup. - **OpenVINO:** Specifically optimized for Intel hardware. - These formats enhance both the speed and accuracy of your models, making them more efficient for various real-world applications. Visit the [Export](../modes/export.md) page for complete details. -### Why is benchmarking crucial in evaluating YOLOv8 models? +These formats enhance both the speed and accuracy of your models, making them more efficient for various real-world applications. Visit the [Export](../modes/export.md) page for complete details. + +### Why is benchmarking crucial in evaluating YOLO11 models? -Benchmarking your YOLOv8 models is essential for several reasons: +Benchmarking your YOLO11 models is essential for several reasons: - **Informed Decisions:** Understand the trade-offs between speed and accuracy. - **Resource Allocation:** Gauge the performance across different hardware options. - **Optimization:** Determine which export format offers the best performance for specific use cases. - **Cost Efficiency:** Optimize hardware usage based on benchmark results. - Key metrics such as mAP50-95, Top-5 accuracy, and inference time help in making these evaluations. Refer to the [Key Metrics](#key-metrics-in-benchmark-mode) section for more information. -### Which export formats are supported by YOLOv8, and what are their advantages? 
+Key metrics such as mAP50-95, Top-5 accuracy, and inference time help in making these evaluations. Refer to the [Key Metrics](#key-metrics-in-benchmark-mode) section for more information. + +### Which export formats are supported by YOLO11, and what are their advantages? -YOLOv8 supports a variety of export formats, each tailored for specific hardware and use cases: +YOLO11 supports a variety of export formats, each tailored for specific hardware and use cases: - **ONNX:** Best for CPU performance. - **TensorRT:** Ideal for GPU efficiency. - **OpenVINO:** Optimized for Intel hardware. - **CoreML & [TensorFlow](https://www.ultralytics.com/glossary/tensorflow):** Useful for iOS and general ML applications. - For a complete list of supported formats and their respective advantages, check out the [Supported Export Formats](#supported-export-formats) section. -### What arguments can I use to fine-tune my YOLOv8 benchmarks? +For a complete list of supported formats and their respective advantages, check out the [Supported Export Formats](#supported-export-formats) section. + +### What arguments can I use to fine-tune my YOLO11 benchmarks? When running benchmarks, several arguments can be customized to suit specific needs: -- **model:** Path to the model file (e.g., "yolov8n.pt"). +- **model:** Path to the model file (e.g., "yolo11n.pt"). - **data:** Path to a YAML file defining the dataset (e.g., "coco8.yaml"). - **imgsz:** The input image size, either as a single integer or a tuple. - **half:** Enable FP16 inference for better performance. - **int8:** Activate INT8 quantization for edge devices. - **device:** Specify the computation device (e.g., "cpu", "cuda:0"). - **verbose:** Control the level of logging detail. - For a full list of arguments, refer to the [Arguments](#arguments) section. + +For a full list of arguments, refer to the [Arguments](#arguments) section. 
diff --git a/docs/en/modes/export.md b/docs/en/modes/export.md index 706dd91cdc8..776d8264455 100644 --- a/docs/en/modes/export.md +++ b/docs/en/modes/export.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn how to export your YOLOv8 model to various formats like ONNX, TensorRT, and CoreML. Achieve maximum compatibility and performance. -keywords: YOLOv8, Model Export, ONNX, TensorRT, CoreML, Ultralytics, AI, Machine Learning, Inference, Deployment +description: Learn how to export your YOLO11 model to various formats like ONNX, TensorRT, and CoreML. Achieve maximum compatibility and performance. +keywords: YOLO11, Model Export, ONNX, TensorRT, CoreML, Ultralytics, AI, Machine Learning, Inference, Deployment --- # Model Export with Ultralytics YOLO @@ -10,7 +10,7 @@ keywords: YOLOv8, Model Export, ONNX, TensorRT, CoreML, Ultralytics, AI, Machine ## Introduction -The ultimate goal of training a model is to deploy it for real-world applications. Export mode in Ultralytics YOLOv8 offers a versatile range of options for exporting your trained model to different formats, making it deployable across various platforms and devices. This comprehensive guide aims to walk you through the nuances of model exporting, showcasing how to achieve maximum compatibility and performance. +The ultimate goal of training a model is to deploy it for real-world applications. Export mode in Ultralytics YOLO11 offers a versatile range of options for exporting your trained model to different formats, making it deployable across various platforms and devices. This comprehensive guide aims to walk you through the nuances of model exporting, showcasing how to achieve maximum compatibility and performance.


@@ -20,10 +20,10 @@ The ultimate goal of training a model is to deploy it for real-world application allowfullscreen>
- Watch: How To Export Custom Trained Ultralytics YOLOv8 Model and Run Live Inference on Webcam. + Watch: How To Export Custom Trained Ultralytics YOLO Model and Run Live Inference on Webcam.

-## Why Choose YOLOv8's Export Mode? +## Why Choose YOLO11's Export Mode? - **Versatility:** Export to multiple formats including ONNX, TensorRT, CoreML, and more. - **Performance:** Gain up to 5x GPU speedup with TensorRT and 3x CPU speedup with ONNX or OpenVINO. @@ -46,7 +46,7 @@ Here are some of the standout functionalities: ## Usage Examples -Export a YOLOv8n model to a different format like ONNX or TensorRT. See the Arguments section below for a full list of export arguments. +Export a YOLO11n model to a different format like ONNX or TensorRT. See the Arguments section below for a full list of export arguments. !!! example @@ -56,7 +56,7 @@ Export a YOLOv8n model to a different format like ONNX or TensorRT. See the Argu from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -66,7 +66,7 @@ Export a YOLOv8n model to a different format like ONNX or TensorRT. See the Argu === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=yolo11n.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` @@ -80,15 +80,15 @@ Adjusting these parameters allows for customization of the export process to fit ## Export Formats -Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n.onnx`. 
Usage examples are shown for your model after export completes. {% include "macros/export-table.md" %} ## FAQ -### How do I export a YOLOv8 model to ONNX format? +### How do I export a YOLO11 model to ONNX format? -Exporting a YOLOv8 model to ONNX format is straightforward with Ultralytics. It provides both Python and CLI methods for exporting models. +Exporting a YOLO11 model to ONNX format is straightforward with Ultralytics. It provides both Python and CLI methods for exporting models. !!! example @@ -98,7 +98,7 @@ Exporting a YOLOv8 model to ONNX format is straightforward with Ultralytics. It from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -108,7 +108,7 @@ Exporting a YOLOv8 model to ONNX format is straightforward with Ultralytics. It === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=yolo11n.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` @@ -116,7 +116,7 @@ For more details on the process, including advanced options like handling differ ### What are the benefits of using TensorRT for model export? -Using TensorRT for model export offers significant performance improvements. YOLOv8 models exported to TensorRT can achieve up to a 5x GPU speedup, making it ideal for real-time inference applications. +Using TensorRT for model export offers significant performance improvements. YOLO11 models exported to TensorRT can achieve up to a 5x GPU speedup, making it ideal for real-time inference applications. - **Versatility:** Optimize models for a specific hardware setup. - **Speed:** Achieve faster inference through advanced optimizations. @@ -124,7 +124,7 @@ Using TensorRT for model export offers significant performance improvements. 
YOL To learn more about integrating TensorRT, see the [TensorRT integration guide](../integrations/tensorrt.md). -### How do I enable INT8 quantization when exporting my YOLOv8 model? +### How do I enable INT8 quantization when exporting my YOLO11 model? INT8 quantization is an excellent way to compress the model and speed up inference, especially on edge devices. Here's how you can enable INT8 quantization: @@ -135,14 +135,14 @@ INT8 quantization is an excellent way to compress the model and speed up inferen ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") # Load a model - model.export(format="onnx", int8=True) + model = YOLO("yolo11n.pt") # Load a model + model.export(format="engine", int8=True) ``` === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx int8=True # export model with INT8 quantization + yolo export model=yolo11n.pt format=engine int8=True # export TensorRT model with INT8 quantization ``` INT8 quantization can be applied to various formats, such as TensorRT and CoreML. More details can be found in the [Export section](../modes/export.md). @@ -160,14 +160,14 @@ To enable this feature, use the `dynamic=True` flag during export: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.export(format="onnx", dynamic=True) ``` === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx dynamic=True + yolo export model=yolo11n.pt format=onnx dynamic=True ``` For additional context, refer to the [dynamic input size configuration](#arguments). diff --git a/docs/en/modes/index.md b/docs/en/modes/index.md index 2b56680efc0..ea303643d67 100644 --- a/docs/en/modes/index.md +++ b/docs/en/modes/index.md @@ -1,16 +1,16 @@ --- comments: true -description: Discover the diverse modes of Ultralytics YOLOv8, including training, validation, prediction, export, tracking, and benchmarking. Maximize model performance and efficiency. 
-keywords: Ultralytics, YOLOv8, machine learning, model training, validation, prediction, export, tracking, benchmarking, object detection +description: Discover the diverse modes of Ultralytics YOLO11, including training, validation, prediction, export, tracking, and benchmarking. Maximize model performance and efficiency. +keywords: Ultralytics, YOLO11, machine learning, model training, validation, prediction, export, tracking, benchmarking, object detection --- -# Ultralytics YOLOv8 Modes +# Ultralytics YOLO11 Modes Ultralytics YOLO ecosystem and integrations ## Introduction -Ultralytics YOLOv8 is not just another object detection model; it's a versatile framework designed to cover the entire lifecycle of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models—from data ingestion and model training to validation, deployment, and real-world tracking. Each mode serves a specific purpose and is engineered to offer you the flexibility and efficiency required for different tasks and use-cases. +Ultralytics YOLO11 is not just another object detection model; it's a versatile framework designed to cover the entire lifecycle of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) models—from data ingestion and model training to validation, deployment, and real-world tracking. Each mode serves a specific purpose and is engineered to offer you the flexibility and efficiency required for different tasks and use-cases.


@@ -25,7 +25,7 @@ Ultralytics YOLOv8 is not just another object detection model; it's a versatile ### Modes at a Glance -Understanding the different **modes** that Ultralytics YOLOv8 supports is critical to getting the most out of your models: +Understanding the different **modes** that Ultralytics YOLO11 supports is critical to getting the most out of your models: - **Train** mode: Fine-tune your model on custom or preloaded datasets. - **Val** mode: A post-training checkpoint to validate model performance. @@ -34,49 +34,49 @@ Understanding the different **modes** that Ultralytics YOLOv8 supports is critic - **Track** mode: Extend your object detection model into real-time tracking applications. - **Benchmark** mode: Analyze the speed and accuracy of your model in diverse deployment environments. -This comprehensive guide aims to give you an overview and practical insights into each mode, helping you harness the full potential of YOLOv8. +This comprehensive guide aims to give you an overview and practical insights into each mode, helping you harness the full potential of YOLO11. ## [Train](train.md) -Train mode is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. +Train mode is used for training a YOLO11 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. [Train Examples](train.md){ .md-button } ## [Val](val.md) -Val mode is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its accuracy and generalization performance. 
This mode can be used to tune the hyperparameters of the model to improve its performance. +Val mode is used for validating a YOLO11 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its accuracy and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance. [Val Examples](val.md){ .md-button } ## [Predict](predict.md) -Predict mode is used for making predictions using a trained YOLOv8 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model predicts the classes and locations of objects in the input images or videos. +Predict mode is used for making predictions using a trained YOLO11 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model predicts the classes and locations of objects in the input images or videos. [Predict Examples](predict.md){ .md-button } ## [Export](export.md) -Export mode is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. This mode is useful when deploying the model to production environments. +Export mode is used for exporting a YOLO11 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. This mode is useful when deploying the model to production environments. [Export Examples](export.md){ .md-button } ## [Track](track.md) -Track mode is used for tracking objects in real-time using a YOLOv8 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. 
This mode is useful for applications such as surveillance systems or self-driving cars. +Track mode is used for tracking objects in real-time using a YOLO11 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful for applications such as surveillance systems or self-driving cars. [Track Examples](track.md){ .md-button } ## [Benchmark](benchmark.md) -Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation, and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various formats like ONNX, OpenVINO, TensorRT, and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. +Benchmark mode is used to profile the speed and accuracy of various export formats for YOLO11. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation, and pose) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various formats like ONNX, OpenVINO, TensorRT, and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. [Benchmark Examples](benchmark.md){ .md-button } ## FAQ -### How do I train a custom [object detection](https://www.ultralytics.com/glossary/object-detection) model with Ultralytics YOLOv8? +### How do I train a custom [object detection](https://www.ultralytics.com/glossary/object-detection) model with Ultralytics YOLO11? -Training a custom object detection model with Ultralytics YOLOv8 involves using the train mode. 
You need a dataset formatted in YOLO format, containing images and corresponding annotation files. Use the following command to start the training process: +Training a custom object detection model with Ultralytics YOLO11 involves using the train mode. You need a dataset formatted in YOLO format, containing images and corresponding annotation files. Use the following command to start the training process: !!! example @@ -85,22 +85,25 @@ Training a custom object detection model with Ultralytics YOLOv8 involves using ```python from ultralytics import YOLO - # Train a custom model - model = YOLO("yolov8n.pt") + # Load a pre-trained YOLO model (you can choose n, s, m, l, or x versions) + model = YOLO("yolo11n.pt") + + # Start training on your custom dataset model.train(data="path/to/dataset.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash + # Train a YOLO model from the command line yolo train data=path/to/dataset.yaml epochs=100 imgsz=640 ``` For more detailed instructions, you can refer to the [Ultralytics Train Guide](../modes/train.md). -### What metrics does Ultralytics YOLOv8 use to validate the model's performance? +### What metrics does Ultralytics YOLO11 use to validate the model's performance? -Ultralytics YOLOv8 uses various metrics during the validation process to assess model performance. These include: +Ultralytics YOLO11 uses various metrics during the validation process to assess model performance. These include: - **mAP (mean Average Precision)**: This evaluates the accuracy of object detection. - **IOU (Intersection over Union)**: Measures the overlap between predicted and ground truth bounding boxes. 
@@ -115,22 +118,25 @@ You can run the following command to start the validation: ```python from ultralytics import YOLO - # Validate the model - model = YOLO("yolov8n.pt") + # Load a pre-trained or custom YOLO model + model = YOLO("yolo11n.pt") + + # Run validation on your dataset model.val(data="path/to/validation.yaml") ``` === "CLI" ```bash + # Validate a YOLO model from the command line yolo val data=path/to/validation.yaml ``` Refer to the [Validation Guide](../modes/val.md) for further details. -### How can I export my YOLOv8 model for deployment? +### How can I export my YOLO11 model for deployment? -Ultralytics YOLOv8 offers export functionality to convert your trained model into various deployment formats such as ONNX, TensorRT, CoreML, and more. Use the following example to export your model: +Ultralytics YOLO11 offers export functionality to convert your trained model into various deployment formats such as ONNX, TensorRT, CoreML, and more. Use the following example to export your model: !!! example @@ -139,22 +145,25 @@ Ultralytics YOLOv8 offers export functionality to convert your trained model int ```python from ultralytics import YOLO - # Export the model - model = YOLO("yolov8n.pt") + # Load your trained YOLO model + model = YOLO("yolo11n.pt") + + # Export the model to ONNX format (you can specify other formats as needed) model.export(format="onnx") ``` === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx + # Export a YOLO model to ONNX format from the command line + yolo export model=yolo11n.pt format=onnx ``` Detailed steps for each export format can be found in the [Export Guide](../modes/export.md). -### What is the purpose of the benchmark mode in Ultralytics YOLOv8? +### What is the purpose of the benchmark mode in Ultralytics YOLO11? -Benchmark mode in Ultralytics YOLOv8 is used to analyze the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of various export formats such as ONNX, TensorRT, and OpenVINO. 
It provides metrics like model size, `mAP50-95` for object detection, and inference time across different hardware setups, helping you choose the most suitable format for your deployment needs. +Benchmark mode in Ultralytics YOLO11 is used to analyze the speed and [accuracy](https://www.ultralytics.com/glossary/accuracy) of various export formats such as ONNX, TensorRT, and OpenVINO. It provides metrics like model size, `mAP50-95` for object detection, and inference time across different hardware setups, helping you choose the most suitable format for your deployment needs. !!! example @@ -163,21 +172,24 @@ Benchmark mode in Ultralytics YOLOv8 is used to analyze the speed and [accuracy] ```python from ultralytics.utils.benchmarks import benchmark - # Benchmark on GPU - benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) + # Run benchmark on GPU (device 0) + # You can adjust parameters like model, dataset, image size, and precision as needed + benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) ``` === "CLI" ```bash - yolo benchmark model=yolov8n.pt data='coco8.yaml' imgsz=640 half=False device=0 + # Benchmark a YOLO model from the command line + # Adjust parameters as needed for your specific use case + yolo benchmark model=yolo11n.pt data='coco8.yaml' imgsz=640 half=False device=0 ``` For more details, refer to the [Benchmark Guide](../modes/benchmark.md). -### How can I perform real-time object tracking using Ultralytics YOLOv8? +### How can I perform real-time object tracking using Ultralytics YOLO11? -Real-time object tracking can be achieved using the track mode in Ultralytics YOLOv8. This mode extends object detection capabilities to track objects across video frames or live feeds. Use the following example to enable tracking: +Real-time object tracking can be achieved using the track mode in Ultralytics YOLO11. 
This mode extends object detection capabilities to track objects across video frames or live feeds. Use the following example to enable tracking: !!! example @@ -186,14 +198,19 @@ Real-time object tracking can be achieved using the track mode in Ultralytics YO ```python from ultralytics import YOLO - # Track objects in a video - model = YOLO("yolov8n.pt") + # Load a pre-trained YOLO model + model = YOLO("yolo11n.pt") + + # Start tracking objects in a video + # You can also use live video streams or webcam input model.track(source="path/to/video.mp4") ``` === "CLI" ```bash + # Perform object tracking on a video from the command line + # You can specify different sources like webcam (0) or RTSP streams yolo track source=path/to/video.mp4 ``` diff --git a/docs/en/modes/predict.md b/docs/en/modes/predict.md index c2c738c54d2..622e67ace3a 100644 --- a/docs/en/modes/predict.md +++ b/docs/en/modes/predict.md @@ -1,7 +1,7 @@ --- comments: true -description: Harness the power of Ultralytics YOLOv8 for real-time, high-speed inference on various data sources. Learn about predict mode, key features, and practical applications. -keywords: Ultralytics, YOLOv8, model prediction, inference, predict mode, real-time inference, computer vision, machine learning, streaming, high performance +description: Harness the power of Ultralytics YOLO11 for real-time, high-speed inference on various data sources. Learn about predict mode, key features, and practical applications. 
+keywords: Ultralytics, YOLO11, model prediction, inference, predict mode, real-time inference, computer vision, machine learning, streaming, high performance --- # Model Prediction with Ultralytics YOLO @@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model prediction, inference, predict mode, real-t ## Introduction -In the world of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), the process of making sense out of visual data is called 'inference' or 'prediction'. Ultralytics YOLOv8 offers a powerful feature known as **predict mode** that is tailored for high-performance, real-time inference on a wide range of data sources. +In the world of [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) and [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv), the process of making sense out of visual data is called 'inference' or 'prediction'. Ultralytics YOLO11 offers a powerful feature known as **predict mode** that is tailored for high-performance, real-time inference on a wide range of data sources.


@@ -20,7 +20,7 @@ In the world of [machine learning](https://www.ultralytics.com/glossary/machine- allowfullscreen>
- Watch: How to Extract the Outputs from Ultralytics YOLOv8 Model for Custom Projects. + Watch: How to Extract the Outputs from Ultralytics YOLO Model for Custom Projects.

## Real-world Applications @@ -32,7 +32,7 @@ In the world of [machine learning](https://www.ultralytics.com/glossary/machine- ## Why Use Ultralytics YOLO for Inference? -Here's why you should consider YOLOv8's predict mode for your various inference needs: +Here's why you should consider YOLO11's predict mode for your various inference needs: - **Versatility:** Capable of making inferences on images, videos, and even live streams. - **Performance:** Engineered for real-time, high-speed processing without sacrificing [accuracy](https://www.ultralytics.com/glossary/accuracy). @@ -41,7 +41,7 @@ Here's why you should consider YOLOv8's predict mode for your various inference ### Key Features of Predict Mode -YOLOv8's predict mode is designed to be robust and versatile, featuring: +YOLO11's predict mode is designed to be robust and versatile, featuring: - **Multiple Data Source Compatibility:** Whether your data is in the form of individual images, a collection of images, video files, or real-time video streams, predict mode has you covered. - **Streaming Mode:** Use the streaming feature to generate a memory-efficient generator of `Results` objects. Enable this by setting `stream=True` in the predictor's call method. 
@@ -58,7 +58,7 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # pretrained YOLOv8n model + model = YOLO("yolo11n.pt") # pretrained YOLO11n model # Run batched inference on a list of images results = model(["image1.jpg", "image2.jpg"]) # return a list of Results objects @@ -80,7 +80,7 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # pretrained YOLOv8n model + model = YOLO("yolo11n.pt") # pretrained YOLO11n model # Run batched inference on a list of images results = model(["image1.jpg", "image2.jpg"], stream=True) # return a generator of Results objects @@ -98,7 +98,7 @@ Ultralytics YOLO models return either a Python list of `Results` objects, or a m ## Inference Sources -YOLOv8 can process different types of input sources for inference, as shown in the table below. The sources include static images, video streams, and various data formats. The table also indicates whether each source can be used in streaming mode with the argument `stream=True` ✅. Streaming mode is beneficial for processing videos or live streams as it creates a generator of results instead of loading all frames into memory. +YOLO11 can process different types of input sources for inference, as shown in the table below. The sources include static images, video streams, and various data formats. The table also indicates whether each source can be used in streaming mode with the argument `stream=True` ✅. Streaming mode is beneficial for processing videos or live streams as it creates a generator of results instead of loading all frames into memory. !!! tip @@ -120,6 +120,7 @@ YOLOv8 can process different types of input sources for inference, as shown in t | YouTube ✅ | `'https://youtu.be/LNwODJXcvt4'` | `str` | URL to a YouTube video.
| | stream ✅ | `'rtsp://example.com/media.mp4'` | `str` | URL for streaming protocols such as RTSP, RTMP, TCP, or an IP address. | | multi-stream ✅ | `'list.streams'` | `str` or `Path` | `*.streams` text file with one stream URL per row, i.e. 8 streams will run at batch-size 8. | +| webcam ✅ | `0` | `int` | Index of the connected camera device to run inference on. | Below are code examples for using each source type: @@ -131,8 +132,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define path to the image file source = "path/to/image.jpg" @@ -147,8 +148,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define current screenshot as source source = "screen" @@ -163,8 +164,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define remote image or video URL source = "https://ultralytics.com/images/bus.jpg" @@ -181,8 +182,8 @@ Below are code examples for using each source type: from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Open an image using PIL source = Image.open("path/to/image.jpg") @@ -199,8 +200,8 @@ Below are code examples for using each source type: from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Read an image using OpenCV source = cv2.imread("path/to/image.jpg") @@ -217,8 +218,8 @@ Below are code examples for 
using each source type: from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Create a random numpy array of HWC shape (640, 640, 3) with values in range [0, 255] and type uint8 source = np.random.randint(low=0, high=255, size=(640, 640, 3), dtype="uint8") @@ -235,8 +236,8 @@ Below are code examples for using each source type: from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Create a random torch tensor of BCHW shape (1, 3, 640, 640) with values in range [0, 1] and type float32 source = torch.rand(1, 3, 640, 640, dtype=torch.float32) @@ -253,8 +254,8 @@ Below are code examples for using each source type: from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define a path to a CSV file with images, URLs, videos and directories source = "path/to/file.csv" @@ -269,8 +270,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define path to video file source = "path/to/video.mp4" @@ -285,8 +286,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define path to directory containing images and videos for inference source = "path/to/dir" @@ -301,8 +302,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define a glob search for all JPG 
files in a directory source = "path/to/dir/*.jpg" @@ -320,8 +321,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Define source as YouTube video URL source = "https://youtu.be/LNwODJXcvt4" @@ -337,8 +338,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Single stream with batch-size 1 inference source = "rtsp://example.com/media.mp4" # RTSP, RTMP, TCP, or IP streaming address @@ -356,8 +357,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Multiple streams with batched inference (e.g., batch-size 8 for 8 streams) source = "path/to/list.streams" # *.streams text file with one streaming address per line @@ -378,6 +379,20 @@ Below are code examples for using each source type: Each row in the file represents a streaming source, allowing you to monitor and perform inference on several video streams at once. + === "Webcam" + + You can run inference on a connected camera device by passing the index of that particular camera to `source`. 
+ + ```python + from ultralytics import YOLO + + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") + + # Run inference on the source + results = model(source=0, stream=True) # generator of Results objects + ``` + ## Inference Arguments `model.predict()` accepts multiple arguments that can be passed at inference time to override defaults: @@ -387,8 +402,8 @@ Below are code examples for using each source type: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Run inference on 'bus.jpg' with arguments model.predict("bus.jpg", save=True, imgsz=320, conf=0.5) @@ -404,12 +419,16 @@ Visualization arguments: ## Image and Video Formats -YOLOv8 supports various image and video formats, as specified in [ultralytics/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py). See the tables below for the valid suffixes and example predict commands. +YOLO11 supports various image and video formats, as specified in [ultralytics/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py). See the tables below for the valid suffixes and example predict commands. ### Images The below table contains valid Ultralytics image formats. +!!! note + + HEIC images are supported for inference only, not for training. + | Image Suffixes | Example Predict Command | Reference | | -------------- | -------------------------------- | -------------------------------------------------------------------------- | | `.bmp` | `yolo predict source=image.bmp` | [Microsoft BMP File Format](https://en.wikipedia.org/wiki/BMP_file_format) | @@ -422,6 +441,7 @@ The below table contains valid Ultralytics image formats. 
| `.tiff` | `yolo predict source=image.tiff` | [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) | | `.webp` | `yolo predict source=image.webp` | [WebP](https://en.wikipedia.org/wiki/WebP) | | `.pfm` | `yolo predict source=image.pfm` | [Portable FloatMap](https://en.wikipedia.org/wiki/Netpbm#File_formats) | +| `.HEIC` | `yolo predict source=image.HEIC` | [High Efficiency Image Format](https://en.wikipedia.org/wiki/HEIF) | ### Videos @@ -451,8 +471,8 @@ All Ultralytics `predict()` calls will return a list of `Results` objects: ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Run inference on an image results = model("bus.jpg") # list of 1 Results object @@ -490,7 +510,12 @@ All Ultralytics `predict()` calls will return a list of `Results` objects: | `verbose()` | `str` | Return log string for each task. | | `save_txt()` | `None` | Save predictions into a txt file. | | `save_crop()` | `None` | Save cropped predictions to `save_dir/cls/file_name.jpg`. | -| `tojson()` | `str` | Convert the object to JSON format. | +| `summary()` | `List[Dict]` | A list of dictionaries, each containing summarized information for results | +| `to_df()` | `DataFrame` | Convert the results to Pandas Dataframe. | +| `to_csv()` | `str` | Convert the result to CSV (comma separated values) format. | +| `to_xml()` | `str` | Convert the results to XML (Extensible Markup Language) format. | +| `to_json()` | `str` | Convert the results to JSON format. | +| `to_sql()` | `None` | Dump the results into the SQL database. | For more details see the [`Results` class documentation](../reference/engine/results.md). 
@@ -503,8 +528,8 @@ For more details see the [`Results` class documentation](../reference/engine/res ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Run inference on an image results = model("bus.jpg") # results list @@ -541,8 +566,8 @@ For more details see the [`Boxes` class documentation](../reference/engine/resul ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n-seg Segment model - model = YOLO("yolov8n-seg.pt") + # Load a pretrained YOLO11n-seg Segment model + model = YOLO("yolo11n-seg.pt") # Run inference on an image results = model("bus.jpg") # results list @@ -574,8 +599,8 @@ For more details see the [`Masks` class documentation](../reference/engine/resul ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n-pose Pose model - model = YOLO("yolov8n-pose.pt") + # Load a pretrained YOLO11n-pose Pose model + model = YOLO("yolo11n-pose.pt") # Run inference on an image results = model("bus.jpg") # results list @@ -608,8 +633,8 @@ For more details see the [`Keypoints` class documentation](../reference/engine/r ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n-cls Classify model - model = YOLO("yolov8n-cls.pt") + # Load a pretrained YOLO11n-cls Classify model + model = YOLO("yolo11n-cls.pt") # Run inference on an image results = model("bus.jpg") # results list @@ -643,11 +668,11 @@ For more details see the [`Probs` class documentation](../reference/engine/resul ```python from ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n-obb.pt") + # Load a pretrained YOLO11n-obb OBB model + model = YOLO("yolo11n-obb.pt") # Run inference on an image - results = model("bus.jpg") # results list + results = model("boats.jpg") # results list # View results for r in results: @@ -683,8 +708,8 @@ The `plot()` method in `Results` objects facilitates visualization of prediction from 
ultralytics import YOLO - # Load a pretrained YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a pretrained YOLO11n model + model = YOLO("yolo11n.pt") # Run inference on 'bus.jpg' results = model(["bus.jpg", "zidane.jpg"]) # results list @@ -749,15 +774,15 @@ When using YOLO models in a multi-threaded application, it's important to instan # Starting threads that each have their own model instance - Thread(target=thread_safe_predict, args=("yolov8n.pt", "image1.jpg")).start() - Thread(target=thread_safe_predict, args=("yolov8n.pt", "image2.jpg")).start() + Thread(target=thread_safe_predict, args=("yolo11n.pt", "image1.jpg")).start() + Thread(target=thread_safe_predict, args=("yolo11n.pt", "image2.jpg")).start() ``` For an in-depth look at thread-safe inference with YOLO models and step-by-step instructions, please refer to our [YOLO Thread-Safe Inference Guide](../guides/yolo-thread-safe-inference.md). This guide will provide you with all the necessary information to avoid common pitfalls and ensure that your multi-threaded inference runs smoothly. ## Streaming Source `for`-loop -Here's a Python script using OpenCV (`cv2`) and YOLOv8 to run inference on video frames. This script assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). +Here's a Python script using OpenCV (`cv2`) and YOLO to run inference on video frames. This script assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). !!! 
example "Streaming for-loop" @@ -766,8 +791,8 @@ Here's a Python script using OpenCV (`cv2`) and YOLOv8 to run inference on video from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO model + model = YOLO("yolo11n.pt") # Open the video file video_path = "path/to/your/video/file.mp4" @@ -779,14 +804,14 @@ Here's a Python script using OpenCV (`cv2`) and YOLOv8 to run inference on video success, frame = cap.read() if success: - # Run YOLOv8 inference on the frame + # Run YOLO inference on the frame results = model(frame) # Visualize the results on the frame annotated_frame = results[0].plot() # Display the annotated frame - cv2.imshow("YOLOv8 Inference", annotated_frame) + cv2.imshow("YOLO Inference", annotated_frame) # Break the loop if 'q' is pressed if cv2.waitKey(1) & 0xFF == ord("q"): @@ -808,22 +833,22 @@ This script will run predictions on each frame of the video, visualize the resul ## FAQ -### What is Ultralytics YOLOv8 and its predict mode for real-time inference? +### What is Ultralytics YOLO and its predict mode for real-time inference? -Ultralytics YOLOv8 is a state-of-the-art model for real-time [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Its **predict mode** allows users to perform high-speed inference on various data sources such as images, videos, and live streams. Designed for performance and versatility, it also offers batch processing and streaming modes. For more details on its features, check out the [Ultralytics YOLOv8 predict mode](#key-features-of-predict-mode). +Ultralytics YOLO is a state-of-the-art model for real-time [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Its **predict mode** allows users to perform high-speed inference on various data sources such as images, videos, and live streams. 
Designed for performance and versatility, it also offers batch processing and streaming modes. For more details on its features, check out the [Ultralytics YOLO predict mode](#key-features-of-predict-mode). -### How can I run inference using Ultralytics YOLOv8 on different data sources? +### How can I run inference using Ultralytics YOLO on different data sources? -Ultralytics YOLOv8 can process a wide range of data sources, including individual images, videos, directories, URLs, and streams. You can specify the data source in the `model.predict()` call. For example, use `'image.jpg'` for a local image or `'https://ultralytics.com/images/bus.jpg'` for a URL. Check out the detailed examples for various [inference sources](#inference-sources) in the documentation. +Ultralytics YOLO can process a wide range of data sources, including individual images, videos, directories, URLs, and streams. You can specify the data source in the `model.predict()` call. For example, use `'image.jpg'` for a local image or `'https://ultralytics.com/images/bus.jpg'` for a URL. Check out the detailed examples for various [inference sources](#inference-sources) in the documentation. -### How do I optimize YOLOv8 inference speed and memory usage? +### How do I optimize YOLO inference speed and memory usage? To optimize inference speed and manage memory efficiently, you can use the streaming mode by setting `stream=True` in the predictor's call method. The streaming mode generates a memory-efficient generator of `Results` objects instead of loading all frames into memory. For processing long videos or large datasets, streaming mode is particularly useful. Learn more about [streaming mode](#key-features-of-predict-mode). -### What inference arguments does Ultralytics YOLOv8 support? +### What inference arguments does Ultralytics YOLO support? -The `model.predict()` method in YOLOv8 supports various arguments such as `conf`, `iou`, `imgsz`, `device`, and more. 
These arguments allow you to customize the inference process, setting parameters like confidence thresholds, image size, and the device used for computation. Detailed descriptions of these arguments can be found in the [inference arguments](#inference-arguments) section. +The `model.predict()` method in YOLO supports various arguments such as `conf`, `iou`, `imgsz`, `device`, and more. These arguments allow you to customize the inference process, setting parameters like confidence thresholds, image size, and the device used for computation. Detailed descriptions of these arguments can be found in the [inference arguments](#inference-arguments) section. -### How can I visualize and save the results of YOLOv8 predictions? +### How can I visualize and save the results of YOLO predictions? -After running inference with YOLOv8, the `Results` objects contain methods for displaying and saving annotated images. You can use methods like `result.show()` and `result.save(filename="result.jpg")` to visualize and save the results. For a comprehensive list of these methods, refer to the [working with results](#working-with-results) section. +After running inference with YOLO, the `Results` objects contain methods for displaying and saving annotated images. You can use methods like `result.show()` and `result.save(filename="result.jpg")` to visualize and save the results. For a comprehensive list of these methods, refer to the [working with results](#working-with-results) section. diff --git a/docs/en/modes/track.md b/docs/en/modes/track.md index 46c43b0b1a8..efd39e9f471 100644 --- a/docs/en/modes/track.md +++ b/docs/en/modes/track.md @@ -27,7 +27,7 @@ The output from Ultralytics trackers is consistent with standard [object detecti allowfullscreen>
- Watch: Object Detection and Tracking with Ultralytics YOLOv8. + Watch: Object Detection and Tracking with Ultralytics YOLO.

## Real-world Applications @@ -60,7 +60,7 @@ The default tracker is BoT-SORT. If object confidence score will be low, i.e lower than [`track_high_thresh`](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/trackers/bytetrack.yaml#L5), then there will be no tracks successfully returned and updated. -To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLOv8n, YOLOv8n-seg and YOLOv8n-pose. +To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLO11n, YOLO11n-seg and YOLO11n-pose. !!! example @@ -70,9 +70,9 @@ To run the tracker on video streams, use a trained Detect, Segment or Pose model from ultralytics import YOLO # Load an official or custom model - model = YOLO("yolov8n.pt") # Load an official Detect model - model = YOLO("yolov8n-seg.pt") # Load an official Segment model - model = YOLO("yolov8n-pose.pt") # Load an official Pose model + model = YOLO("yolo11n.pt") # Load an official Detect model + model = YOLO("yolo11n-seg.pt") # Load an official Segment model + model = YOLO("yolo11n-pose.pt") # Load an official Pose model model = YOLO("path/to/best.pt") # Load a custom trained model # Perform tracking with the model @@ -84,9 +84,9 @@ To run the tracker on video streams, use a trained Detect, Segment or Pose model ```bash # Perform tracking with various models using the command line interface - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model - yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model - yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model + yolo track model=yolo11n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model + yolo track model=yolo11n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model yolo track 
model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Custom trained model # Track using ByteTrack tracker @@ -113,7 +113,7 @@ Tracking configuration shares properties with Predict mode, such as `conf`, `iou from ultralytics import YOLO # Configure the tracking parameters and run the tracker - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) ``` @@ -121,7 +121,7 @@ Tracking configuration shares properties with Predict mode, such as `conf`, `iou ```bash # Configure tracking parameters and run the tracker using the command line interface - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3 iou=0.5 show ``` ### Tracker Selection @@ -136,7 +136,7 @@ Ultralytics also allows you to use a modified tracker configuration file. To do from ultralytics import YOLO # Load the model and run the tracker with a custom configuration file - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml") ``` @@ -144,7 +144,7 @@ Ultralytics also allows you to use a modified tracker configuration file. To do ```bash # Load the model and run the tracker with a custom configuration file using the command line interface - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' ``` For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) page. 
@@ -153,7 +153,7 @@ For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/tr ### Persisting Tracks Loop -Here is a Python script using [OpenCV](https://www.ultralytics.com/glossary/opencv) (`cv2`) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. +Here is a Python script using [OpenCV](https://www.ultralytics.com/glossary/opencv) (`cv2`) and YOLO11 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker that the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. !!! example "Streaming for-loop with tracking" @@ -162,8 +162,8 @@ Here is a Python script using [OpenCV](https://www.ultralytics.com/glossary/open from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Open the video file video_path = "path/to/video.mp4" @@ -175,14 +175,14 @@ Here is a Python script using [OpenCV](https://www.ultralytics.com/glossary/open success, frame = cap.read() if success: - # Run YOLOv8 tracking on the frame, persisting tracks between frames + # Run YOLO11 tracking on the frame, persisting tracks between frames results = model.track(frame, persist=True) # Visualize the results on the frame annotated_frame = results[0].plot() # Display the annotated frame - cv2.imshow("YOLOv8 Tracking", annotated_frame) + cv2.imshow("YOLO11 Tracking", annotated_frame) # Break the loop if 'q' is pressed if cv2.waitKey(1) & 0xFF == ord("q"): @@ -200,9 +200,9 @@ Please note the change from `model(frame)` to 
`model.track(frame)`, which enable ### Plotting Tracks Over Time -Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process. +Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLO11, plotting these tracks is a seamless and efficient process. -In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. +In the following example, we demonstrate how to utilize YOLO11's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. !!! 
example "Plotting tracks over multiple video frames" @@ -214,8 +214,8 @@ In the following example, we demonstrate how to utilize YOLOv8's tracking capabi from ultralytics import YOLO - # Load the YOLOv8 model - model = YOLO("yolov8n.pt") + # Load the YOLO11 model + model = YOLO("yolo11n.pt") # Open the video file video_path = "path/to/video.mp4" @@ -230,7 +230,7 @@ In the following example, we demonstrate how to utilize YOLOv8's tracking capabi success, frame = cap.read() if success: - # Run YOLOv8 tracking on the frame, persisting tracks between frames + # Run YOLO11 tracking on the frame, persisting tracks between frames results = model.track(frame, persist=True) # Get the boxes and track IDs @@ -253,7 +253,7 @@ In the following example, we demonstrate how to utilize YOLOv8's tracking capabi cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) # Display the annotated frame - cv2.imshow("YOLOv8 Tracking", annotated_frame) + cv2.imshow("YOLO11 Tracking", annotated_frame) # Break the loop if 'q' is pressed if cv2.waitKey(1) & 0xFF == ord("q"): @@ -275,7 +275,7 @@ In the provided Python script, we make use of Python's `threading` module to run To ensure that each thread receives the correct parameters (the video file, the model to use and the file index), we define a function `run_tracker_in_thread` that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results. -Two different models are used in this example: `yolov8n.pt` and `yolov8n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. +Two different models are used in this example: `yolo11n.pt` and `yolo11n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. 
The `daemon=True` parameter in `threading.Thread` means that these threads will be closed as soon as the main program finishes. We then start the threads with `start()` and use `join()` to make the main thread wait until both tracker threads have finished. @@ -291,7 +291,7 @@ Finally, after all threads have completed their task, the windows displaying the from ultralytics import YOLO # Define model names and video sources - MODEL_NAMES = ["yolov8n.pt", "yolov8n-seg.pt"] + MODEL_NAMES = ["yolo11n.pt", "yolo11n-seg.pt"] SOURCES = ["path/to/video.mp4", "0"] # local video, 0 for webcam @@ -300,7 +300,7 @@ Finally, after all threads have completed their task, the windows displaying the Run YOLO tracker in its own thread for concurrent processing. Args: - model_name (str): The YOLOv8 model object. + model_name (str): The name or path of the YOLO11 model weights to load. filename (str): The path to the video file or the identifier for the webcam/external camera source. """ model = YOLO(model_name) @@ -357,14 +357,14 @@ You can configure a custom tracker by copying an existing tracker configuration ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml") ``` === "CLI" ```bash - yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' + yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' ``` ### How can I run object tracking on multiple video streams simultaneously? 
@@ -381,7 +381,7 @@ To run object tracking on multiple video streams simultaneously, you can use Pyt from ultralytics import YOLO # Define model names and video sources - MODEL_NAMES = ["yolov8n.pt", "yolov8n-seg.pt"] + MODEL_NAMES = ["yolo11n.pt", "yolo11n-seg.pt"] SOURCES = ["path/to/video.mp4", "0"] # local video, 0 for webcam @@ -390,7 +390,7 @@ To run object tracking on multiple video streams simultaneously, you can use Pyt Run YOLO tracker in its own thread for concurrent processing. Args: - model_name (str): The YOLOv8 model object. + model_name (str): The name or path of the YOLO11 model weights to load. filename (str): The path to the video file or the identifier for the webcam/external camera source. """ model = YOLO(model_name) @@ -438,7 +438,7 @@ To visualize object tracks over multiple video frames, you can use the YOLO mode from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") video_path = "path/to/video.mp4" cap = cv2.VideoCapture(video_path) track_history = defaultdict(lambda: []) @@ -458,7 +458,7 @@ To visualize object tracks over multiple video frames, you can use the YOLO mode track.pop(0) points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2)) cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10) - cv2.imshow("YOLOv8 Tracking", annotated_frame) + cv2.imshow("YOLO11 Tracking", annotated_frame) if cv2.waitKey(1) & 0xFF == ord("q"): break else: diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md index f5722b72809..a1649f8a0ac 100644 --- a/docs/en/modes/train.md +++ b/docs/en/modes/train.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn how to efficiently train object detection models using YOLOv8 with comprehensive instructions on settings, augmentation, and hardware utilization. 
-keywords: Ultralytics, YOLOv8, model training, deep learning, object detection, GPU training, dataset augmentation, hyperparameter tuning, model performance, M1 M2 training +description: Learn how to efficiently train object detection models using YOLO11 with comprehensive instructions on settings, augmentation, and hardware utilization. +keywords: Ultralytics, YOLO11, model training, deep learning, object detection, GPU training, dataset augmentation, hyperparameter tuning, model performance, apple silicon training --- # Model Training with Ultralytics YOLO @@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model training, deep learning, object detection, ## Introduction -Training a [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) model involves feeding it data and adjusting its parameters so that it can make accurate predictions. Train mode in Ultralytics YOLOv8 is engineered for effective and efficient training of object detection models, fully utilizing modern hardware capabilities. This guide aims to cover all the details you need to get started with training your own models using YOLOv8's robust set of features. +Training a [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) model involves feeding it data and adjusting its parameters so that it can make accurate predictions. Train mode in Ultralytics YOLO11 is engineered for effective and efficient training of object detection models, fully utilizing modern hardware capabilities. This guide aims to cover all the details you need to get started with training your own models using YOLO11's robust set of features.


@@ -20,12 +20,12 @@ Training a [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl allowfullscreen>
- Watch: How to Train a YOLOv8 model on Your Custom Dataset in Google Colab. + Watch: How to Train a YOLO model on Your Custom Dataset in Google Colab.

## Why Choose Ultralytics YOLO for Training? -Here are some compelling reasons to opt for YOLOv8's Train mode: +Here are some compelling reasons to opt for YOLO11's Train mode: - **Efficiency:** Make the most out of your hardware, whether you're on a single-GPU setup or scaling across multiple GPUs. - **Versatility:** Train on custom datasets in addition to readily available ones like COCO, VOC, and ImageNet. @@ -34,7 +34,7 @@ Here are some compelling reasons to opt for YOLOv8's Train mode: ### Key Features of Train Mode -The following are some notable features of YOLOv8's Train mode: +The following are some notable features of YOLO11's Train mode: - **Automatic Dataset Download:** Standard datasets like COCO, VOC, and ImageNet are downloaded automatically on first use. - **Multi-GPU Support:** Scale your training efforts seamlessly across multiple GPUs to expedite the process. @@ -43,11 +43,15 @@ The following are some notable features of YOLOv8's Train mode: !!! tip - * YOLOv8 datasets like COCO, VOC, ImageNet and many others automatically download on first use, i.e. `yolo train data=coco.yaml` + * YOLO11 datasets like COCO, VOC, ImageNet and many others automatically download on first use, i.e. `yolo train data=coco.yaml` ## Usage Examples -Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. The training device can be specified using the `device` argument. If no argument is passed GPU `device=0` will be used if available, otherwise `device='cpu'` will be used. See Arguments section below for a full list of training arguments. +Train YOLO11n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. The training device can be specified using the `device` argument. If no argument is passed GPU `device=0` will be used if available, otherwise `device='cpu'` will be used. See Arguments section below for a full list of training arguments. + +!!! 
warning "Windows Multi-Processing Error" + + On Windows, you may receive a `RuntimeError` when launching the training as a script. Wrap your training code in an `if __name__ == "__main__":` block to resolve it. !!! example "Single-GPU and CPU Training Example" @@ -59,9 +63,9 @@ Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.yaml") # build a new model from YAML - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n.yaml").load("yolov8n.pt") # build from YAML and transfer weights + model = YOLO("yolo11n.yaml") # build a new model from YAML + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.yaml").load("yolo11n.pt") # build from YAML and transfer weights # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -71,13 +75,13 @@ Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/ ```bash # Build a new model from YAML and start training from scratch - yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.yaml epochs=100 imgsz=640 # Start training from a pretrained *.pt model - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo detect train data=coco8.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.yaml pretrained=yolo11n.pt epochs=100 imgsz=640 ``` ### Multi-GPU Training @@ -94,7 +98,7 @@ Multi-GPU training allows for more efficient utilization of available hardware r from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for 
training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model with 2 GPUs results = model.train(data="coco8.yaml", epochs=100, imgsz=640, device=[0, 1]) @@ -104,14 +108,14 @@ Multi-GPU training allows for more efficient utilization of available hardware r ```bash # Start training from a pretrained *.pt model using GPUs 0 and 1 - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 device=0,1 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 device=0,1 ``` -### Apple M1 and M2 MPS Training +### Apple Silicon MPS Training -With the support for Apple M1 and M2 chips integrated in the Ultralytics YOLO models, it's now possible to train your models on devices utilizing the powerful Metal Performance Shaders (MPS) framework. The MPS offers a high-performance way of executing computation and image processing tasks on Apple's custom silicon. +With the support for Apple silicon chips integrated in the Ultralytics YOLO models, it's now possible to train your models on devices utilizing the powerful Metal Performance Shaders (MPS) framework. The MPS offers a high-performance way of executing computation and image processing tasks on Apple's custom silicon. -To enable training on Apple M1 and M2 chips, you should specify 'mps' as your device when initiating the training process. Below is an example of how you could do this in Python and via the command line: +To enable training on Apple silicon chips, you should specify 'mps' as your device when initiating the training process. Below is an example of how you could do this in Python and via the command line: !!! 
example "MPS Training Example" @@ -121,7 +125,7 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model with MPS results = model.train(data="coco8.yaml", epochs=100, imgsz=640, device="mps") @@ -131,10 +135,10 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de ```bash # Start training from a pretrained *.pt model using MPS - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 device=mps ``` -While leveraging the computational power of the M1/M2 chips, this enables more efficient processing of the training tasks. For more detailed guidance and advanced configuration options, please refer to the [PyTorch MPS documentation](https://pytorch.org/docs/stable/notes/mps.html). +While leveraging the computational power of the Apple silicon chips, this enables more efficient processing of the training tasks. For more detailed guidance and advanced configuration options, please refer to the [PyTorch MPS documentation](https://pytorch.org/docs/stable/notes/mps.html). ### Resuming Interrupted Trainings @@ -199,7 +203,7 @@ These settings can be adjusted to meet the specific requirements of the dataset ## Logging -In training a YOLOv8 model, you might find it valuable to keep track of the model's performance over time. This is where logging comes into play. Ultralytics' YOLO provides support for three types of loggers - Comet, ClearML, and TensorBoard. +In training a YOLO11 model, you might find it valuable to keep track of the model's performance over time. This is where logging comes into play. Ultralytics' YOLO provides support for three types of loggers - Comet, ClearML, and TensorBoard. 
To use a logger, select it from the dropdown menu in the code snippet above and run it. The chosen logger will be installed and initialized. @@ -272,9 +276,9 @@ After setting up your logger, you can then proceed with your model training. All ## FAQ -### How do I train an [object detection](https://www.ultralytics.com/glossary/object-detection) model using Ultralytics YOLOv8? +### How do I train an [object detection](https://www.ultralytics.com/glossary/object-detection) model using Ultralytics YOLO11? -To train an object detection model using Ultralytics YOLOv8, you can either use the Python API or the CLI. Below is an example for both: +To train an object detection model using Ultralytics YOLO11, you can either use the Python API or the CLI. Below is an example for both: !!! example "Single-GPU and CPU Training Example" @@ -284,7 +288,7 @@ To train an object detection model using Ultralytics YOLOv8, you can either use from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -293,14 +297,14 @@ To train an object detection model using Ultralytics YOLOv8, you can either use === "CLI" ```bash - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For more details, refer to the [Train Settings](#train-settings) section. -### What are the key features of Ultralytics YOLOv8's Train mode? +### What are the key features of Ultralytics YOLO11's Train mode? -The key features of Ultralytics YOLOv8's Train mode include: +The key features of Ultralytics YOLO11's Train mode include: - **Automatic Dataset Download:** Automatically downloads standard datasets like COCO, VOC, and ImageNet. 
- **Multi-GPU Support:** Scale training across multiple GPUs for faster processing. @@ -309,7 +313,7 @@ The key features of Ultralytics YOLOv8's Train mode include: These features make training efficient and customizable to your needs. For more details, see the [Key Features of Train Mode](#key-features-of-train-mode) section. -### How do I resume training from an interrupted session in Ultralytics YOLOv8? +### How do I resume training from an interrupted session in Ultralytics YOLO11? To resume training from an interrupted session, set the `resume` argument to `True` and specify the path to the last saved checkpoint. @@ -335,9 +339,9 @@ To resume training from an interrupted session, set the `resume` argument to `Tr Check the section on [Resuming Interrupted Trainings](#resuming-interrupted-trainings) for more information. -### Can I train YOLOv8 models on Apple M1 and M2 chips? +### Can I train YOLO11 models on Apple silicon chips? -Yes, Ultralytics YOLOv8 supports training on Apple M1 and M2 chips utilizing the Metal Performance Shaders (MPS) framework. Specify 'mps' as your training device. +Yes, Ultralytics YOLO11 supports training on Apple silicon chips utilizing the Metal Performance Shaders (MPS) framework. Specify 'mps' as your training device. !!! example "MPS Training Example" @@ -347,23 +351,23 @@ Yes, Ultralytics YOLOv8 supports training on Apple M1 and M2 chips utilizing the from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") - # Train the model on M1/M2 chip + # Train the model on Apple silicon chip (M1/M2/M3/M4) results = model.train(data="coco8.yaml", epochs=100, imgsz=640, device="mps") ``` === "CLI" ```bash - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 device=mps + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 device=mps ``` -For more details, refer to the [Apple M1 and M2 MPS Training](#apple-m1-and-m2-mps-training) section. 
+For more details, refer to the [Apple Silicon MPS Training](#apple-silicon-mps-training) section. ### What are the common training settings, and how do I configure them? -Ultralytics YOLOv8 allows you to configure a variety of training settings such as batch size, learning rate, epochs, and more through arguments. Here's a brief overview: +Ultralytics YOLO11 allows you to configure a variety of training settings such as batch size, learning rate, epochs, and more through arguments. Here's a brief overview: | Argument | Default | Description | | -------- | ------- | ---------------------------------------------------------------------- | diff --git a/docs/en/modes/val.md b/docs/en/modes/val.md index 91eb4c2a873..a1cd3139106 100644 --- a/docs/en/modes/val.md +++ b/docs/en/modes/val.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn how to validate your YOLOv8 model with precise metrics, easy-to-use tools, and custom settings for optimal performance. -keywords: Ultralytics, YOLOv8, model validation, machine learning, object detection, mAP metrics, Python API, CLI +description: Learn how to validate your YOLO11 model with precise metrics, easy-to-use tools, and custom settings for optimal performance. +keywords: Ultralytics, YOLO11, model validation, machine learning, object detection, mAP metrics, Python API, CLI --- # Model Validation with Ultralytics YOLO @@ -10,7 +10,7 @@ keywords: Ultralytics, YOLOv8, model validation, machine learning, object detect ## Introduction -Validation is a critical step in the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) pipeline, allowing you to assess the quality of your trained models. Val mode in Ultralytics YOLOv8 provides a robust suite of tools and metrics for evaluating the performance of your [object detection](https://www.ultralytics.com/glossary/object-detection) models. 
This guide serves as a complete resource for understanding how to effectively use the Val mode to ensure that your models are both accurate and reliable. +Validation is a critical step in the [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) pipeline, allowing you to assess the quality of your trained models. Val mode in Ultralytics YOLO11 provides a robust suite of tools and metrics for evaluating the performance of your [object detection](https://www.ultralytics.com/glossary/object-detection) models. This guide serves as a complete resource for understanding how to effectively use the Val mode to ensure that your models are both accurate and reliable.


@@ -25,7 +25,7 @@ Validation is a critical step in the [machine learning](https://www.ultralytics. ## Why Validate with Ultralytics YOLO? -Here's why using YOLOv8's Val mode is advantageous: +Here's why using YOLO11's Val mode is advantageous: - **Precision:** Get accurate metrics like mAP50, mAP75, and mAP50-95 to comprehensively evaluate your model. - **Convenience:** Utilize built-in features that remember training settings, simplifying the validation process. @@ -34,7 +34,7 @@ Here's why using YOLOv8's Val mode is advantageous: ### Key Features of Val Mode -These are the notable functionalities offered by YOLOv8's Val mode: +These are the notable functionalities offered by YOLO11's Val mode: - **Automated Settings:** Models remember their training configurations for straightforward validation. - **Multi-Metric Support:** Evaluate your model based on a range of accuracy metrics. @@ -43,11 +43,11 @@ These are the notable functionalities offered by YOLOv8's Val mode: !!! tip - * YOLOv8 models automatically remember their training settings, so you can validate a model at the same image size and on the original dataset easily with just `yolo val model=yolov8n.pt` or `model('yolov8n.pt').val()` + * YOLO11 models automatically remember their training settings, so you can validate a model at the same image size and on the original dataset easily with just `yolo val model=yolo11n.pt` or `model('yolo11n.pt').val()` ## Usage Examples -Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. See Arguments section below for a full list of export arguments. +Validate trained YOLO11n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. 
See Arguments section below for a full list of validation arguments. !!! example @@ -57,7 +57,7 @@ Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/a from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -71,7 +71,7 @@ Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/a === "CLI" ```bash - yolo detect val model=yolov8n.pt # val official model + yolo detect val model=yolo11n.pt # val official model yolo detect val model=path/to/best.pt # val custom model ``` @@ -95,7 +95,7 @@ The below examples showcase YOLO model validation with custom arguments in Pytho from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Customize validation settings validation_results = model.val(data="coco8.yaml", imgsz=640, batch=16, conf=0.25, iou=0.6, device="0") @@ -104,20 +104,20 @@ The below examples showcase YOLO model validation with custom arguments in Pytho === "CLI" ```bash - yolo val model=yolov8n.pt data=coco8.yaml imgsz=640 batch=16 conf=0.25 iou=0.6 device=0 + yolo val model=yolo11n.pt data=coco8.yaml imgsz=640 batch=16 conf=0.25 iou=0.6 device=0 ``` ## FAQ -### How do I validate my YOLOv8 model with Ultralytics? +### How do I validate my YOLO11 model with Ultralytics? -To validate your YOLOv8 model, you can use the Val mode provided by Ultralytics. For example, using the Python API, you can load a model and run validation with: +To validate your YOLO11 model, you can use the Val mode provided by Ultralytics. 
For example, using the Python API, you can load a model and run validation with: ```python from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Validate the model metrics = model.val() @@ -127,14 +127,14 @@ print(metrics.box.map) # map50-95 Alternatively, you can use the command-line interface (CLI): ```bash -yolo val model=yolov8n.pt +yolo val model=yolo11n.pt ``` For further customization, you can adjust various arguments like `imgsz`, `batch`, and `conf` in both Python and CLI modes. Check the [Arguments for YOLO Model Validation](#arguments-for-yolo-model-validation) section for the full list of parameters. -### What metrics can I get from YOLOv8 model validation? +### What metrics can I get from YOLO11 model validation? -YOLOv8 model validation provides several key metrics to assess model performance. These include: +YOLO11 model validation provides several key metrics to assess model performance. These include: - mAP50 (mean Average Precision at IoU threshold 0.5) - mAP75 (mean Average Precision at IoU threshold 0.75) @@ -156,16 +156,16 @@ For a complete performance evaluation, it's crucial to review all these metrics. Using Ultralytics YOLO for validation provides several advantages: -- **[Precision](https://www.ultralytics.com/glossary/precision):** YOLOv8 offers accurate performance metrics including mAP50, mAP75, and mAP50-95. +- **[Precision](https://www.ultralytics.com/glossary/precision):** YOLO11 offers accurate performance metrics including mAP50, mAP75, and mAP50-95. - **Convenience:** The models remember their training settings, making validation straightforward. - **Flexibility:** You can validate against the same or different datasets and image sizes. - **Hyperparameter Tuning:** Validation metrics help in fine-tuning models for better performance. These benefits ensure that your models are evaluated thoroughly and can be optimized for superior results. 
Learn more about these advantages in the [Why Validate with Ultralytics YOLO](#why-validate-with-ultralytics-yolo) section. -### Can I validate my YOLOv8 model using a custom dataset? +### Can I validate my YOLO11 model using a custom dataset? -Yes, you can validate your YOLOv8 model using a [custom dataset](https://docs.ultralytics.com/datasets/). Specify the `data` argument with the path to your dataset configuration file. This file should include paths to the [validation data](https://www.ultralytics.com/glossary/validation-data), class names, and other relevant details. +Yes, you can validate your YOLO11 model using a [custom dataset](https://docs.ultralytics.com/datasets/). Specify the `data` argument with the path to your dataset configuration file. This file should include paths to the [validation data](https://www.ultralytics.com/glossary/validation-data), class names, and other relevant details. Example in Python: @@ -173,7 +173,7 @@ Example in Python: from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Validate with a custom dataset metrics = model.val(data="path/to/your/custom_dataset.yaml") @@ -183,12 +183,12 @@ print(metrics.box.map) # map50-95 Example using CLI: ```bash -yolo val model=yolov8n.pt data=path/to/your/custom_dataset.yaml +yolo val model=yolo11n.pt data=path/to/your/custom_dataset.yaml ``` For more customizable options during validation, see the [Example Validation with Arguments](#example-validation-with-arguments) section. -### How do I save validation results to a JSON file in YOLOv8? +### How do I save validation results to a JSON file in YOLO11? To save the validation results to a JSON file, you can set the `save_json` argument to `True` when running validation. This can be done in both the Python API and CLI. 
@@ -198,7 +198,7 @@ Example in Python: from ultralytics import YOLO # Load a model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Save validation results to JSON metrics = model.val(save_json=True) @@ -207,7 +207,7 @@ metrics = model.val(save_json=True) Example using CLI: ```bash -yolo val model=yolov8n.pt save_json=True +yolo val model=yolo11n.pt save_json=True ``` This functionality is particularly useful for further analysis or integration with other tools. Check the [Arguments for YOLO Model Validation](#arguments-for-yolo-model-validation) for more details. diff --git a/docs/en/quickstart.md b/docs/en/quickstart.md index 204623cca4f..da4d8624195 100644 --- a/docs/en/quickstart.md +++ b/docs/en/quickstart.md @@ -28,7 +28,7 @@ Ultralytics provides various installation methods including pip, conda, and Dock Install the `ultralytics` package using pip, or update an existing installation by running `pip install -U ultralytics`. Visit the Python Package Index (PyPI) for more details on the `ultralytics` package: [https://pypi.org/project/ultralytics/](https://pypi.org/project/ultralytics/). [![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) - [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) + [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) ```bash # Install the ultralytics package from PyPI diff --git a/docs/en/reference/cfg/__init__.md b/docs/en/reference/cfg/__init__.md index 5997b37f2b4..6a59b4c4ef5 100644 --- a/docs/en/reference/cfg/__init__.md +++ b/docs/en/reference/cfg/__init__.md @@ -47,11 +47,7 @@ keywords: Ultralytics, YOLO, configuration, cfg2dict, get_cfg, check_cfg, save_d



-## ::: ultralytics.cfg.handle_explorer - -



- -## ::: ultralytics.cfg.handle_streamlit_inference +## ::: ultralytics.cfg.handle_yolo_solutions



diff --git a/docs/en/reference/data/converter.md b/docs/en/reference/data/converter.md index d4ba3d58d5a..073c760e536 100644 --- a/docs/en/reference/data/converter.md +++ b/docs/en/reference/data/converter.md @@ -41,4 +41,8 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme ## ::: ultralytics.data.converter.yolo_bbox2segment +



+ +## ::: ultralytics.data.converter.create_synthetic_coco_dataset +

diff --git a/docs/en/reference/data/explorer/explorer.md b/docs/en/reference/data/explorer/explorer.md deleted file mode 100644 index 22aa6d06207..00000000000 --- a/docs/en/reference/data/explorer/explorer.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -comments: true -description: Explore the Ultralytics data explorer functions including YOLO dataset handling, image querying, embedding generation, and similarity indexing. -keywords: Ultralytics, YOLO, data explorer, image querying, embeddings, similarity index, python, machine learning ---- - -# Reference for `ultralytics/data/explorer/explorer.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/explorer.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.explorer.ExplorerDataset - -



- -## ::: ultralytics.data.explorer.explorer.Explorer - -

diff --git a/docs/en/reference/data/explorer/gui/dash.md b/docs/en/reference/data/explorer/gui/dash.md deleted file mode 100644 index b2e51203644..00000000000 --- a/docs/en/reference/data/explorer/gui/dash.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -comments: true -description: Explore the functionalities of Ultralytics Explorer with our comprehensive GUI dash documentation. -keywords: Ultralytics, Explorer, GUI, dash, documentation, data explorer, AI query, SQL query, image similarity ---- - -# Reference for `ultralytics/data/explorer/gui/dash.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/gui/dash.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.gui.dash._get_explorer - -



- -## ::: ultralytics.data.explorer.gui.dash.init_explorer_form - -



- -## ::: ultralytics.data.explorer.gui.dash.query_form - -



- -## ::: ultralytics.data.explorer.gui.dash.ai_query_form - -



- -## ::: ultralytics.data.explorer.gui.dash.find_similar_imgs - -



- -## ::: ultralytics.data.explorer.gui.dash.similarity_form - -



- -## ::: ultralytics.data.explorer.gui.dash.run_sql_query - -



- -## ::: ultralytics.data.explorer.gui.dash.run_ai_query - -



- -## ::: ultralytics.data.explorer.gui.dash.reset_explorer - -



- -## ::: ultralytics.data.explorer.gui.dash.utralytics_explorer_docs_callback - -



- -## ::: ultralytics.data.explorer.gui.dash.layout - -

diff --git a/docs/en/reference/data/explorer/utils.md b/docs/en/reference/data/explorer/utils.md deleted file mode 100644 index 9a953a0665c..00000000000 --- a/docs/en/reference/data/explorer/utils.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -comments: true -description: Explore various utility functions in ultralytics.data.explorer.utils including schema definitions, batch sanitization, and query results plotting. -keywords: Ultralytics, data explorer, utils, schema, sanitize batch, plot query results, SQL query, machine learning ---- - -# Reference for `ultralytics/data/explorer/utils.py` - -!!! note - - This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/utils.py) 🛠️. Thank you 🙏! - -
- -## ::: ultralytics.data.explorer.utils.get_table_schema - -



- -## ::: ultralytics.data.explorer.utils.get_sim_index_schema - -



- -## ::: ultralytics.data.explorer.utils.sanitize_batch - -



- -## ::: ultralytics.data.explorer.utils.plot_query_result - -



- -## ::: ultralytics.data.explorer.utils.prompt_sql_query - -

diff --git a/docs/en/reference/data/utils.md b/docs/en/reference/data/utils.md index c5aba5e7b82..7721d5ea121 100644 --- a/docs/en/reference/data/utils.md +++ b/docs/en/reference/data/utils.md @@ -35,6 +35,10 @@ keywords: Ultralytics, dataset utils, data handling, image verification, Python,



+## ::: ultralytics.data.utils.visualize_image_annotations + +



+ ## ::: ultralytics.data.utils.polygon2mask



diff --git a/docs/en/reference/engine/exporter.md b/docs/en/reference/engine/exporter.md index 98e81a8aaf5..a650b314e87 100644 --- a/docs/en/reference/engine/exporter.md +++ b/docs/en/reference/engine/exporter.md @@ -19,10 +19,18 @@ keywords: YOLOv8, export formats, ONNX, TensorRT, CoreML, machine learning model



+## ::: ultralytics.engine.exporter.NMSModel + +



+ ## ::: ultralytics.engine.exporter.export_formats



+## ::: ultralytics.engine.exporter.validate_args + +



+ ## ::: ultralytics.engine.exporter.gd_outputs



diff --git a/docs/en/reference/models/sam/predict.md b/docs/en/reference/models/sam/predict.md index e715225c64b..17f8b472c4a 100644 --- a/docs/en/reference/models/sam/predict.md +++ b/docs/en/reference/models/sam/predict.md @@ -17,4 +17,8 @@ keywords: Ultralytics, SAM, Segment Anything Model, SAM 2, Segment Anything Mode ## ::: ultralytics.models.sam.predict.SAM2Predictor +



+ +## ::: ultralytics.models.sam.predict.SAM2VideoPredictor +

diff --git a/docs/en/reference/nn/modules/block.md b/docs/en/reference/nn/modules/block.md index da0ca655f3d..05134721e4f 100644 --- a/docs/en/reference/nn/modules/block.md +++ b/docs/en/reference/nn/modules/block.md @@ -189,4 +189,8 @@ keywords: Ultralytics, YOLO, neural networks, block modules, DFL, Proto, HGStem, ## ::: ultralytics.nn.modules.block.SCDown +



+ +## ::: ultralytics.nn.modules.block.TorchVision +

diff --git a/docs/en/reference/nn/modules/conv.md b/docs/en/reference/nn/modules/conv.md index 6c68fa7fc99..e7a16b577ab 100644 --- a/docs/en/reference/nn/modules/conv.md +++ b/docs/en/reference/nn/modules/conv.md @@ -63,6 +63,10 @@ keywords: Ultralytics, convolution modules, Conv, LightConv, GhostConv, YOLO, de



+## ::: ultralytics.nn.modules.conv.Index + +



+ ## ::: ultralytics.nn.modules.conv.autopad

diff --git a/docs/en/reference/solutions/region_counter.md b/docs/en/reference/solutions/region_counter.md new file mode 100644 index 00000000000..0f27adff281 --- /dev/null +++ b/docs/en/reference/solutions/region_counter.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics Region Counter for real-time video streams. Learn about initializing parameters, tracking objects, and more. +keywords: Ultralytics, Object Counter, Real-time Tracking, Video Stream, Python, Object Detection +--- + +# Reference for `ultralytics/solutions/region_counter.py` + +!!! note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/region_counter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/region_counter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/region_counter.py) 🛠️. Thank you 🙏! + +
+ +## ::: ultralytics.solutions.region_counter.RegionCounter + +

diff --git a/docs/en/reference/solutions/security_alarm.md b/docs/en/reference/solutions/security_alarm.md new file mode 100644 index 00000000000..a6f1b678706 --- /dev/null +++ b/docs/en/reference/solutions/security_alarm.md @@ -0,0 +1,16 @@ +--- +description: Discover how Ultralytics' Security Alarm System enhances real-time surveillance with intelligent object detection and tracking. Learn about setup, monitoring, and threat detection. +keywords: Ultralytics, Security Alarm System, Real-time Surveillance, Object Detection, Video Monitoring, Python, Threat Detection +--- + +# Reference for `ultralytics/solutions/security_alarm.py` + +!!! note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/security_alarm.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/security_alarm.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/security_alarm.py) 🛠️. Thank you 🙏! + +
+ +## ::: ultralytics.solutions.security_alarm.SecurityAlarm + +

diff --git a/docs/en/reference/solutions/solutions.md b/docs/en/reference/solutions/solutions.md new file mode 100644 index 00000000000..727a5fa7527 --- /dev/null +++ b/docs/en/reference/solutions/solutions.md @@ -0,0 +1,16 @@ +--- +description: Explore the Ultralytics Solution Base class for real-time object counting, virtual gym, heatmaps, speed estimation using Ultralytics YOLO. Learn to implement Ultralytics solutions effectively. +keywords: Ultralytics, Solutions, Object counting, Speed Estimation, Heatmaps, Queue Management, AI Gym, YOLO, pose detection, gym step counting, real-time pose estimation, Python +--- + +# Reference for `ultralytics/solutions/solutions.py` + +!!! note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/solutions.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/solutions.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/solutions.py) 🛠️. Thank you 🙏! + +
+ +## ::: ultralytics.solutions.solutions.BaseSolution + +

diff --git a/docs/en/reference/solutions/streamlit_inference.md b/docs/en/reference/solutions/streamlit_inference.md index 368d69e3f4a..92aac75064d 100644 --- a/docs/en/reference/solutions/streamlit_inference.md +++ b/docs/en/reference/solutions/streamlit_inference.md @@ -11,6 +11,6 @@ keywords: Ultralytics, YOLOv8, live inference, real-time object detection, Strea
-## ::: ultralytics.solutions.streamlit_inference.inference +## ::: ultralytics.solutions.streamlit_inference.Inference

diff --git a/docs/en/reference/solutions/trackzone.md b/docs/en/reference/solutions/trackzone.md new file mode 100644 index 00000000000..546d61dcb03 --- /dev/null +++ b/docs/en/reference/solutions/trackzone.md @@ -0,0 +1,16 @@ +--- +description: Discover Ultralytics' TrackZone solution for real-time object tracking within defined zones. Gain insights into initializing regions, tracking objects exclusively within specific areas, and optimizing video stream processing for region-based object detection. +keywords: Ultralytics, TrackZone, Object Tracking, Zone Tracking, Region Tracking, Python, Real-time Object Tracking, Video Stream Processing, Region-based Detection +--- + +# Reference for `ultralytics/solutions/trackzone.py` + +!!! note + + This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/trackzone.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/trackzone.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/trackzone.py) 🛠️. Thank you 🙏! + +
+ +## ::: ultralytics.solutions.trackzone.TrackZone + +

diff --git a/docs/en/reference/utils/__init__.md b/docs/en/reference/utils/__init__.md index 9b7d1b584d8..981adc6ce84 100644 --- a/docs/en/reference/utils/__init__.md +++ b/docs/en/reference/utils/__init__.md @@ -87,6 +87,10 @@ keywords: Ultralytics, utils, TQDM, Python, ML, Machine Learning utilities, YOLO



+## ::: ultralytics.utils.is_runpod + +



+ ## ::: ultralytics.utils.is_docker



diff --git a/docs/en/reference/utils/checks.md b/docs/en/reference/utils/checks.md index 378af7e8c5b..b75c1f6fc89 100644 --- a/docs/en/reference/utils/checks.md +++ b/docs/en/reference/utils/checks.md @@ -109,4 +109,12 @@ keywords: Ultralytics, YOLO, utility functions, version checks, requirements, im ## ::: ultralytics.utils.checks.cuda_is_available +



+ +## ::: ultralytics.utils.checks.is_rockchip + +



+ +## ::: ultralytics.utils.checks.is_sudo_available +

diff --git a/docs/en/reference/utils/metrics.md b/docs/en/reference/utils/metrics.md index 13bc13205b8..f600ccaf86d 100644 --- a/docs/en/reference/utils/metrics.md +++ b/docs/en/reference/utils/metrics.md @@ -71,7 +71,7 @@ keywords: Ultralytics, metrics, model validation, performance analysis, IoU, con



-## ::: ultralytics.utils.metrics.smooth_BCE +## ::: ultralytics.utils.metrics.smooth_bce



diff --git a/docs/en/reference/utils/ops.md b/docs/en/reference/utils/ops.md index b62ba7b4ebf..ab6cafbca86 100644 --- a/docs/en/reference/utils/ops.md +++ b/docs/en/reference/utils/ops.md @@ -129,4 +129,8 @@ keywords: Ultralytics, utility operations, non-max suppression, bounding box tra ## ::: ultralytics.utils.ops.clean_str +



+ +## ::: ultralytics.utils.ops.empty_like +

diff --git a/docs/en/reference/utils/torch_utils.md b/docs/en/reference/utils/torch_utils.md index 4f8f3d1b9ca..8242b70aba0 100644 --- a/docs/en/reference/utils/torch_utils.md +++ b/docs/en/reference/utils/torch_utils.md @@ -19,6 +19,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere



+## ::: ultralytics.utils.torch_utils.FXModel + +



+ ## ::: ultralytics.utils.torch_utils.torch_distributed_zero_first



@@ -35,6 +39,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere



+## ::: ultralytics.utils.torch_utils.get_gpu_info + +



+ ## ::: ultralytics.utils.torch_utils.select_device



@@ -119,6 +127,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere



+## ::: ultralytics.utils.torch_utils.cuda_memory_usage + +



+ ## ::: ultralytics.utils.torch_utils.profile

diff --git a/docs/en/solutions/index.md b/docs/en/solutions/index.md index 52423c14f51..9414be65d8d 100644 --- a/docs/en/solutions/index.md +++ b/docs/en/solutions/index.md @@ -1,34 +1,72 @@ --- comments: true -description: Explore Ultralytics Solutions using YOLOv8 for object counting, blurring, security, and more. Enhance efficiency and solve real-world problems with cutting-edge AI. -keywords: Ultralytics, YOLOv8, object counting, object blurring, security systems, AI solutions, real-time analysis, computer vision applications +description: Explore Ultralytics Solutions using YOLO11 for object counting, blurring, security, and more. Enhance efficiency and solve real-world problems with cutting-edge AI. +keywords: Ultralytics, YOLO11, object counting, object blurring, security systems, AI solutions, real-time analysis, computer vision applications --- -# Ultralytics Solutions: Harness YOLOv8 to Solve Real-World Problems +# Ultralytics Solutions: Harness YOLO11 to Solve Real-World Problems -Ultralytics Solutions provide cutting-edge applications of YOLO models, offering real-world solutions like object counting, blurring, and security systems, enhancing efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) in diverse industries. Discover the power of YOLOv8 for practical, impactful implementations. +Ultralytics Solutions provide cutting-edge applications of YOLO models, offering real-world solutions like object counting, blurring, and security systems, enhancing efficiency and [accuracy](https://www.ultralytics.com/glossary/accuracy) in diverse industries. Discover the power of YOLO11 for practical, impactful implementations. ![Ultralytics Solutions Thumbnail](https://github.com/ultralytics/docs/releases/download/0/ultralytics-solutions-thumbnail.avif) +

+
+ +
+ Watch: How to Run Ultralytics Solutions from the Command Line (CLI) | Ultralytics YOLO11 🚀 +

+ ## Solutions Here's our curated list of Ultralytics solutions that can be used to create awesome [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. -- [Object Counting](../guides/object-counting.md) 🚀 NEW: Learn to perform real-time object counting with YOLOv8. Gain the expertise to accurately count objects in live video streams. -- [Object Cropping](../guides/object-cropping.md) 🚀 NEW: Master object cropping with YOLOv8 for precise extraction of objects from images and videos. -- [Object Blurring](../guides/object-blurring.md) 🚀 NEW: Apply object blurring using YOLOv8 to protect privacy in image and video processing. -- [Workouts Monitoring](../guides/workouts-monitoring.md) 🚀 NEW: Discover how to monitor workouts using YOLOv8. Learn to track and analyze various fitness routines in real time. -- [Objects Counting in Regions](../guides/region-counting.md) 🚀 NEW: Count objects in specific regions using YOLOv8 for accurate detection in varied areas. -- [Security Alarm System](../guides/security-alarm-system.md) 🚀 NEW: Create a security alarm system with YOLOv8 that triggers alerts upon detecting new objects. Customize the system to fit your specific needs. -- [Heatmaps](../guides/heatmaps.md) 🚀 NEW: Utilize detection heatmaps to visualize data intensity across a matrix, providing clear insights in computer vision tasks. -- [Instance Segmentation with Object Tracking](../guides/instance-segmentation-and-tracking.md) 🚀 NEW: Implement [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) and object tracking with YOLOv8 to achieve precise object boundaries and continuous monitoring. -- [VisionEye View Objects Mapping](../guides/vision-eye.md) 🚀 NEW: Develop systems that mimic human eye focus on specific objects, enhancing the computer's ability to discern and prioritize details.
-- [Speed Estimation](../guides/speed-estimation.md) 🚀 NEW: Estimate object speed using YOLOv8 and object tracking techniques, crucial for applications like autonomous vehicles and traffic monitoring. -- [Distance Calculation](../guides/distance-calculation.md) 🚀 NEW: Calculate distances between objects using [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroids in YOLOv8, essential for spatial analysis. -- [Queue Management](../guides/queue-management.md) 🚀 NEW: Implement efficient queue management systems to minimize wait times and improve productivity using YOLOv8. -- [Parking Management](../guides/parking-management.md) 🚀 NEW: Organize and direct vehicle flow in parking areas with YOLOv8, optimizing space utilization and user experience. -- [Analytics](../guides/analytics.md) 📊 NEW: Conduct comprehensive data analysis to discover patterns and make informed decisions, leveraging YOLOv8 for descriptive, predictive, and prescriptive analytics. -- [Live Inference with Streamlit](../guides/streamlit-live-inference.md) 🚀 NEW: Leverage the power of YOLOv8 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. +- [Object Counting](../guides/object-counting.md) 🚀: Learn to perform real-time object counting with YOLO11. Gain the expertise to accurately count objects in live video streams. +- [Object Cropping](../guides/object-cropping.md) 🚀: Master object cropping with YOLO11 for precise extraction of objects from images and videos. +- [Object Blurring](../guides/object-blurring.md) 🚀: Apply object blurring using YOLO11 to protect privacy in image and video processing. +- [Workouts Monitoring](../guides/workouts-monitoring.md) 🚀: Discover how to monitor workouts using YOLO11. Learn to track and analyze various fitness routines in real time. 
+- [Objects Counting in Regions](../guides/region-counting.md) 🚀: Count objects in specific regions using YOLO11 for accurate detection in varied areas. +- [Security Alarm System](../guides/security-alarm-system.md) 🚀: Create a security alarm system with YOLO11 that triggers alerts upon detecting new objects. Customize the system to fit your specific needs. +- [Heatmaps](../guides/heatmaps.md) 🚀: Utilize detection heatmaps to visualize data intensity across a matrix, providing clear insights in computer vision tasks. +- [Instance Segmentation with Object Tracking](../guides/instance-segmentation-and-tracking.md) 🚀 NEW: Implement [instance segmentation](https://www.ultralytics.com/glossary/instance-segmentation) and object tracking with YOLO11 to achieve precise object boundaries and continuous monitoring. +- [VisionEye View Objects Mapping](../guides/vision-eye.md) 🚀: Develop systems that mimic human eye focus on specific objects, enhancing the computer's ability to discern and prioritize details. +- [Speed Estimation](../guides/speed-estimation.md) 🚀: Estimate object speed using YOLO11 and object tracking techniques, crucial for applications like autonomous vehicles and traffic monitoring. +- [Distance Calculation](../guides/distance-calculation.md) 🚀: Calculate distances between objects using [bounding box](https://www.ultralytics.com/glossary/bounding-box) centroids in YOLO11, essential for spatial analysis. +- [Queue Management](../guides/queue-management.md) 🚀: Implement efficient queue management systems to minimize wait times and improve productivity using YOLO11. +- [Parking Management](../guides/parking-management.md) 🚀: Organize and direct vehicle flow in parking areas with YOLO11, optimizing space utilization and user experience. 
+- [Analytics](../guides/analytics.md) ๐Ÿ“Š: Conduct comprehensive data analysis to discover patterns and make informed decisions, leveraging YOLO11 for descriptive, predictive, and prescriptive analytics. +- [Live Inference with Streamlit](../guides/streamlit-live-inference.md) ๐Ÿš€: Leverage the power of YOLO11 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. +- [Track Objects in Zone](../guides/trackzone.md) ๐ŸŽฏ NEW: Learn how to track objects within specific zones of video frames using YOLO11 for precise and efficient monitoring. + +## Solutions Usage + +!!! tip "Command Info" + + `yolo SOLUTIONS SOLUTION_NAME ARGS` + + - **SOLUTIONS** is a required keyword. + - **SOLUTION_NAME** (optional) is one of: `['count', 'heatmap', 'queue', 'speed', 'workout', 'analytics', 'trackzone']`. + - **ARGS** (optional) are custom `arg=value` pairs, such as `show_in=True`, to override default settings. + + === "CLI" + + ```bash + yolo solutions count show=True # for object counting + + yolo solutions source="path/to/video/file.mp4" # specify video file path + ``` + +## Arguments + +!!! tip "Predict args" + + Solutions also support some of the arguments from `predict`, including parameters such as `conf`, `line_width`, `tracker`, `model`, `show`, and `classes`. + +{% include "macros/solutions-args.md" %} ## Contribute to Our Solutions @@ -42,20 +80,20 @@ Let's work together to make the Ultralytics YOLO ecosystem more robust and versa ### How can I use Ultralytics YOLO for real-time object counting? -Ultralytics YOLOv8 can be used for real-time object counting by leveraging its advanced object detection capabilities. You can follow our detailed guide on [Object Counting](../guides/object-counting.md) to set up YOLOv8 for live video stream analysis. Simply install YOLOv8, load your model, and process video frames to count objects dynamically. 
+Ultralytics YOLO11 can be used for real-time object counting by leveraging its advanced object detection capabilities. You can follow our detailed guide on [Object Counting](../guides/object-counting.md) to set up YOLO11 for live video stream analysis. Simply install YOLO11, load your model, and process video frames to count objects dynamically. ### What are the benefits of using Ultralytics YOLO for security systems? -Ultralytics YOLOv8 enhances security systems by offering real-time object detection and alert mechanisms. By employing YOLOv8, you can create a security alarm system that triggers alerts when new objects are detected in the surveillance area. Learn how to set up a [Security Alarm System](../guides/security-alarm-system.md) with YOLOv8 for robust security monitoring. +Ultralytics YOLO11 enhances security systems by offering real-time object detection and alert mechanisms. By employing YOLO11, you can create a security alarm system that triggers alerts when new objects are detected in the surveillance area. Learn how to set up a [Security Alarm System](../guides/security-alarm-system.md) with YOLO11 for robust security monitoring. ### How can Ultralytics YOLO improve queue management systems? -Ultralytics YOLOv8 can significantly improve queue management systems by accurately counting and tracking people in queues, thus helping to reduce wait times and optimize service efficiency. Follow our detailed guide on [Queue Management](../guides/queue-management.md) to learn how to implement YOLOv8 for effective queue monitoring and analysis. +Ultralytics YOLO11 can significantly improve queue management systems by accurately counting and tracking people in queues, thus helping to reduce wait times and optimize service efficiency. Follow our detailed guide on [Queue Management](../guides/queue-management.md) to learn how to implement YOLO11 for effective queue monitoring and analysis. ### Can Ultralytics YOLO be used for workout monitoring? 
-Yes, Ultralytics YOLOv8 can be effectively used for monitoring workouts by tracking and analyzing fitness routines in real-time. This allows for precise evaluation of exercise form and performance. Explore our guide on [Workouts Monitoring](../guides/workouts-monitoring.md) to learn how to set up an AI-powered workout monitoring system using YOLOv8. +Yes, Ultralytics YOLO11 can be effectively used for monitoring workouts by tracking and analyzing fitness routines in real-time. This allows for precise evaluation of exercise form and performance. Explore our guide on [Workouts Monitoring](../guides/workouts-monitoring.md) to learn how to set up an AI-powered workout monitoring system using YOLO11. ### How does Ultralytics YOLO help in creating heatmaps for [data visualization](https://www.ultralytics.com/glossary/data-visualization)? -Ultralytics YOLOv8 can generate heatmaps to visualize data intensity across a given area, highlighting regions of high activity or interest. This feature is particularly useful in understanding patterns and trends in various computer vision tasks. Learn more about creating and using [Heatmaps](../guides/heatmaps.md) with YOLOv8 for comprehensive data analysis and visualization. +Ultralytics YOLO11 can generate heatmaps to visualize data intensity across a given area, highlighting regions of high activity or interest. This feature is particularly useful in understanding patterns and trends in various computer vision tasks. Learn more about creating and using [Heatmaps](../guides/heatmaps.md) with YOLO11 for comprehensive data analysis and visualization. diff --git a/docs/en/tasks/classify.md b/docs/en/tasks/classify.md index 62623a403fb..7da8a3e7e7c 100644 --- a/docs/en/tasks/classify.md +++ b/docs/en/tasks/classify.md @@ -1,8 +1,8 @@ --- comments: true -description: Master image classification using YOLOv8. Learn to train, validate, predict, and export models efficiently. 
-keywords: YOLOv8, image classification, AI, machine learning, pretrained models, ImageNet, model export, predict, train, validate -model_name: yolov8n-cls +description: Master image classification using YOLO11. Learn to train, validate, predict, and export models efficiently. +keywords: YOLO11, image classification, AI, machine learning, pretrained models, ImageNet, model export, predict, train, validate +model_name: yolo11n-cls --- # Image Classification @@ -26,28 +26,22 @@ The output of an image classifier is a single class label and a confidence score !!! tip - YOLOv8 Classify models use the `-cls` suffix, i.e. `yolov8n-cls.pt` and are pretrained on [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). + YOLO11 Classify models use the `-cls` suffix, i.e. `yolo11n-cls.pt` and are pretrained on [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml). -## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) -YOLOv8 pretrained Classify models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. +YOLO11 pretrained Classify models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. 
[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -| Model | size
(pixels) | acc
top1 | acc
top5 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) at 640 | -| -------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | ------------------------------ | ----------------------------------- | ------------------ | ------------------------ | -| [YOLOv8n-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt) | 224 | 69.0 | 88.3 | 12.9 | 0.31 | 2.7 | 4.3 | -| [YOLOv8s-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-cls.pt) | 224 | 73.8 | 91.7 | 23.4 | 0.35 | 6.4 | 13.5 | -| [YOLOv8m-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-cls.pt) | 224 | 76.8 | 93.5 | 85.4 | 0.62 | 17.0 | 42.7 | -| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-cls.pt) | 224 | 76.8 | 93.5 | 163.0 | 0.87 | 37.5 | 99.7 | -| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-cls.pt) | 224 | 79.0 | 94.6 | 232.0 | 1.01 | 57.4 | 154.8 | +{% include "macros/yolo-cls-perf.md" %} - **acc** values are model accuracies on the [ImageNet](https://www.image-net.org/) dataset validation set.
Reproduce by `yolo val classify data=path/to/ImageNet device=0` - **Speed** averaged over ImageNet val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val classify data=path/to/ImageNet batch=1 device=0|cpu` ## Train -Train YOLOv8n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 64. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 64. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example @@ -57,9 +51,9 @@ Train YOLOv8n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralyti from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.yaml") # build a new model from YAML - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n-cls.yaml").load("yolov8n-cls.pt") # build from YAML and transfer weights + model = YOLO("yolo11n-cls.yaml") # build a new model from YAML + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.yaml").load("yolo11n-cls.pt") # build from YAML and transfer weights # Train the model results = model.train(data="mnist160", epochs=100, imgsz=64) @@ -69,13 +63,13 @@ Train YOLOv8n-cls on the MNIST160 dataset for 100 [epochs](https://www.ultralyti ```bash # Build a new model from YAML and start training from scratch - yolo classify train data=mnist160 model=yolov8n-cls.yaml epochs=100 imgsz=64 + yolo classify train data=mnist160 model=yolo11n-cls.yaml epochs=100 imgsz=64 # Start training from a pretrained *.pt model - yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + yolo classify train data=mnist160 model=yolo11n-cls.pt epochs=100 imgsz=64 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo classify train data=mnist160 model=yolov8n-cls.yaml pretrained=yolov8n-cls.pt epochs=100 imgsz=64 + yolo classify train data=mnist160 
model=yolo11n-cls.yaml pretrained=yolo11n-cls.pt epochs=100 imgsz=64 ``` ### Dataset format @@ -84,7 +78,7 @@ YOLO classification dataset format can be found in detail in the [Dataset Guide] ## Val -Validate trained YOLOv8n-cls model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the MNIST160 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-cls model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the MNIST160 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example @@ -94,7 +88,7 @@ Validate trained YOLOv8n-cls model [accuracy](https://www.ultralytics.com/glossa from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load an official model + model = YOLO("yolo11n-cls.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -106,13 +100,13 @@ Validate trained YOLOv8n-cls model [accuracy](https://www.ultralytics.com/glossa === "CLI" ```bash - yolo classify val model=yolov8n-cls.pt # val official model + yolo classify val model=yolo11n-cls.pt # val official model yolo classify val model=path/to/best.pt # val custom model ``` ## Predict -Use a trained YOLOv8n-cls model to run predictions on images. +Use a trained YOLO11n-cls model to run predictions on images. !!! example @@ -122,7 +116,7 @@ Use a trained YOLOv8n-cls model to run predictions on images. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load an official model + model = YOLO("yolo11n-cls.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Predict with the model @@ -132,7 +126,7 @@ Use a trained YOLOv8n-cls model to run predictions on images. 
=== "CLI" ```bash - yolo classify predict model=yolov8n-cls.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo classify predict model=yolo11n-cls.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` @@ -140,7 +134,7 @@ See full `predict` mode details in the [Predict](../modes/predict.md) page. ## Export -Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. +Export a YOLO11n-cls model to a different format like ONNX, CoreML, etc. !!! example @@ -150,7 +144,7 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load an official model + model = YOLO("yolo11n-cls.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -160,11 +154,11 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc. === "CLI" ```bash - yolo export model=yolov8n-cls.pt format=onnx # export official model + yolo export model=yolo11n-cls.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8-cls export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11-cls export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n-cls.onnx`. Usage examples are shown for your model after export completes. 
{% include "macros/export-table.md" %} @@ -172,13 +166,13 @@ See full `export` details in the [Export](../modes/export.md) page. ## FAQ -### What is the purpose of YOLOv8 in image classification? +### What is the purpose of YOLO11 in image classification? -YOLOv8 models, such as `yolov8n-cls.pt`, are designed for efficient image classification. They assign a single class label to an entire image along with a confidence score. This is particularly useful for applications where knowing the specific class of an image is sufficient, rather than identifying the location or shape of objects within the image. +YOLO11 models, such as `yolo11n-cls.pt`, are designed for efficient image classification. They assign a single class label to an entire image along with a confidence score. This is particularly useful for applications where knowing the specific class of an image is sufficient, rather than identifying the location or shape of objects within the image. -### How do I train a YOLOv8 model for image classification? +### How do I train a YOLO11 model for image classification? -To train a YOLOv8 model, you can use either Python or CLI commands. For example, to train a `yolov8n-cls` model on the MNIST160 dataset for 100 epochs at an image size of 64: +To train a YOLO11 model, you can use either Python or CLI commands. For example, to train a `yolo11n-cls` model on the MNIST160 dataset for 100 epochs at an image size of 64: !!! example @@ -188,7 +182,7 @@ To train a YOLOv8 model, you can use either Python or CLI commands. For example, from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-cls.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="mnist160", epochs=100, imgsz=64) @@ -197,18 +191,18 @@ To train a YOLOv8 model, you can use either Python or CLI commands. 
For example, === "CLI" ```bash - yolo classify train data=mnist160 model=yolov8n-cls.pt epochs=100 imgsz=64 + yolo classify train data=mnist160 model=yolo11n-cls.pt epochs=100 imgsz=64 ``` For more configuration options, visit the [Configuration](../usage/cfg.md) page. -### Where can I find pretrained YOLOv8 classification models? +### Where can I find pretrained YOLO11 classification models? -Pretrained YOLOv8 classification models can be found in the [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) section. Models like `yolov8n-cls.pt`, `yolov8s-cls.pt`, `yolov8m-cls.pt`, etc., are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset and can be easily downloaded and used for various image classification tasks. +Pretrained YOLO11 classification models can be found in the [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) section. Models like `yolo11n-cls.pt`, `yolo11s-cls.pt`, `yolo11m-cls.pt`, etc., are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset and can be easily downloaded and used for various image classification tasks. -### How can I export a trained YOLOv8 model to different formats? +### How can I export a trained YOLO11 model to different formats? -You can export a trained YOLOv8 model to various formats using Python or CLI commands. For instance, to export a model to ONNX format: +You can export a trained YOLO11 model to various formats using Python or CLI commands. For instance, to export a model to ONNX format: !!! 
example @@ -218,7 +212,7 @@ You can export a trained YOLOv8 model to various formats using Python or CLI com from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load the trained model + model = YOLO("yolo11n-cls.pt") # load the trained model # Export the model to ONNX model.export(format="onnx") @@ -227,12 +221,12 @@ You can export a trained YOLOv8 model to various formats using Python or CLI com === "CLI" ```bash - yolo export model=yolov8n-cls.pt format=onnx # export the trained model to ONNX format + yolo export model=yolo11n-cls.pt format=onnx # export the trained model to ONNX format ``` For detailed export options, refer to the [Export](../modes/export.md) page. -### How do I validate a trained YOLOv8 classification model? +### How do I validate a trained YOLO11 classification model? To validate a trained model's accuracy on a dataset like MNIST160, you can use the following Python or CLI commands: @@ -244,7 +238,7 @@ To validate a trained model's accuracy on a dataset like MNIST160, you can use t from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-cls.pt") # load the trained model + model = YOLO("yolo11n-cls.pt") # load the trained model # Validate the model metrics = model.val() # no arguments needed, uses the dataset and settings from training @@ -255,7 +249,7 @@ To validate a trained model's accuracy on a dataset like MNIST160, you can use t === "CLI" ```bash - yolo classify val model=yolov8n-cls.pt # validate the trained model + yolo classify val model=yolo11n-cls.pt # validate the trained model ``` For more information, visit the [Validate](#val) section. diff --git a/docs/en/tasks/detect.md b/docs/en/tasks/detect.md index a9c54889bc4..68c53f075b2 100644 --- a/docs/en/tasks/detect.md +++ b/docs/en/tasks/detect.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn about object detection with YOLOv8. 
Explore pretrained models, training, validation, prediction, and export details for efficient object recognition. -keywords: object detection, YOLOv8, pretrained models, training, validation, prediction, export, machine learning, computer vision +description: Learn about object detection with YOLO11. Explore pretrained models, training, validation, prediction, and export details for efficient object recognition. +keywords: object detection, YOLO11, pretrained models, training, validation, prediction, export, machine learning, computer vision --- # Object Detection @@ -20,33 +20,27 @@ The output of an object detector is a set of bounding boxes that enclose the obj allowfullscreen>
- Watch: Object Detection with Pre-trained Ultralytics YOLOv8 Model. + Watch: Object Detection with Pre-trained Ultralytics YOLO Model.

!!! tip - YOLOv8 Detect models are the default YOLOv8 models, i.e. `yolov8n.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + YOLO11 Detect models are the default YOLO11 models, i.e. `yolo11n.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). -## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) -YOLOv8 pretrained Detect models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. +YOLO11 pretrained Detect models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ------------------------------------------------------------------------------------ | --------------------- | -------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | 640 | 37.3 | 80.4 | 0.99 | 3.2 | 8.7 | -| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | 640 | 44.9 | 128.4 | 1.20 | 11.2 | 28.6 | -| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | 640 | 50.2 | 234.7 | 1.83 | 25.9 | 78.9 | -| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) | 640 | 52.9 | 375.2 | 2.39 | 43.7 | 165.2 | -| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) | 640 | 53.9 | 479.1 | 3.53 | 68.2 | 257.8 | +{% include "macros/yolo-det-perf.md" %} - **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val detect data=coco.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val detect data=coco8.yaml batch=1 device=0|cpu` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val detect data=coco.yaml batch=1 device=0|cpu` ## Train -Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example @@ -56,9 +50,9 @@ Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.yaml") # build a new model from YAML - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n.yaml").load("yolov8n.pt") # build from YAML and transfer weights + model = YOLO("yolo11n.yaml") # build a new model from YAML + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.yaml").load("yolo11n.pt") # build from YAML and transfer weights # Train the model results = model.train(data="coco8.yaml", epochs=100, imgsz=640) @@ -68,13 +62,13 @@ Train YOLOv8n on the COCO8 dataset for 100 [epochs](https://www.ultralytics.com/ ```bash # Build a new model from YAML and start training from scratch - yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.yaml epochs=100 imgsz=640 # Start training from a pretrained *.pt model - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo detect train data=coco8.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.yaml pretrained=yolo11n.pt epochs=100 imgsz=640 ``` ### Dataset 
format @@ -83,7 +77,7 @@ YOLO detection dataset format can be found in detail in the [Dataset Guide](../d ## Val -Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example @@ -93,7 +87,7 @@ Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/a from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -107,13 +101,13 @@ Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/a === "CLI" ```bash - yolo detect val model=yolov8n.pt # val official model + yolo detect val model=yolo11n.pt # val official model yolo detect val model=path/to/best.pt # val custom model ``` ## Predict -Use a trained YOLOv8n model to run predictions on images. +Use a trained YOLO11n model to run predictions on images. !!! example @@ -123,7 +117,7 @@ Use a trained YOLOv8n model to run predictions on images. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Predict with the model @@ -133,7 +127,7 @@ Use a trained YOLOv8n model to run predictions on images. 
=== "CLI" ```bash - yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo detect predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` @@ -141,7 +135,7 @@ See full `predict` mode details in the [Predict](../modes/predict.md) page. ## Export -Export a YOLOv8n model to a different format like ONNX, CoreML, etc. +Export a YOLO11n model to a different format like ONNX, CoreML, etc. !!! example @@ -151,7 +145,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official model + model = YOLO("yolo11n.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -161,11 +155,11 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx # export official model + yolo export model=yolo11n.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n.onnx`. Usage examples are shown for your model after export completes. 
{% include "macros/export-table.md" %} @@ -173,9 +167,9 @@ See full `export` details in the [Export](../modes/export.md) page. ## FAQ -### How do I train a YOLOv8 model on my custom dataset? +### How do I train a YOLO11 model on my custom dataset? -Training a YOLOv8 model on a custom dataset involves a few steps: +Training a YOLO11 model on a custom dataset involves a few steps: 1. **Prepare the Dataset**: Ensure your dataset is in the YOLO format. For guidance, refer to our [Dataset Guide](../datasets/detect/index.md). 2. **Load the Model**: Use the Ultralytics YOLO library to load a pre-trained model or create a new model from a YAML file. @@ -189,7 +183,7 @@ Training a YOLOv8 model on a custom dataset involves a few steps: from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model on your custom dataset model.train(data="my_custom_dataset.yaml", epochs=100, imgsz=640) @@ -198,26 +192,26 @@ Training a YOLOv8 model on a custom dataset involves a few steps: === "CLI" ```bash - yolo detect train data=my_custom_dataset.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=my_custom_dataset.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` For detailed configuration options, visit the [Configuration](../usage/cfg.md) page. -### What pretrained models are available in YOLOv8? +### What pretrained models are available in YOLO11? -Ultralytics YOLOv8 offers various pretrained models for object detection, segmentation, and pose estimation. These models are pretrained on the COCO dataset or ImageNet for classification tasks. Here are some of the available models: +Ultralytics YOLO11 offers various pretrained models for object detection, segmentation, and pose estimation. These models are pretrained on the COCO dataset or ImageNet for classification tasks. 
Here are some of the available models: -- [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) -- [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) -- [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) -- [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) -- [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) +- [YOLO11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) +- [YOLO11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) +- [YOLO11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) +- [YOLO11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) +- [YOLO11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) -For a detailed list and performance metrics, refer to the [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) section. +For a detailed list and performance metrics, refer to the [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) section. -### How can I validate the accuracy of my trained YOLOv8 model? +### How can I validate the accuracy of my trained YOLO11 model? -To validate the accuracy of your trained YOLOv8 model, you can use the `.val()` method in Python or the `yolo detect val` command in CLI. This will provide metrics like mAP50-95, mAP50, and more. +To validate the accuracy of your trained YOLO11 model, you can use the `.val()` method in Python or the `yolo detect val` command in CLI. This will provide metrics like mAP50-95, mAP50, and more. !!! example @@ -242,9 +236,9 @@ To validate the accuracy of your trained YOLOv8 model, you can use the `.val()` For more validation details, visit the [Val](../modes/val.md) page. -### What formats can I export a YOLOv8 model to? 
+### What formats can I export a YOLO11 model to? -Ultralytics YOLOv8 allows exporting models to various formats such as ONNX, TensorRT, CoreML, and more to ensure compatibility across different platforms and devices. +Ultralytics YOLO11 allows exporting models to various formats such as ONNX, TensorRT, CoreML, and more to ensure compatibility across different platforms and devices. !!! example @@ -254,7 +248,7 @@ Ultralytics YOLOv8 allows exporting models to various formats such as ONNX, Tens from ultralytics import YOLO # Load the model - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Export the model to ONNX format model.export(format="onnx") @@ -263,18 +257,18 @@ Ultralytics YOLOv8 allows exporting models to various formats such as ONNX, Tens === "CLI" ```bash - yolo export model=yolov8n.pt format=onnx + yolo export model=yolo11n.pt format=onnx ``` Check the full list of supported formats and instructions on the [Export](../modes/export.md) page. -### Why should I use Ultralytics YOLOv8 for object detection? +### Why should I use Ultralytics YOLO11 for object detection? -Ultralytics YOLOv8 is designed to offer state-of-the-art performance for object detection, segmentation, and pose estimation. Here are some key advantages: +Ultralytics YOLO11 is designed to offer state-of-the-art performance for object detection, segmentation, and pose estimation. Here are some key advantages: 1. **Pretrained Models**: Utilize models pretrained on popular datasets like COCO and ImageNet for faster development. 2. **High Accuracy**: Achieves impressive mAP scores, ensuring reliable object detection. 3. **Speed**: Optimized for real-time inference, making it ideal for applications requiring swift processing. 4. **Flexibility**: Export models to various formats like ONNX and TensorRT for deployment across multiple platforms. -Explore our [Blog](https://www.ultralytics.com/blog) for use cases and success stories showcasing YOLOv8 in action. 
+Explore our [Blog](https://www.ultralytics.com/blog) for use cases and success stories showcasing YOLO11 in action. diff --git a/docs/en/tasks/index.md b/docs/en/tasks/index.md index 016a1ca9b9a..d71825fa8e3 100644 --- a/docs/en/tasks/index.md +++ b/docs/en/tasks/index.md @@ -1,15 +1,14 @@ --- comments: true -description: Explore Ultralytics YOLOv8 for detection, segmentation, classification, OBB, and pose estimation with high accuracy and speed. Learn how to apply each task. -keywords: Ultralytics YOLOv8, detection, segmentation, classification, oriented object detection, pose estimation, computer vision, AI framework +description: Explore Ultralytics YOLO11 for detection, segmentation, classification, OBB, and pose estimation with high accuracy and speed. Learn how to apply each task. +keywords: Ultralytics YOLO11, detection, segmentation, classification, oriented object detection, pose estimation, computer vision, AI framework --- -# Ultralytics YOLOv8 Tasks +# Ultralytics YOLO11 Tasks -
Ultralytics YOLO supported tasks -YOLOv8 is an AI framework that supports multiple [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) **tasks**. The framework can be used to perform [detection](detect.md), [segmentation](segment.md), [obb](obb.md), [classification](classify.md), and [pose](pose.md) estimation. Each of these tasks has a different objective and use case. +YOLO11 is an AI framework that supports multiple [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) **tasks**. The framework can be used to perform [detection](detect.md), [segmentation](segment.md), [obb](obb.md), [classification](classify.md), and [pose](pose.md) estimation. Each of these tasks has a different objective and use case.


@@ -19,48 +18,48 @@ YOLOv8 is an AI framework that supports multiple [computer vision](https://www.u allowfullscreen>
- Watch: Explore Ultralytics YOLO Tasks: [Object Detection](https://www.ultralytics.com/glossary/object-detection), Segmentation, OBB, Tracking, and Pose Estimation. + Watch: Explore Ultralytics YOLO Tasks: Object Detection, Segmentation, OBB, Tracking, and Pose Estimation.

## [Detection](detect.md) -Detection is the primary task supported by YOLOv8. It involves detecting objects in an image or video frame and drawing bounding boxes around them. The detected objects are classified into different categories based on their features. YOLOv8 can detect multiple objects in a single image or video frame with high [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. +Detection is the primary task supported by YOLO11. It involves detecting objects in an image or video frame and drawing bounding boxes around them. The detected objects are classified into different categories based on their features. YOLO11 can detect multiple objects in a single image or video frame with high [accuracy](https://www.ultralytics.com/glossary/accuracy) and speed. [Detection Examples](detect.md){ .md-button } ## [Segmentation](segment.md) -Segmentation is a task that involves segmenting an image into different regions based on the content of the image. Each region is assigned a label based on its content. This task is useful in applications such as [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) and medical imaging. YOLOv8 uses a variant of the U-Net architecture to perform segmentation. +Segmentation is a task that involves segmenting an image into different regions based on the content of the image. Each region is assigned a label based on its content. This task is useful in applications such as [image segmentation](https://www.ultralytics.com/glossary/image-segmentation) and medical imaging. YOLO11 uses a variant of the U-Net architecture to perform segmentation. [Segmentation Examples](segment.md){ .md-button } ## [Classification](classify.md) -Classification is a task that involves classifying an image into different categories. YOLOv8 can be used to classify images based on their content. It uses a variant of the EfficientNet architecture to perform classification. 
+Classification is a task that involves classifying an image into different categories. YOLO11 can be used to classify images based on their content. It uses a variant of the EfficientNet architecture to perform classification. [Classification Examples](classify.md){ .md-button } ## [Pose](pose.md) -Pose/keypoint detection is a task that involves detecting specific points in an image or video frame. These points are referred to as keypoints and are used to track movement or pose estimation. YOLOv8 can detect keypoints in an image or video frame with high accuracy and speed. +Pose/keypoint detection is a task that involves detecting specific points in an image or video frame. These points are referred to as keypoints and are used to track movement or estimate pose. YOLO11 can detect keypoints in an image or video frame with high accuracy and speed. [Pose Examples](pose.md){ .md-button } ## [OBB](obb.md) -Oriented object detection goes a step further than regular object detection with introducing an extra angle to locate objects more accurate in an image. YOLOv8 can detect rotated objects in an image or video frame with high accuracy and speed. +Oriented object detection goes a step further than regular object detection by introducing an extra angle to locate objects more accurately in an image. YOLO11 can detect rotated objects in an image or video frame with high accuracy and speed. [Oriented Detection](obb.md){ .md-button } ## Conclusion -YOLOv8 supports multiple tasks, including detection, segmentation, classification, oriented object detection and keypoints detection. Each of these tasks has different objectives and use cases. By understanding the differences between these tasks, you can choose the appropriate task for your computer vision application. +YOLO11 supports multiple tasks, including detection, segmentation, classification, oriented object detection and keypoints detection. Each of these tasks has different objectives and use cases. 
By understanding the differences between these tasks, you can choose the appropriate task for your computer vision application. ## FAQ -### What tasks can Ultralytics YOLOv8 perform? +### What tasks can Ultralytics YOLO11 perform? -Ultralytics YOLOv8 is a versatile AI framework capable of performing various computer vision tasks with high accuracy and speed. These tasks include: +Ultralytics YOLO11 is a versatile AI framework capable of performing various computer vision tasks with high accuracy and speed. These tasks include: - **[Detection](detect.md):** Identifying and localizing objects in images or video frames by drawing bounding boxes around them. - **[Segmentation](segment.md):** Segmenting images into different regions based on their content, useful for applications like medical imaging. @@ -68,12 +67,12 @@ Ultralytics YOLOv8 is a versatile AI framework capable of performing various com - **[Pose estimation](pose.md):** Detecting specific keypoints in an image or video frame to track movements or poses. - **[Oriented Object Detection (OBB)](obb.md):** Detecting rotated objects with an added orientation angle for enhanced accuracy. -### How do I use Ultralytics YOLOv8 for object detection? +### How do I use Ultralytics YOLO11 for object detection? -To use Ultralytics YOLOv8 for object detection, follow these steps: +To use Ultralytics YOLO11 for object detection, follow these steps: 1. Prepare your dataset in the appropriate format. -2. Train the YOLOv8 model using the detection task. +2. Train the YOLO11 model using the detection task. 3. Use the model to make predictions by feeding in new images or video frames. !!! 
example @@ -83,38 +82,44 @@ To use Ultralytics YOLOv8 for object detection, follow these steps: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") # Load pre-trained model - results = model.predict(source="image.jpg") # Perform object detection - results[0].show() + # Load a pre-trained YOLO model (adjust model type as needed) + model = YOLO("yolo11n.pt") # n, s, m, l, x versions available + + # Perform object detection on an image + results = model.predict(source="image.jpg") # Can also use video, directory, URL, etc. + + # Display the results + results[0].show() # Show the first image results ``` === "CLI" ```bash - yolo detect model=yolov8n.pt source='image.jpg' + # Run YOLO detection from the command line + yolo detect model=yolo11n.pt source="image.jpg" # Adjust model and source as needed ``` For more detailed instructions, check out our [detection examples](detect.md). -### What are the benefits of using YOLOv8 for segmentation tasks? +### What are the benefits of using YOLO11 for segmentation tasks? -Using YOLOv8 for segmentation tasks provides several advantages: +Using YOLO11 for segmentation tasks provides several advantages: 1. **High Accuracy:** The segmentation task leverages a variant of the U-Net architecture to achieve precise segmentation. -2. **Speed:** YOLOv8 is optimized for real-time applications, offering quick processing even for high-resolution images. +2. **Speed:** YOLO11 is optimized for real-time applications, offering quick processing even for high-resolution images. 3. **Multiple Applications:** It is ideal for medical imaging, autonomous driving, and other applications requiring detailed image segmentation. -Learn more about the benefits and use cases of YOLOv8 for segmentation in the [segmentation section](segment.md). +Learn more about the benefits and use cases of YOLO11 for segmentation in the [segmentation section](segment.md). -### Can Ultralytics YOLOv8 handle pose estimation and keypoint detection? 
+### Can Ultralytics YOLO11 handle pose estimation and keypoint detection? -Yes, Ultralytics YOLOv8 can effectively perform pose estimation and keypoint detection with high accuracy and speed. This feature is particularly useful for tracking movements in sports analytics, healthcare, and human-computer interaction applications. YOLOv8 detects keypoints in an image or video frame, allowing for precise pose estimation. +Yes, Ultralytics YOLO11 can effectively perform pose estimation and keypoint detection with high accuracy and speed. This feature is particularly useful for tracking movements in sports analytics, healthcare, and human-computer interaction applications. YOLO11 detects keypoints in an image or video frame, allowing for precise pose estimation. For more details and implementation tips, visit our [pose estimation examples](pose.md). -### Why should I choose Ultralytics YOLOv8 for oriented object detection (OBB)? +### Why should I choose Ultralytics YOLO11 for oriented object detection (OBB)? -Oriented Object Detection (OBB) with YOLOv8 provides enhanced [precision](https://www.ultralytics.com/glossary/precision) by detecting objects with an additional angle parameter. This feature is beneficial for applications requiring accurate localization of rotated objects, such as aerial imagery analysis and warehouse automation. +Oriented Object Detection (OBB) with YOLO11 provides enhanced [precision](https://www.ultralytics.com/glossary/precision) by detecting objects with an additional angle parameter. This feature is beneficial for applications requiring accurate localization of rotated objects, such as aerial imagery analysis and warehouse automation. - **Increased Precision:** The angle component reduces false positives for rotated objects. - **Versatile Applications:** Useful for tasks in geospatial analysis, robotics, etc. 
diff --git a/docs/en/tasks/obb.md b/docs/en/tasks/obb.md index 9175d827863..621ffc783dd 100644 --- a/docs/en/tasks/obb.md +++ b/docs/en/tasks/obb.md @@ -1,8 +1,8 @@ --- comments: true -description: Discover how to detect objects with rotation for higher precision using YOLOv8 OBB models. Learn, train, validate, and export OBB models effortlessly. -keywords: Oriented Bounding Boxes, OBB, Object Detection, YOLOv8, Ultralytics, DOTAv1, Model Training, Model Export, AI, Machine Learning -model_name: yolov8n-obb +description: Discover how to detect objects with rotation for higher precision using YOLO11 OBB models. Learn, train, validate, and export OBB models effortlessly. +keywords: Oriented Bounding Boxes, OBB, Object Detection, YOLO11, Ultralytics, DOTAv1, Model Training, Model Export, AI, Machine Learning +model_name: yolo11n-obb --- # Oriented Bounding Boxes [Object Detection](https://www.ultralytics.com/glossary/object-detection) @@ -17,7 +17,7 @@ The output of an oriented object detector is a set of rotated bounding boxes tha !!! tip - YOLOv8 OBB models use the `-obb` suffix, i.e. `yolov8n-obb.pt` and are pretrained on [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml). + YOLO11 OBB models use the `-obb` suffix, i.e. `yolo11n-obb.pt` and are pretrained on [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml).


@@ -27,7 +27,7 @@ The output of an oriented object detector is a set of rotated bounding boxes tha allowfullscreen>
- Watch: Object Detection using Ultralytics YOLOv8 Oriented Bounding Boxes (YOLOv8-OBB) + Watch: Object Detection using Ultralytics YOLO Oriented Bounding Boxes (YOLO-OBB)

## Visual Samples @@ -36,26 +36,20 @@ The output of an oriented object detector is a set of rotated bounding boxes tha | :------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------: | | ![Ships Detection using OBB](https://github.com/ultralytics/docs/releases/download/0/ships-detection-using-obb.avif) | ![Vehicle Detection using OBB](https://github.com/ultralytics/docs/releases/download/0/vehicle-detection-using-obb.avif) | -## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) -YOLOv8 pretrained OBB models are shown here, which are pretrained on the [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml) dataset. +YOLO11 pretrained OBB models are shown here, which are pretrained on the [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml) dataset. [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -| Model | size
(pixels) | mAPtest
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-obb](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-obb.pt) | 1024 | 78.0 | 204.77 | 3.57 | 3.1 | 23.3 | -| [YOLOv8s-obb](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-obb.pt) | 1024 | 79.5 | 424.88 | 4.07 | 11.4 | 76.3 | -| [YOLOv8m-obb](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-obb.pt) | 1024 | 80.5 | 763.48 | 7.61 | 26.4 | 208.6 | -| [YOLOv8l-obb](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-obb.pt) | 1024 | 80.7 | 1278.42 | 11.83 | 44.5 | 433.8 | -| [YOLOv8x-obb](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-obb.pt) | 1024 | 81.36 | 1759.10 | 13.23 | 69.5 | 676.7 | +{% include "macros/yolo-obb-perf.md" %} -- **mAPtest** values are for single-model multiscale on [DOTAv1 test](https://captain-whu.github.io/DOTA/index.html) dataset.
Reproduce by `yolo val obb data=DOTAv1.yaml device=0 split=test` and submit merged results to [DOTA evaluation](https://captain-whu.github.io/DOTA/evaluation.html). +- **mAPtest** values are for single-model multiscale on [DOTAv1](https://captain-whu.github.io/DOTA/index.html) dataset.
Reproduce by `yolo val obb data=DOTAv1.yaml device=0 split=test` and submit merged results to [DOTA evaluation](https://captain-whu.github.io/DOTA/evaluation.html). - **Speed** averaged over DOTAv1 val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val obb data=DOTAv1.yaml batch=1 device=0|cpu` ## Train -Train YOLOv8n-obb on the `dota8.yaml` dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n-obb on the DOTA8 dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example @@ -65,9 +59,9 @@ Train YOLOv8n-obb on the `dota8.yaml` dataset for 100 [epochs](https://www.ultra from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.yaml") # build a new model from YAML - model = YOLO("yolov8n-obb.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n-obb.yaml").load("yolov8n.pt") # build from YAML and transfer weights + model = YOLO("yolo11n-obb.yaml") # build a new model from YAML + model = YOLO("yolo11n-obb.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-obb.yaml").load("yolo11n.pt") # build from YAML and transfer weights # Train the model results = model.train(data="dota8.yaml", epochs=100, imgsz=640) @@ -77,13 +71,13 @@ Train YOLOv8n-obb on the `dota8.yaml` dataset for 100 [epochs](https://www.ultra ```bash # Build a new model from YAML and start training from scratch - yolo obb train data=dota8.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640 + yolo obb train data=dota8.yaml model=yolo11n-obb.yaml epochs=100 imgsz=640 # Start training from a pretrained *.pt model - yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 + yolo obb train data=dota8.yaml model=yolo11n-obb.pt epochs=100 imgsz=640 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo obb train data=dota8.yaml model=yolov8n-obb.yaml pretrained=yolov8n-obb.pt epochs=100 imgsz=640 + yolo obb train data=dota8.yaml model=yolo11n-obb.yaml 
pretrained=yolo11n-obb.pt epochs=100 imgsz=640 ```

@@ -94,7 +88,7 @@ Train YOLOv8n-obb on the `dota8.yaml` dataset for 100 [epochs](https://www.ultra allowfullscreen>
- Watch: How to Train Ultralytics YOLOv8-OBB (Oriented Bounding Boxes) Models on DOTA Dataset using Ultralytics HUB + Watch: How to Train Ultralytics YOLO-OBB (Oriented Bounding Boxes) Models on DOTA Dataset using Ultralytics HUB

### Dataset format @@ -103,7 +97,7 @@ OBB dataset format can be found in detail in the [Dataset Guide](../datasets/obb ## Val -Validate trained YOLOv8n-obb model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the DOTA8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-obb model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the DOTA8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example @@ -113,7 +107,7 @@ Validate trained YOLOv8n-obb model [accuracy](https://www.ultralytics.com/glossa from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") # load an official model + model = YOLO("yolo11n-obb.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -127,13 +121,13 @@ Validate trained YOLOv8n-obb model [accuracy](https://www.ultralytics.com/glossa === "CLI" ```bash - yolo obb val model=yolov8n-obb.pt data=dota8.yaml # val official model + yolo obb val model=yolo11n-obb.pt data=dota8.yaml # val official model yolo obb val model=path/to/best.pt data=path/to/data.yaml # val custom model ``` ## Predict -Use a trained YOLOv8n-obb model to run predictions on images. +Use a trained YOLO11n-obb model to run predictions on images. !!! example @@ -143,18 +137,18 @@ Use a trained YOLOv8n-obb model to run predictions on images. 
from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") # load an official model + model = YOLO("yolo11n-obb.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Predict with the model - results = model("https://ultralytics.com/images/bus.jpg") # predict on an image + results = model("https://ultralytics.com/images/boats.jpg") # predict on an image ``` === "CLI" ```bash - yolo obb predict model=yolov8n-obb.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model - yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model + yolo obb predict model=yolo11n-obb.pt source='https://ultralytics.com/images/boats.jpg' # predict with official model + yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/boats.jpg' # predict with custom model ```

@@ -165,14 +159,14 @@ Use a trained YOLOv8n-obb model to run predictions on images. allowfullscreen>
- Watch: How to Detect and Track Storage Tanks using Ultralytics YOLOv8-OBB | Oriented Bounding Boxes | DOTA + Watch: How to Detect and Track Storage Tanks using Ultralytics YOLO-OBB | Oriented Bounding Boxes | DOTA

See full `predict` mode details in the [Predict](../modes/predict.md) page. ## Export -Export a YOLOv8n-obb model to a different format like ONNX, CoreML, etc. +Export a YOLO11n-obb model to a different format like ONNX, CoreML, etc. !!! example @@ -182,7 +176,7 @@ Export a YOLOv8n-obb model to a different format like ONNX, CoreML, etc. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") # load an official model + model = YOLO("yolo11n-obb.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -192,11 +186,11 @@ Export a YOLOv8n-obb model to a different format like ONNX, CoreML, etc. === "CLI" ```bash - yolo export model=yolov8n-obb.pt format=onnx # export official model + yolo export model=yolo11n-obb.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8-obb export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-obb.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11-obb export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n-obb.onnx`. Usage examples are shown for your model after export completes. {% include "macros/export-table.md" %} @@ -208,9 +202,9 @@ See full `export` details in the [Export](../modes/export.md) page. Oriented Bounding Boxes (OBB) include an additional angle to enhance object localization accuracy in images. Unlike regular bounding boxes, which are axis-aligned rectangles, OBBs can rotate to fit the orientation of the object better. 
This is particularly useful for applications requiring precise object placement, such as aerial or satellite imagery ([Dataset Guide](../datasets/obb/index.md)). -### How do I train a YOLOv8n-obb model using a custom dataset? +### How do I train a YOLO11n-obb model using a custom dataset? -To train a YOLOv8n-obb model with a custom dataset, follow the example below using Python or CLI: +To train a YOLO11n-obb model with a custom dataset, follow the example below using Python or CLI: !!! example @@ -220,7 +214,7 @@ To train a YOLOv8n-obb model with a custom dataset, follow the example below usi from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n-obb.pt") + model = YOLO("yolo11n-obb.pt") # Train the model results = model.train(data="path/to/custom_dataset.yaml", epochs=100, imgsz=640) @@ -229,18 +223,18 @@ To train a YOLOv8n-obb model with a custom dataset, follow the example below usi === "CLI" ```bash - yolo obb train data=path/to/custom_dataset.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 + yolo obb train data=path/to/custom_dataset.yaml model=yolo11n-obb.pt epochs=100 imgsz=640 ``` For more training arguments, check the [Configuration](../usage/cfg.md) section. -### What datasets can I use for training YOLOv8-OBB models? +### What datasets can I use for training YOLO11-OBB models? -YOLOv8-OBB models are pretrained on datasets like [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml) but you can use any dataset formatted for OBB. Detailed information on OBB dataset formats can be found in the [Dataset Guide](../datasets/obb/index.md). +YOLO11-OBB models are pretrained on datasets like [DOTAv1](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/DOTAv1.yaml) but you can use any dataset formatted for OBB. Detailed information on OBB dataset formats can be found in the [Dataset Guide](../datasets/obb/index.md). -### How can I export a YOLOv8-OBB model to ONNX format? 
+### How can I export a YOLO11-OBB model to ONNX format? -Exporting a YOLOv8-OBB model to ONNX format is straightforward using either Python or CLI: +Exporting a YOLO11-OBB model to ONNX format is straightforward using either Python or CLI: !!! example @@ -250,7 +244,7 @@ Exporting a YOLOv8-OBB model to ONNX format is straightforward using either Pyth from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") + model = YOLO("yolo11n-obb.pt") # Export the model model.export(format="onnx") @@ -259,14 +253,14 @@ Exporting a YOLOv8-OBB model to ONNX format is straightforward using either Pyth === "CLI" ```bash - yolo export model=yolov8n-obb.pt format=onnx + yolo export model=yolo11n-obb.pt format=onnx ``` For more export formats and details, refer to the [Export](../modes/export.md) page. -### How do I validate the accuracy of a YOLOv8n-obb model? +### How do I validate the accuracy of a YOLO11n-obb model? -To validate a YOLOv8n-obb model, you can use Python or CLI commands as shown below: +To validate a YOLO11n-obb model, you can use Python or CLI commands as shown below: !!! example @@ -276,7 +270,7 @@ To validate a YOLOv8n-obb model, you can use Python or CLI commands as shown bel from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-obb.pt") + model = YOLO("yolo11n-obb.pt") # Validate the model metrics = model.val(data="dota8.yaml") @@ -285,7 +279,7 @@ To validate a YOLOv8n-obb model, you can use Python or CLI commands as shown bel === "CLI" ```bash - yolo obb val model=yolov8n-obb.pt data=dota8.yaml + yolo obb val model=yolo11n-obb.pt data=dota8.yaml ``` See full validation details in the [Val](../modes/val.md) section. diff --git a/docs/en/tasks/pose.md b/docs/en/tasks/pose.md index ca0d5feca68..7efe7414fdc 100644 --- a/docs/en/tasks/pose.md +++ b/docs/en/tasks/pose.md @@ -1,8 +1,8 @@ --- comments: true -description: Discover how to use YOLOv8 for pose estimation tasks. 
Learn about model training, validation, prediction, and exporting in various formats. -keywords: pose estimation, YOLOv8, Ultralytics, keypoints, model training, image recognition, deep learning -model_name: yolov8n-pose +description: Discover how to use YOLO11 for pose estimation tasks. Learn about model training, validation, prediction, and exporting in various formats. +keywords: pose estimation, YOLO11, Ultralytics, keypoints, model training, image recognition, deep learning +model_name: yolo11n-pose --- # Pose Estimation @@ -13,34 +13,22 @@ Pose estimation is a task that involves identifying the location of specific poi The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific parts of an object in a scene, and their location in relation to each other. - - - - - -
- -
- Watch: Pose Estimation with Ultralytics YOLOv8. -
- -
- Watch: Pose Estimation with Ultralytics HUB. -
+

+
+ +
+ Watch: Ultralytics YOLO11 Pose Estimation Tutorial | Real-Time Object Tracking and Human Pose Detection +

!!! tip - YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks. + YOLO11 _pose_ models use the `-pose` suffix, i.e. `yolo11n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks. - In the default YOLOv8 pose model, there are 17 keypoints, each representing a different part of the human body. Here is the mapping of each index to its respective body joint: + In the default YOLO11 pose model, there are 17 keypoints, each representing a different part of the human body. Here is the mapping of each index to its respective body joint: 0: Nose 1: Left Eye @@ -60,27 +48,20 @@ The output of a pose estimation model is a set of points that represent the keyp 15: Left Ankle 16: Right Ankle -## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) -YOLOv8 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. +YOLO11 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. 
[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -| Model | size
(pixels) | mAPpose
50-95 | mAPpose
50 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| ---------------------------------------------------------------------------------------------------- | --------------------- | --------------------- | ------------------ | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-pose.pt) | 640 | 50.4 | 80.1 | 131.8 | 1.18 | 3.3 | 9.2 | -| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-pose.pt) | 640 | 60.0 | 86.2 | 233.2 | 1.42 | 11.6 | 30.2 | -| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 | -| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 | -| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 | -| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 | +{% include "macros/yolo-pose-perf.md" %} - **mAPval** values are for single-model single-scale on [COCO Keypoints val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val pose data=coco-pose.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val pose data=coco-pose.yaml batch=1 device=0|cpu` ## Train -Train a YOLOv8-pose model on the COCO128-pose dataset. +Train a YOLO11-pose model on the COCO8-pose dataset. !!! example @@ -90,9 +71,9 @@ Train a YOLOv8-pose model on the COCO128-pose dataset. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.yaml") # build a new model from YAML - model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n-pose.yaml").load("yolov8n-pose.pt") # build from YAML and transfer weights + model = YOLO("yolo11n-pose.yaml") # build a new model from YAML + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-pose.yaml").load("yolo11n-pose.pt") # build from YAML and transfer weights # Train the model results = model.train(data="coco8-pose.yaml", epochs=100, imgsz=640) @@ -102,13 +83,13 @@ Train a YOLOv8-pose model on the COCO128-pose dataset. ```bash # Build a new model from YAML and start training from scratch - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.yaml epochs=100 imgsz=640 # Start training from a pretrained *.pt model - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolo11n-pose.yaml pretrained=yolo11n-pose.pt epochs=100 imgsz=640 ``` ### Dataset format @@ -117,7 +98,7 @@ YOLO pose dataset format can be found in detail in the [Dataset Guide](../datase ## Val -Validate trained YOLOv8n-pose model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO128-pose dataset. 
No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-pose model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8-pose dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example @@ -127,7 +108,7 @@ Validate trained YOLOv8n-pose model [accuracy](https://www.ultralytics.com/gloss from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load an official model + model = YOLO("yolo11n-pose.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -141,13 +122,13 @@ Validate trained YOLOv8n-pose model [accuracy](https://www.ultralytics.com/gloss === "CLI" ```bash - yolo pose val model=yolov8n-pose.pt # val official model + yolo pose val model=yolo11n-pose.pt # val official model yolo pose val model=path/to/best.pt # val custom model ``` ## Predict -Use a trained YOLOv8n-pose model to run predictions on images. +Use a trained YOLO11n-pose model to run predictions on images. !!! example @@ -157,7 +138,7 @@ Use a trained YOLOv8n-pose model to run predictions on images. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load an official model + model = YOLO("yolo11n-pose.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Predict with the model @@ -167,7 +148,7 @@ Use a trained YOLOv8n-pose model to run predictions on images. 
=== "CLI" ```bash - yolo pose predict model=yolov8n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo pose predict model=yolo11n-pose.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` @@ -175,7 +156,7 @@ See full `predict` mode details in the [Predict](../modes/predict.md) page. ## Export -Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc. +Export a YOLO11n Pose model to a different format like ONNX, CoreML, etc. !!! example @@ -185,7 +166,7 @@ Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-pose.pt") # load an official model + model = YOLO("yolo11n-pose.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -195,11 +176,11 @@ Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc. === "CLI" ```bash - yolo export model=yolov8n-pose.pt format=onnx # export official model + yolo export model=yolo11n-pose.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8-pose export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11-pose export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n-pose.onnx`. Usage examples are shown for your model after export completes. 
{% include "macros/export-table.md" %} @@ -207,20 +188,20 @@ See full `export` details in the [Export](../modes/export.md) page. ## FAQ -### What is Pose Estimation with Ultralytics YOLOv8 and how does it work? +### What is Pose Estimation with Ultralytics YOLO11 and how does it work? -Pose estimation with Ultralytics YOLOv8 involves identifying specific points, known as keypoints, in an image. These keypoints typically represent joints or other important features of the object. The output includes the `[x, y]` coordinates and confidence scores for each point. YOLOv8-pose models are specifically designed for this task and use the `-pose` suffix, such as `yolov8n-pose.pt`. These models are pre-trained on datasets like [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) and can be used for various pose estimation tasks. For more information, visit the [Pose Estimation Page](#pose-estimation). +Pose estimation with Ultralytics YOLO11 involves identifying specific points, known as keypoints, in an image. These keypoints typically represent joints or other important features of the object. The output includes the `[x, y]` coordinates and confidence scores for each point. YOLO11-pose models are specifically designed for this task and use the `-pose` suffix, such as `yolo11n-pose.pt`. These models are pre-trained on datasets like [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml) and can be used for various pose estimation tasks. For more information, visit the [Pose Estimation Page](#pose-estimation). -### How can I train a YOLOv8-pose model on a custom dataset? +### How can I train a YOLO11-pose model on a custom dataset? -Training a YOLOv8-pose model on a custom dataset involves loading a model, either a new model defined by a YAML file or a pre-trained model. You can then start the training process using your specified dataset and parameters. 
+Training a YOLO11-pose model on a custom dataset involves loading a model, either a new model defined by a YAML file or a pre-trained model. You can then start the training process using your specified dataset and parameters. ```python from ultralytics import YOLO # Load a model -model = YOLO("yolov8n-pose.yaml") # build a new model from YAML -model = YOLO("yolov8n-pose.pt") # load a pretrained model (recommended for training) +model = YOLO("yolo11n-pose.yaml") # build a new model from YAML +model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) # Train the model results = model.train(data="your-dataset.yaml", epochs=100, imgsz=640) @@ -228,15 +209,15 @@ results = model.train(data="your-dataset.yaml", epochs=100, imgsz=640) For comprehensive details on training, refer to the [Train Section](#train). -### How do I validate a trained YOLOv8-pose model? +### How do I validate a trained YOLO11-pose model? -Validation of a YOLOv8-pose model involves assessing its accuracy using the same dataset parameters retained during training. Here's an example: +Validation of a YOLO11-pose model involves assessing its accuracy using the same dataset parameters retained during training. Here's an example: ```python from ultralytics import YOLO # Load a model -model = YOLO("yolov8n-pose.pt") # load an official model +model = YOLO("yolo11n-pose.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -245,15 +226,15 @@ metrics = model.val() # no arguments needed, dataset and settings remembered For more information, visit the [Val Section](#val). -### Can I export a YOLOv8-pose model to other formats, and how? +### Can I export a YOLO11-pose model to other formats, and how? -Yes, you can export a YOLOv8-pose model to various formats like ONNX, CoreML, TensorRT, and more. This can be done using either Python or the Command Line Interface (CLI). 
+Yes, you can export a YOLO11-pose model to various formats like ONNX, CoreML, TensorRT, and more. This can be done using either Python or the Command Line Interface (CLI). ```python from ultralytics import YOLO # Load a model -model = YOLO("yolov8n-pose.pt") # load an official model +model = YOLO("yolo11n-pose.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -262,6 +243,6 @@ model.export(format="onnx") Refer to the [Export Section](#export) for more details. -### What are the available Ultralytics YOLOv8-pose models and their performance metrics? +### What are the available Ultralytics YOLO11-pose models and their performance metrics? -Ultralytics YOLOv8 offers various pretrained pose models such as YOLOv8n-pose, YOLOv8s-pose, YOLOv8m-pose, among others. These models differ in size, accuracy (mAP), and speed. For instance, the YOLOv8n-pose model achieves a mAPpose50-95 of 50.4 and an mAPpose50 of 80.1. For a complete list and performance details, visit the [Models Section](#models). +Ultralytics YOLO11 offers various pretrained pose models such as YOLO11n-pose, YOLO11s-pose, YOLO11m-pose, among others. These models differ in size, accuracy (mAP), and speed. For instance, the YOLO11n-pose model achieves an mAPpose50-95 of 50.0 and an mAPpose50 of 81.0. For a complete list and performance details, visit the [Models Section](#models). diff --git a/docs/en/tasks/segment.md b/docs/en/tasks/segment.md index 73398d18140..33c19d9d3c6 100644 --- a/docs/en/tasks/segment.md +++ b/docs/en/tasks/segment.md @@ -1,8 +1,8 @@ --- comments: true -description: Master instance segmentation using YOLOv8. Learn how to detect, segment and outline objects in images with detailed guides and examples. 
-keywords: instance segmentation, YOLOv8, object detection, image segmentation, machine learning, deep learning, computer vision, COCO dataset, Ultralytics -model_name: yolov8n-seg +description: Master instance segmentation using YOLO11. Learn how to detect, segment and outline objects in images with detailed guides and examples. +keywords: instance segmentation, YOLO11, object detection, image segmentation, machine learning, deep learning, computer vision, COCO dataset, Ultralytics +model_name: yolo11n-seg --- # Instance Segmentation @@ -21,33 +21,27 @@ The output of an instance segmentation model is a set of masks or contours that allowfullscreen>
- Watch: Run Segmentation with Pre-Trained Ultralytics YOLOv8 Model in Python. + Watch: Run Segmentation with Pre-Trained Ultralytics YOLO Model in Python.

!!! tip - YOLOv8 Segment models use the `-seg` suffix, i.e. `yolov8n-seg.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). + YOLO11 Segment models use the `-seg` suffix, i.e. `yolo11n-seg.pt` and are pretrained on [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml). -## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/v8) +## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models/11) -YOLOv8 pretrained Segment models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. +YOLO11 pretrained Segment models are shown here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/ImageNet.yaml) dataset. [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use. -| Model | size
(pixels) | mAPbox
50-95 | mAPmask
50-95 | Speed
CPU ONNX
(ms) | Speed
A100 TensorRT
(ms) | params
(M) | FLOPs
(B) | -| -------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- | -| [YOLOv8n-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-seg.pt) | 640 | 36.7 | 30.5 | 96.1 | 1.21 | 3.4 | 12.6 | -| [YOLOv8s-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-seg.pt) | 640 | 44.6 | 36.8 | 155.7 | 1.47 | 11.8 | 42.6 | -| [YOLOv8m-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt) | 640 | 49.9 | 40.8 | 317.0 | 2.18 | 27.3 | 110.2 | -| [YOLOv8l-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-seg.pt) | 640 | 52.3 | 42.6 | 572.4 | 2.79 | 46.0 | 220.5 | -| [YOLOv8x-seg](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-seg.pt) | 640 | 53.4 | 43.4 | 712.1 | 4.02 | 71.8 | 344.1 | +{% include "macros/yolo-seg-perf.md" %} - **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val segment data=coco.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco8-seg.yaml batch=1 device=0|cpu` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco.yaml batch=1 device=0|cpu` ## Train -Train YOLOv8n-seg on the COCO128-seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. +Train YOLO11n-seg on the COCO8-seg dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) at image size 640. For a full list of available arguments see the [Configuration](../usage/cfg.md) page. !!! example @@ -57,9 +51,9 @@ Train YOLOv8n-seg on the COCO128-seg dataset for 100 [epochs](https://www.ultral from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.yaml") # build a new model from YAML - model = YOLO("yolov8n-seg.pt") # load a pretrained model (recommended for training) - model = YOLO("yolov8n-seg.yaml").load("yolov8n.pt") # build from YAML and transfer weights + model = YOLO("yolo11n-seg.yaml") # build a new model from YAML + model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n-seg.yaml").load("yolo11n.pt") # build from YAML and transfer weights # Train the model results = model.train(data="coco8-seg.yaml", epochs=100, imgsz=640) @@ -69,13 +63,13 @@ Train YOLOv8n-seg on the COCO128-seg dataset for 100 [epochs](https://www.ultral ```bash # Build a new model from YAML and start training from scratch - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolo11n-seg.yaml epochs=100 imgsz=640 # Start training from a pretrained *.pt model - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 # Build a new model from YAML, transfer pretrained weights to it and start training - yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml pretrained=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train 
data=coco8-seg.yaml model=yolo11n-seg.yaml pretrained=yolo11n-seg.pt epochs=100 imgsz=640 ``` ### Dataset format @@ -84,7 +78,7 @@ YOLO segmentation dataset format can be found in detail in the [Dataset Guide](. ## Val -Validate trained YOLOv8n-seg model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO128-seg dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n-seg model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8-seg dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example @@ -94,7 +88,7 @@ Validate trained YOLOv8n-seg model [accuracy](https://www.ultralytics.com/glossa from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load an official model + model = YOLO("yolo11n-seg.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Validate the model @@ -112,13 +106,13 @@ Validate trained YOLOv8n-seg model [accuracy](https://www.ultralytics.com/glossa === "CLI" ```bash - yolo segment val model=yolov8n-seg.pt # val official model + yolo segment val model=yolo11n-seg.pt # val official model yolo segment val model=path/to/best.pt # val custom model ``` ## Predict -Use a trained YOLOv8n-seg model to run predictions on images. +Use a trained YOLO11n-seg model to run predictions on images. !!! example @@ -128,7 +122,7 @@ Use a trained YOLOv8n-seg model to run predictions on images. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load an official model + model = YOLO("yolo11n-seg.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom model # Predict with the model @@ -138,7 +132,7 @@ Use a trained YOLOv8n-seg model to run predictions on images. 
=== "CLI" ```bash - yolo segment predict model=yolov8n-seg.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model + yolo segment predict model=yolo11n-seg.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model ``` @@ -146,7 +140,7 @@ See full `predict` mode details in the [Predict](../modes/predict.md) page. ## Export -Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. +Export a YOLO11n-seg model to a different format like ONNX, CoreML, etc. !!! example @@ -156,7 +150,7 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. from ultralytics import YOLO # Load a model - model = YOLO("yolov8n-seg.pt") # load an official model + model = YOLO("yolo11n-seg.pt") # load an official model model = YOLO("path/to/best.pt") # load a custom trained model # Export the model @@ -166,11 +160,11 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc. === "CLI" ```bash - yolo export model=yolov8n-seg.pt format=onnx # export official model + yolo export model=yolo11n-seg.pt format=onnx # export official model yolo export model=path/to/best.pt format=onnx # export custom trained model ``` -Available YOLOv8-seg export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes. +Available YOLO11-seg export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolo11n-seg.onnx`. Usage examples are shown for your model after export completes. 
{% include "macros/export-table.md" %} @@ -178,9 +172,9 @@ See full `export` details in the [Export](../modes/export.md) page. ## FAQ -### How do I train a YOLOv8 segmentation model on a custom dataset? +### How do I train a YOLO11 segmentation model on a custom dataset? -To train a YOLOv8 segmentation model on a custom dataset, you first need to prepare your dataset in the YOLO segmentation format. You can use tools like [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) to convert datasets from other formats. Once your dataset is ready, you can train the model using Python or CLI commands: +To train a YOLO11 segmentation model on a custom dataset, you first need to prepare your dataset in the YOLO segmentation format. You can use tools like [JSON2YOLO](https://github.com/ultralytics/JSON2YOLO) to convert datasets from other formats. Once your dataset is ready, you can train the model using Python or CLI commands: !!! example @@ -189,8 +183,8 @@ To train a YOLOv8 segmentation model on a custom dataset, you first need to prep ```python from ultralytics import YOLO - # Load a pretrained YOLOv8 segment model - model = YOLO("yolov8n-seg.pt") + # Load a pretrained YOLO11 segment model + model = YOLO("yolo11n-seg.pt") # Train the model results = model.train(data="path/to/your_dataset.yaml", epochs=100, imgsz=640) @@ -199,22 +193,22 @@ To train a YOLOv8 segmentation model on a custom dataset, you first need to prep === "CLI" ```bash - yolo segment train data=path/to/your_dataset.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=path/to/your_dataset.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` Check the [Configuration](../usage/cfg.md) page for more available arguments. -### What is the difference between [object detection](https://www.ultralytics.com/glossary/object-detection) and instance segmentation in YOLOv8? 
+### What is the difference between [object detection](https://www.ultralytics.com/glossary/object-detection) and instance segmentation in YOLO11? -Object detection identifies and localizes objects within an image by drawing bounding boxes around them, whereas instance segmentation not only identifies the bounding boxes but also delineates the exact shape of each object. YOLOv8 instance segmentation models provide masks or contours that outline each detected object, which is particularly useful for tasks where knowing the precise shape of objects is important, such as medical imaging or autonomous driving. +Object detection identifies and localizes objects within an image by drawing bounding boxes around them, whereas instance segmentation not only identifies the bounding boxes but also delineates the exact shape of each object. YOLO11 instance segmentation models provide masks or contours that outline each detected object, which is particularly useful for tasks where knowing the precise shape of objects is important, such as medical imaging or autonomous driving. -### Why use YOLOv8 for instance segmentation? +### Why use YOLO11 for instance segmentation? -Ultralytics YOLOv8 is a state-of-the-art model recognized for its high accuracy and real-time performance, making it ideal for instance segmentation tasks. YOLOv8 Segment models come pretrained on the [COCO dataset](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), ensuring robust performance across a variety of objects. Additionally, YOLOv8 supports training, validation, prediction, and export functionalities with seamless integration, making it highly versatile for both research and industry applications. +Ultralytics YOLO11 is a state-of-the-art model recognized for its high accuracy and real-time performance, making it ideal for instance segmentation tasks. 
YOLO11 Segment models come pretrained on the [COCO dataset](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml), ensuring robust performance across a variety of objects. Additionally, YOLO supports training, validation, prediction, and export functionalities with seamless integration, making it highly versatile for both research and industry applications. -### How do I load and validate a pretrained YOLOv8 segmentation model? +### How do I load and validate a pretrained YOLO segmentation model? -Loading and validating a pretrained YOLOv8 segmentation model is straightforward. Here's how you can do it using both Python and CLI: +Loading and validating a pretrained YOLO segmentation model is straightforward. Here's how you can do it using both Python and CLI: !!! example @@ -224,7 +218,7 @@ Loading and validating a pretrained YOLOv8 segmentation model is straightforward from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n-seg.pt") + model = YOLO("yolo11n-seg.pt") # Validate the model metrics = model.val() @@ -235,14 +229,14 @@ Loading and validating a pretrained YOLOv8 segmentation model is straightforward === "CLI" ```bash - yolo segment val model=yolov8n-seg.pt + yolo segment val model=yolo11n-seg.pt ``` These steps will provide you with validation metrics like [Mean Average Precision](https://www.ultralytics.com/glossary/mean-average-precision-map) (mAP), crucial for assessing model performance. -### How can I export a YOLOv8 segmentation model to ONNX format? +### How can I export a YOLO segmentation model to ONNX format? -Exporting a YOLOv8 segmentation model to ONNX format is simple and can be done using Python or CLI commands: +Exporting a YOLO segmentation model to ONNX format is simple and can be done using Python or CLI commands: !!! 
example @@ -252,7 +246,7 @@ Exporting a YOLOv8 segmentation model to ONNX format is simple and can be done u from ultralytics import YOLO # Load a pretrained model - model = YOLO("yolov8n-seg.pt") + model = YOLO("yolo11n-seg.pt") # Export the model to ONNX format model.export(format="onnx") @@ -261,7 +255,7 @@ Exporting a YOLOv8 segmentation model to ONNX format is simple and can be done u === "CLI" ```bash - yolo export model=yolov8n-seg.pt format=onnx + yolo export model=yolo11n-seg.pt format=onnx ``` For more details on exporting to various formats, refer to the [Export](../modes/export.md) page. diff --git a/docs/en/usage/callbacks.md b/docs/en/usage/callbacks.md index 19a92699878..12056395bcf 100644 --- a/docs/en/usage/callbacks.md +++ b/docs/en/usage/callbacks.md @@ -1,7 +1,7 @@ --- comments: true description: Explore Ultralytics callbacks for training, validation, exporting, and prediction. Learn how to use and customize them for your ML models. -keywords: Ultralytics, callbacks, training, validation, export, prediction, ML models, YOLOv8, Python, machine learning +keywords: Ultralytics, callbacks, training, validation, export, prediction, ML models, YOLO11, Python, machine learning --- ## Callbacks @@ -16,7 +16,7 @@ Ultralytics framework supports callbacks as entry points in strategic stages of allowfullscreen>
- Watch: Mastering Ultralytics YOLOv8: Callbacks + Watch: Mastering Ultralytics YOLO: Callbacks

## Examples @@ -41,7 +41,7 @@ def on_predict_batch_end(predictor): # Create a YOLO model instance -model = YOLO(f"yolov8n.pt") +model = YOLO("yolo11n.pt") # Add the custom callback to the model model.add_callback("on_predict_batch_end", on_predict_batch_end) @@ -119,7 +119,7 @@ def on_predict_batch_end(predictor): predictor.results = zip(predictor.results, image) -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") model.add_callback("on_predict_batch_end", on_predict_batch_end) for result, frame in model.predict(): pass @@ -129,20 +129,26 @@ for result, frame in model.predict(): To customize your Ultralytics training routine using callbacks, you can inject your logic at specific stages of the training process. Ultralytics YOLO provides a variety of training callbacks such as `on_train_start`, `on_train_end`, and `on_train_batch_end`. These allow you to add custom metrics, processing, or logging. -Here's an example of how to log additional metrics at the end of each training epoch: +Here's an example of how to freeze BatchNorm statistics when freezing layers with callbacks: ```python from ultralytics import YOLO -def on_train_epoch_end(trainer): - """Custom logic for additional metrics logging at the end of each training epoch.""" - additional_metric = compute_additional_metric(trainer) - trainer.log({"additional_metric": additional_metric}) +# Add a callback to put the frozen layers in eval mode to prevent BN values from changing +def put_in_eval_mode(trainer): + n_layers = trainer.args.freeze + if not isinstance(n_layers, int): + return + for i, (name, module) in enumerate(trainer.model.named_modules()): + if name.endswith("bn") and int(name.split(".")[1]) < n_layers: + module.eval() + module.track_running_stats = False -model = YOLO("yolov8n.pt") -model.add_callback("on_train_epoch_end", on_train_epoch_end) + +model = YOLO("yolo11n.pt") +model.add_callback("on_train_epoch_start", put_in_eval_mode) model.train(data="coco.yaml", epochs=10) ``` @@ -152,20 
+158,23 @@ Refer to the [Training Guide](../modes/train.md) for more details on how to effe Using **callbacks during validation** in Ultralytics YOLO can enhance model evaluation by allowing custom processing, logging, or metrics calculation. Callbacks such as `on_val_start`, `on_val_batch_end`, and `on_val_end` provide entry points to inject custom logic, ensuring detailed and comprehensive validation processes. -For instance, you might want to log additional validation metrics or save intermediate results for further analysis. Here's an example of how to log custom metrics at the end of validation: +For instance, you might want to plot all the validation batches, instead of just the first 3. Here's how you can do that: ```python +import inspect + from ultralytics import YOLO -def on_val_end(validator): - """Log custom metrics at end of validation.""" - custom_metric = compute_custom_metric(validator) - validator.log({"custom_metric": custom_metric}) +def plot_samples(validator): + frame = inspect.currentframe().f_back.f_back + v = frame.f_locals + validator.plot_val_samples(v["batch"], v["batch_i"]) + validator.plot_predictions(v["batch"], v["preds"], v["batch_i"]) -model = YOLO("yolov8n.pt") -model.add_callback("on_val_end", on_val_end) +model = YOLO("yolo11n.pt") +model.add_callback("on_val_batch_end", plot_samples) model.val(data="coco.yaml") ``` @@ -175,21 +184,29 @@ Check out the [Validation Guide](../modes/val.md) for further insights on incorp To attach a custom callback for the **prediction mode** in Ultralytics YOLO, you define a callback function and register it with the prediction process. Common prediction callbacks include `on_predict_start`, `on_predict_batch_end`, and `on_predict_end`. These allow for modification of prediction outputs and integration of additional functionalities like data logging or result transformation. 
-Here is an example where a custom callback is used to log predictions: +Here is an example where a custom callback is used to save predictions based on whether an object of a particular class is present: ```python from ultralytics import YOLO +model = YOLO("yolo11n.pt") + +class_id = 2 -def on_predict_end(predictor): - """Log predictions at the end of prediction.""" - for result in predictor.results: - log_prediction(result) +def save_on_object(predictor): + r = predictor.results[0] + if class_id in r.boxes.cls: + predictor.args.save = True + else: + predictor.args.save = False -model = YOLO("yolov8n.pt") -model.add_callback("on_predict_end", on_predict_end) -results = model.predict(source="image.jpg") + +model.add_callback("on_predict_postprocess_end", save_on_object) +results = model("pedestrians.mp4", stream=True, save=True) + +for results in results: + pass ``` For more comprehensive usage, refer to the [Prediction Guide](../modes/predict.md) which includes detailed instructions and additional customization options. @@ -215,7 +232,7 @@ def on_predict_batch_end(predictor): predictor.results = zip(predictor.results, image) -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") model.add_callback("on_predict_batch_end", on_predict_batch_end) for result, frame in model.predict(): pass diff --git a/docs/en/usage/cfg.md b/docs/en/usage/cfg.md index aecc5fc6465..c51863c5e01 100644 --- a/docs/en/usage/cfg.md +++ b/docs/en/usage/cfg.md @@ -14,7 +14,7 @@ YOLO settings and hyperparameters play a critical role in the model's performanc allowfullscreen>
- Watch: Mastering Ultralytics YOLOv8: Configuration + Watch: Mastering Ultralytics YOLO: Configuration

Ultralytics commands use the following syntax: @@ -32,8 +32,8 @@ Ultralytics commands use the following syntax: ```python from ultralytics import YOLO - # Load a YOLOv8 model from a pre-trained weights file - model = YOLO("yolov8n.pt") + # Load a YOLO11 model from a pre-trained weights file + model = YOLO("yolo11n.pt") # Run MODE mode using the custom arguments ARGS (guess TASK) model.MODE(ARGS) @@ -41,8 +41,8 @@ Ultralytics commands use the following syntax: Where: -- `TASK` (optional) is one of ([detect](../tasks/detect.md), [segment](../tasks/segment.md), [classify](../tasks/classify.md), [pose](../tasks/pose.md)) -- `MODE` (required) is one of ([train](../modes/train.md), [val](../modes/val.md), [predict](../modes/predict.md), [export](../modes/export.md), [track](../modes/track.md)) +- `TASK` (optional) is one of ([detect](../tasks/detect.md), [segment](../tasks/segment.md), [classify](../tasks/classify.md), [pose](../tasks/pose.md), [obb](../tasks/obb.md)) +- `MODE` (required) is one of ([train](../modes/train.md), [val](../modes/val.md), [predict](../modes/predict.md), [export](../modes/export.md), [track](../modes/track.md), [benchmark](../modes/benchmark.md)) - `ARGS` (optional) are `arg=value` pairs like `imgsz=640` that override defaults. Default `ARG` values are defined on this page from the `cfg/defaults.yaml` [file](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml). 
@@ -59,7 +59,7 @@ YOLO models can be used for a variety of tasks, including detection, segmentatio | Argument | Default | Description | | -------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for [object detection](https://www.ultralytics.com/glossary/object-detection), `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `OBB` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. | +| `task` | `'detect'` | Specifies the YOLO task to be executed. Options include `detect` for [object detection](https://www.ultralytics.com/glossary/object-detection), `segment` for segmentation, `classify` for classification, `pose` for pose estimation and `obb` for oriented bounding boxes. Each task is tailored to specific types of output and problems within image and video analysis. | [Tasks Guide](../tasks/index.md){ .md-button } @@ -67,12 +67,12 @@ YOLO models can be used for a variety of tasks, including detection, segmentatio YOLO models can be used in different modes depending on the specific problem you are trying to solve. These modes include: -- **Train**: For training a YOLOv8 model on a custom dataset. -- **Val**: For validating a YOLOv8 model after it has been trained. -- **Predict**: For making predictions using a trained YOLOv8 model on new images or videos. -- **Export**: For exporting a YOLOv8 model to a format that can be used for deployment. -- **Track**: For tracking objects in real-time using a YOLOv8 model. 
-- **Benchmark**: For benchmarking YOLOv8 exports (ONNX, TensorRT, etc.) speed and accuracy. +- **Train**: For training a YOLO11 model on a custom dataset. +- **Val**: For validating a YOLO11 model after it has been trained. +- **Predict**: For making predictions using a trained YOLO11 model on new images or videos. +- **Export**: For exporting a YOLO11 model to a format that can be used for deployment. +- **Track**: For tracking objects in real-time using a YOLO11 model. +- **Benchmark**: For benchmarking YOLO11 exports (ONNX, TensorRT, etc.) speed and accuracy. | Argument | Default | Description | | -------- | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -130,6 +130,14 @@ It is crucial to thoughtfully configure these settings to ensure the exported mo [Export Guide](../modes/export.md){ .md-button } +## Solutions Settings + +The configuration settings for Ultralytics Solutions offer a flexible way to customize the model for various tasks like object counting, heatmap creation, workout tracking, data analysis, zone tracking, queue management, and region-based counting. These options make it easy to adjust the setup for accurate and useful results tailored to specific needs. + +{% include "macros/solutions-args.md" %} + +[Solutions Guide](../solutions/index.md){ .md-button } + ## Augmentation Settings Augmentation techniques are essential for improving the robustness and performance of YOLO models by introducing variability into the [training data](https://www.ultralytics.com/glossary/training-data), helping the model generalize better to unseen data. 
The following table outlines the purpose and effect of each augmentation argument: @@ -186,7 +194,8 @@ Default inference settings include: - **IoU Threshold (`iou=0.7`)**: For Non-Maximum Suppression (NMS). - **Image Size (`imgsz=640`)**: Resizes input images prior to inference. - **Device (`device=None`)**: Selects CPU or GPU for inference. - For a comprehensive overview, visit the [Predict Settings](#predict-settings) section and the [Predict Guide](../modes/predict.md). + +For a comprehensive overview, visit the [Predict Settings](#predict-settings) section and the [Predict Guide](../modes/predict.md). ### Why should I use mixed precision training with YOLO models? diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index d1d7c8de485..a2276f2e008 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -1,7 +1,7 @@ --- comments: true -description: Explore the YOLOv8 command line interface (CLI) for easy execution of detection tasks without needing a Python environment. -keywords: YOLOv8 CLI, command line interface, YOLOv8 commands, detection tasks, Ultralytics, model training, model prediction +description: Explore the YOLO11 command line interface (CLI) for easy execution of detection tasks without needing a Python environment. +keywords: YOLO11 CLI, command line interface, YOLO11 commands, detection tasks, Ultralytics, model training, model prediction --- # Command Line Interface Usage @@ -16,7 +16,7 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit allowfullscreen>
- Watch: Mastering Ultralytics YOLOv8: CLI + Watch: Mastering Ultralytics YOLO: CLI

!!! example @@ -37,28 +37,28 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit Train a detection model for 10 [epochs](https://www.ultralytics.com/glossary/epoch) with an initial learning_rate of 0.01 ```bash - yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 + yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` === "Predict" Predict a YouTube video using a pretrained segmentation model at image size 320: ```bash - yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + yolo predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 ``` === "Val" Val a pretrained detection model at batch-size 1 and image size 640: ```bash - yolo val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640 + yolo val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640 ``` === "Export" - Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) + Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required) ```bash - yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128 ``` === "Special" @@ -75,7 +75,7 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit Where: -- `TASK` (optional) is one of `[detect, segment, classify, pose, obb]`. If it is not passed explicitly YOLOv8 will try to guess the `TASK` from the model type. +- `TASK` (optional) is one of `[detect, segment, classify, pose, obb]`. If it is not passed explicitly YOLO11 will try to guess the `TASK` from the model type. - `MODE` (required) is one of `[train, val, predict, export, track, benchmark]` - `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. 
For a full list of available `ARGS` see the [Configuration](cfg.md) page and `defaults.yaml` @@ -83,21 +83,21 @@ Where: Arguments must be passed as `arg=val` pairs, split by an equals `=` sign and delimited by spaces ` ` between pairs. Do not use `--` argument prefixes or commas `,` between arguments. - - `yolo predict model=yolov8n.pt imgsz=640 conf=0.25`   ✅ - - `yolo predict model yolov8n.pt imgsz 640 conf 0.25`   ❌ - - `yolo predict --model yolov8n.pt --imgsz 640 --conf 0.25`   ❌ + - `yolo predict model=yolo11n.pt imgsz=640 conf=0.25`   ✅ + - `yolo predict model yolo11n.pt imgsz 640 conf 0.25`   ❌ + - `yolo predict --model yolo11n.pt --imgsz 640 --conf 0.25`   ❌ ## Train -Train YOLOv8n on the COCO8 dataset for 100 epochs at image size 640. For a full list of available arguments see the [Configuration](cfg.md) page. +Train YOLO11n on the COCO8 dataset for 100 epochs at image size 640. For a full list of available arguments see the [Configuration](cfg.md) page. !!! example === "Train" - Start training YOLOv8n on COCO8 for 100 epochs at image-size 640. + Start training YOLO11n on COCO8 for 100 epochs at image-size 640. ```bash - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=100 imgsz=640 ``` === "Resume" @@ -109,15 +109,15 @@ Train YOLOv8n on the COCO8 dataset for 100 epochs at image size 640. For a full ## Val -Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. +Validate trained YOLO11n model [accuracy](https://www.ultralytics.com/glossary/accuracy) on the COCO8 dataset. No arguments are needed as the `model` retains its training `data` and arguments as model attributes. !!! example === "Official" - Validate an official YOLOv8n model. + Validate an official YOLO11n model. 
```bash - yolo detect val model=yolov8n.pt + yolo detect val model=yolo11n.pt ``` === "Custom" @@ -129,15 +129,15 @@ Validate trained YOLOv8n model [accuracy](https://www.ultralytics.com/glossary/a ## Predict -Use a trained YOLOv8n model to run predictions on images. +Use a trained YOLO11n model to run predictions on images. !!! example === "Official" - Predict with an official YOLOv8n model. + Predict with an official YOLO11n model. ```bash - yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg' + yolo detect predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg' ``` === "Custom" @@ -149,15 +149,15 @@ Use a trained YOLOv8n model to run predictions on images. ## Export -Export a YOLOv8n model to a different format like ONNX, CoreML, etc. +Export a YOLO11n model to a different format like ONNX, CoreML, etc. !!! example === "Official" - Export an official YOLOv8n model to ONNX format. + Export an official YOLO11n model to ONNX format. ```bash - yolo export model=yolov8n.pt format=onnx + yolo export model=yolo11n.pt format=onnx ``` === "Custom" @@ -167,7 +167,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc. yolo export model=path/to/best.pt format=onnx ``` -Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. +Available YOLO11 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. 
{% include "macros/export-table.md" %} @@ -183,21 +183,21 @@ Default arguments can be overridden by simply passing them as arguments in the C Train a detection model for `10 epochs` with `learning_rate` of `0.01` ```bash - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` === "Predict" Predict a YouTube video using a pretrained segmentation model at image size 320: ```bash - yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + yolo segment predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 ``` === "Val" Validate a pretrained detection model at batch-size 1 and image size 640: ```bash - yolo detect val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640 + yolo detect val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640 ``` ## Overriding default config file @@ -219,19 +219,19 @@ This will create `default_copy.yaml`, which you can then pass as `cfg=default_co ## FAQ -### How do I use the Ultralytics YOLOv8 command line interface (CLI) for model training? +### How do I use the Ultralytics YOLO11 command line interface (CLI) for model training? -To train a YOLOv8 model using the CLI, you can execute a simple one-line command in the terminal. For example, to train a detection model for 10 epochs with a [learning rate](https://www.ultralytics.com/glossary/learning-rate) of 0.01, you would run: +To train a YOLO11 model using the CLI, you can execute a simple one-line command in the terminal. For example, to train a detection model for 10 epochs with a [learning rate](https://www.ultralytics.com/glossary/learning-rate) of 0.01, you would run: ```bash -yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 +yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` This command uses the `train` mode with specific arguments. 
Refer to the full list of available arguments in the [Configuration Guide](cfg.md). -### What tasks can I perform with the Ultralytics YOLOv8 CLI? +### What tasks can I perform with the Ultralytics YOLO11 CLI? -The Ultralytics YOLOv8 CLI supports a variety of tasks including detection, segmentation, classification, validation, prediction, export, and tracking. For instance: +The Ultralytics YOLO11 CLI supports a variety of tasks including detection, segmentation, classification, validation, prediction, export, and tracking. For instance: - **Train a Model**: Run `yolo train data=<data.yaml> model=<model.pt> epochs=<num>`. - **Run Predictions**: Use `yolo predict model=<model.pt> source=<data_source> imgsz=<image_size>`. @@ -239,32 +239,32 @@ The Ultralytics YOLOv8 CLI supports a variety of tasks including detection, segm Each task can be customized with various arguments. For detailed syntax and examples, see the respective sections like [Train](#train), [Predict](#predict), and [Export](#export). -### How can I validate the accuracy of a trained YOLOv8 model using the CLI? +### How can I validate the accuracy of a trained YOLO11 model using the CLI? -To validate a YOLOv8 model's accuracy, use the `val` mode. For example, to validate a pretrained detection model with a [batch size](https://www.ultralytics.com/glossary/batch-size) of 1 and image size of 640, run: +To validate a YOLO11 model's accuracy, use the `val` mode. For example, to validate a pretrained detection model with a [batch size](https://www.ultralytics.com/glossary/batch-size) of 1 and image size of 640, run: ```bash -yolo val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640 +yolo val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640 ``` This command evaluates the model on the specified dataset and provides performance metrics. For more details, refer to the [Val](#val) section. -### What formats can I export my YOLOv8 models to using the CLI? +### What formats can I export my YOLO11 models to using the CLI? 
-YOLOv8 models can be exported to various formats such as ONNX, CoreML, TensorRT, and more. For instance, to export a model to ONNX format, run: +YOLO11 models can be exported to various formats such as ONNX, CoreML, TensorRT, and more. For instance, to export a model to ONNX format, run: ```bash -yolo export model=yolov8n.pt format=onnx +yolo export model=yolo11n.pt format=onnx ``` For complete details, visit the [Export](../modes/export.md) page. -### How do I customize YOLOv8 CLI commands to override default arguments? +### How do I customize YOLO11 CLI commands to override default arguments? -To override default arguments in YOLOv8 CLI commands, pass them as `arg=value` pairs. For example, to train a model with custom arguments, use: +To override default arguments in YOLO11 CLI commands, pass them as `arg=value` pairs. For example, to train a model with custom arguments, use: ```bash -yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 +yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 ``` For a full list of available arguments and their descriptions, refer to the [Configuration Guide](cfg.md). Ensure arguments are formatted correctly, as shown in the [Overriding default arguments](#overriding-default-arguments) section. diff --git a/docs/en/usage/engine.md b/docs/en/usage/engine.md index dc44047ff70..d5d807c9952 100644 --- a/docs/en/usage/engine.md +++ b/docs/en/usage/engine.md @@ -1,7 +1,7 @@ --- comments: true -description: Learn to customize the YOLOv8 Trainer for specific tasks. Step-by-step instructions with Python examples for maximum model performance. -keywords: Ultralytics, YOLOv8, Trainer Customization, Python, Machine Learning, AI, Model Training, DetectionTrainer, Custom Models +description: Learn to customize the YOLO11 Trainer for specific tasks. Step-by-step instructions with Python examples for maximum model performance. 
+keywords: Ultralytics, YOLO11, Trainer Customization, Python, Machine Learning, AI, Model Training, DetectionTrainer, Custom Models --- Both the Ultralytics YOLO command-line and Python interfaces are simply a high-level abstraction on the base engine executors. Let's take a look at the Trainer engine. @@ -14,7 +14,7 @@ Both the Ultralytics YOLO command-line and Python interfaces are simply a high-l allowfullscreen>
- Watch: Mastering Ultralytics YOLOv8: Advanced Customization + Watch: Mastering Ultralytics YOLO: Advanced Customization

## BaseTrainer @@ -26,7 +26,7 @@ BaseTrainer contains the generic boilerplate training routine. It can be customi ## DetectionTrainer -Here's how you can use the YOLOv8 `DetectionTrainer` and customize it. +Here's how you can use the YOLO11 `DetectionTrainer` and customize it. ```python from ultralytics.models.yolo.detect import DetectionTrainer @@ -96,9 +96,9 @@ There are other components that can be customized similarly like `Validators` an ## FAQ -### How do I customize the Ultralytics YOLOv8 DetectionTrainer for specific tasks? +### How do I customize the Ultralytics YOLO11 DetectionTrainer for specific tasks? -To customize the Ultralytics YOLOv8 `DetectionTrainer` for a specific task, you can override its methods to adapt to your custom model and dataloader. Start by inheriting from `DetectionTrainer` and then redefine methods like `get_model` to implement your custom functionalities. Here's an example: +To customize the Ultralytics YOLO11 `DetectionTrainer` for a specific task, you can override its methods to adapt to your custom model and dataloader. Start by inheriting from `DetectionTrainer` and then redefine methods like `get_model` to implement your custom functionalities. Here's an example: ```python from ultralytics.models.yolo.detect import DetectionTrainer @@ -117,18 +117,18 @@ trained_model = trainer.best # get best model For further customization like changing the `loss function` or adding a `callback`, you can reference our [Callbacks Guide](../usage/callbacks.md). -### What are the key components of the BaseTrainer in Ultralytics YOLOv8? +### What are the key components of the BaseTrainer in Ultralytics YOLO11? -The `BaseTrainer` in Ultralytics YOLOv8 serves as the foundation for training routines and can be customized for various tasks by overriding its generic methods. 
Key components include: +The `BaseTrainer` in Ultralytics YOLO11 serves as the foundation for training routines and can be customized for various tasks by overriding its generic methods. Key components include: - `get_model(cfg, weights)` to build the model to be trained. - `get_dataloader()` to build the dataloader. For more details on the customization and source code, see the [`BaseTrainer` Reference](../reference/engine/trainer.md). -### How can I add a callback to the Ultralytics YOLOv8 DetectionTrainer? +### How can I add a callback to the Ultralytics YOLO11 DetectionTrainer? -You can add callbacks to monitor and modify the training process in Ultralytics YOLOv8 `DetectionTrainer`. For instance, here's how you can add a callback to log model weights after every training [epoch](https://www.ultralytics.com/glossary/epoch): +You can add callbacks to monitor and modify the training process in Ultralytics YOLO11 `DetectionTrainer`. For instance, here's how you can add a callback to log model weights after every training [epoch](https://www.ultralytics.com/glossary/epoch): ```python from ultralytics.models.yolo.detect import DetectionTrainer @@ -148,19 +148,19 @@ trainer.train() For further details on callback events and entry points, refer to our [Callbacks Guide](../usage/callbacks.md). -### Why should I use Ultralytics YOLOv8 for model training? +### Why should I use Ultralytics YOLO11 for model training? -Ultralytics YOLOv8 offers a high-level abstraction on powerful engine executors, making it ideal for rapid development and customization. Key benefits include: +Ultralytics YOLO11 offers a high-level abstraction on powerful engine executors, making it ideal for rapid development and customization. Key benefits include: - **Ease of Use**: Both command-line and Python interfaces simplify complex tasks. - **Performance**: Optimized for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) and various vision AI applications. 
- **Customization**: Easily extendable for custom models, [loss functions](https://www.ultralytics.com/glossary/loss-function), and dataloaders. -Learn more about YOLOv8's capabilities by visiting [Ultralytics YOLO](https://www.ultralytics.com/yolo). +Learn more about YOLO11's capabilities by visiting [Ultralytics YOLO](https://www.ultralytics.com/yolo). -### Can I use the Ultralytics YOLOv8 DetectionTrainer for non-standard models? +### Can I use the Ultralytics YOLO11 DetectionTrainer for non-standard models? -Yes, Ultralytics YOLOv8 `DetectionTrainer` is highly flexible and can be customized for non-standard models. By inheriting from `DetectionTrainer`, you can overload different methods to support your specific model's needs. Here's a simple example: +Yes, Ultralytics YOLO11 `DetectionTrainer` is highly flexible and can be customized for non-standard models. By inheriting from `DetectionTrainer`, you can overload different methods to support your specific model's needs. Here's a simple example: ```python from ultralytics.models.yolo.detect import DetectionTrainer diff --git a/docs/en/usage/python.md b/docs/en/usage/python.md index 5236af0a1dc..177f5d45b08 100644 --- a/docs/en/usage/python.md +++ b/docs/en/usage/python.md @@ -1,12 +1,12 @@ --- comments: true -description: Learn to integrate YOLOv8 in Python for object detection, segmentation, and classification. Load, train models, and make predictions easily with our comprehensive guide. -keywords: YOLOv8, Python, object detection, segmentation, classification, machine learning, AI, pretrained models, train models, make predictions +description: Learn to integrate YOLO11 in Python for object detection, segmentation, and classification. Load, train models, and make predictions easily with our comprehensive guide. 
+keywords: YOLO11, Python, object detection, segmentation, classification, machine learning, AI, pretrained models, train models, make predictions --- # Python Usage -Welcome to the YOLOv8 Python Usage documentation! This guide is designed to help you seamlessly integrate YOLOv8 into your Python projects for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Here, you'll learn how to load and use pretrained models, train new models, and perform predictions on images. The easy-to-use Python interface is a valuable resource for anyone looking to incorporate YOLOv8 into their Python projects, allowing you to quickly implement advanced object detection capabilities. Let's get started! +Welcome to the YOLO11 Python Usage documentation! This guide is designed to help you seamlessly integrate YOLO11 into your Python projects for [object detection](https://www.ultralytics.com/glossary/object-detection), segmentation, and classification. Here, you'll learn how to load and use pretrained models, train new models, and perform predictions on images. The easy-to-use Python interface is a valuable resource for anyone looking to incorporate YOLO11 into their Python projects, allowing you to quickly implement advanced object detection capabilities. Let's get started!


@@ -16,7 +16,7 @@ Welcome to the YOLOv8 Python Usage documentation! This guide is designed to help allowfullscreen>
- Watch: Mastering Ultralytics YOLOv8: Python + Watch: Mastering Ultralytics YOLO11: Python

For example, users can load a model, train it, evaluate its performance on a validation set, and even export it to ONNX format with just a few lines of code. @@ -27,10 +27,10 @@ For example, users can load a model, train it, evaluate its performance on a val from ultralytics import YOLO # Create a new YOLO model from scratch - model = YOLO("yolov8n.yaml") + model = YOLO("yolo11n.yaml") # Load a pretrained YOLO model (recommended for training) - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") # Train the model using the 'coco8.yaml' dataset for 3 epochs results = model.train(data="coco8.yaml", epochs=3) @@ -47,16 +47,16 @@ For example, users can load a model, train it, evaluate its performance on a val ## [Train](../modes/train.md) -Train mode is used for training a YOLOv8 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. +Train mode is used for training a YOLO11 model on a custom dataset. In this mode, the model is trained using the specified dataset and hyperparameters. The training process involves optimizing the model's parameters so that it can accurately predict the classes and locations of objects in an image. !!! example "Train" - === "From pretrained(recommended)" + === "From pretrained (recommended)" ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") # pass any model type + model = YOLO("yolo11n.pt") # pass any model type results = model.train(epochs=5) ``` @@ -65,7 +65,7 @@ Train mode is used for training a YOLOv8 model on a custom dataset. In this mode ```python from ultralytics import YOLO - model = YOLO("yolov8n.yaml") + model = YOLO("yolo11n.yaml") results = model.train(data="coco8.yaml", epochs=5) ``` @@ -80,7 +80,7 @@ Train mode is used for training a YOLOv8 model on a custom dataset. 
In this mode ## [Val](../modes/val.md) -Val mode is used for validating a YOLOv8 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its [accuracy](https://www.ultralytics.com/glossary/accuracy) and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance. +Val mode is used for validating a YOLO11 model after it has been trained. In this mode, the model is evaluated on a validation set to measure its [accuracy](https://www.ultralytics.com/glossary/accuracy) and generalization performance. This mode can be used to tune the hyperparameters of the model to improve its performance. !!! example "Val" @@ -89,8 +89,8 @@ Val mode is used for validating a YOLOv8 model after it has been trained. In thi ```python from ultralytics import YOLO - # Load a YOLOv8 model - model = YOLO("yolov8n.yaml") + # Load a YOLO11 model + model = YOLO("yolo11n.yaml") # Train the model model.train(data="coco8.yaml", epochs=5) @@ -104,8 +104,8 @@ Val mode is used for validating a YOLOv8 model after it has been trained. In thi ```python from ultralytics import YOLO - # Load a YOLOv8 model - model = YOLO("yolov8n.yaml") + # Load a YOLO11 model + model = YOLO("yolo11n.yaml") # Train the model model.train(data="coco8.yaml", epochs=5) @@ -118,7 +118,7 @@ Val mode is used for validating a YOLOv8 model after it has been trained. In thi ## [Predict](../modes/predict.md) -Predict mode is used for making predictions using a trained YOLOv8 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. The model predicts the classes and locations of objects in the input images or videos. +Predict mode is used for making predictions using a trained YOLO11 model on new images or videos. In this mode, the model is loaded from a checkpoint file, and the user can provide images or videos to perform inference. 
The model predicts the classes and locations of objects in the input images or videos. !!! example "Predict" @@ -189,27 +189,27 @@ Predict mode is used for making predictions using a trained YOLOv8 model on new ## [Export](../modes/export.md) -Export mode is used for exporting a YOLOv8 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. This mode is useful when deploying the model to production environments. +Export mode is used for exporting a YOLO11 model to a format that can be used for deployment. In this mode, the model is converted to a format that can be used by other software applications or hardware devices. This mode is useful when deploying the model to production environments. !!! example "Export" === "Export to ONNX" - Export an official YOLOv8n model to ONNX with dynamic batch-size and image-size. + Export an official YOLO11n model to ONNX with dynamic batch-size and image-size. ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.export(format="onnx", dynamic=True) ``` === "Export to TensorRT" - Export an official YOLOv8n model to TensorRT on `device=0` for acceleration on CUDA devices. + Export an official YOLO11n model to TensorRT on `device=0` for acceleration on CUDA devices. ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.export(format="onnx", device=0) ``` @@ -217,7 +217,7 @@ Export mode is used for exporting a YOLOv8 model to a format that can be used fo ## [Track](../modes/track.md) -Track mode is used for tracking objects in real-time using a YOLOv8 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful for applications such as surveillance systems or self-driving cars. 
+Track mode is used for tracking objects in real-time using a YOLO11 model. In this mode, the model is loaded from a checkpoint file, and the user can provide a live video stream to perform real-time object tracking. This mode is useful for applications such as surveillance systems or self-driving cars. !!! example "Track" @@ -227,8 +227,8 @@ Track mode is used for tracking objects in real-time using a YOLOv8 model. In th from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.pt") # load an official detection model - model = YOLO("yolov8n-seg.pt") # load an official segmentation model + model = YOLO("yolo11n.pt") # load an official detection model + model = YOLO("yolo11n-seg.pt") # load an official segmentation model model = YOLO("path/to/best.pt") # load a custom model # Track with the model @@ -240,92 +240,48 @@ Track mode is used for tracking objects in real-time using a YOLOv8 model. In th ## [Benchmark](../modes/benchmark.md) -Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. +Benchmark mode is used to profile the speed and accuracy of various export formats for YOLO11. The benchmarks provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation) or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export formats like ONNX, OpenVINO, TensorRT and others. 
This information can help users choose the optimal export format for their specific use case based on their requirements for speed and accuracy. !!! example "Benchmark" === "Python" - Benchmark an official YOLOv8n model across all export formats. + Benchmark an official YOLO11n model across all export formats. ```python from ultralytics.utils.benchmarks import benchmark # Benchmark - benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) + benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device=0) ``` [Benchmark Examples](../modes/benchmark.md){ .md-button } -## Explorer - -Explorer API can be used to explore datasets with advanced semantic, vector-similarity and SQL search among other features. It also enabled searching for images based on their content using natural language by utilizing the power of LLMs. The Explorer API allows you to write your own dataset exploration notebooks or scripts to get insights into your datasets. - -!!! example "Semantic Search Using Explorer" - - === "Using Images" - - ```python - from ultralytics import Explorer - - # create an Explorer object - exp = Explorer(data="coco8.yaml", model="yolov8n.pt") - exp.create_embeddings_table() - - similar = exp.get_similar(img="https://ultralytics.com/images/bus.jpg", limit=10) - print(similar.head()) - - # Search using multiple indices - similar = exp.get_similar( - img=["https://ultralytics.com/images/bus.jpg", "https://ultralytics.com/images/bus.jpg"], limit=10 - ) - print(similar.head()) - ``` - - === "Using Dataset Indices" - - ```python - from ultralytics import Explorer - - # create an Explorer object - exp = Explorer(data="coco8.yaml", model="yolov8n.pt") - exp.create_embeddings_table() - - similar = exp.get_similar(idx=1, limit=10) - print(similar.head()) - - # Search using multiple indices - similar = exp.get_similar(idx=[1, 10], limit=10) - print(similar.head()) - ``` - -[Explorer](../datasets/explorer/index.md){ .md-button } - ## 
Using Trainers `YOLO` model class is a high-level wrapper on the Trainer classes. Each YOLO task has its own trainer that inherits from `BaseTrainer`. !!! tip "Detection Trainer Example" - ```python - from ultralytics.models.yolo import DetectionPredictor, DetectionTrainer, DetectionValidator + ```python + from ultralytics.models.yolo import DetectionPredictor, DetectionTrainer, DetectionValidator - # trainer - trainer = DetectionTrainer(overrides={}) - trainer.train() - trained_model = trainer.best + # trainer + trainer = DetectionTrainer(overrides={}) + trainer.train() + trained_model = trainer.best - # Validator - val = DetectionValidator(args=...) - val(model=trained_model) + # Validator + val = DetectionValidator(args=...) + val(model=trained_model) - # predictor - pred = DetectionPredictor(overrides={}) - pred(source=SOURCE, model=trained_model) + # predictor + pred = DetectionPredictor(overrides={}) + pred(source=SOURCE, model=trained_model) - # resume from last weight - overrides["resume"] = trainer.last - trainer = detect.DetectionTrainer(overrides=overrides) - ``` + # resume from last weight + overrides["resume"] = trainer.last + trainer = detect.DetectionTrainer(overrides=overrides) + ``` You can easily customize Trainers to support custom tasks or explore R&D ideas. Learn more about Customizing `Trainers`, `Validators` and `Predictors` to suit your project needs in the Customization Section. @@ -333,15 +289,15 @@ You can easily customize Trainers to support custom tasks or explore R&D ideas. ## FAQ -### How can I integrate YOLOv8 into my Python project for object detection? +### How can I integrate YOLO11 into my Python project for object detection? -Integrating Ultralytics YOLOv8 into your Python projects is simple. You can load a pre-trained model or train a new model from scratch. Here's how to get started: +Integrating Ultralytics YOLO11 into your Python projects is simple. You can load a pre-trained model or train a new model from scratch. 
Here's how to get started: ```python from ultralytics import YOLO # Load a pretrained YOLO model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Perform object detection on an image results = model("https://ultralytics.com/images/bus.jpg") @@ -353,9 +309,9 @@ for result in results: See more detailed examples in our [Predict Mode](../modes/predict.md) section. -### What are the different modes available in YOLOv8? +### What are the different modes available in YOLO11? -Ultralytics YOLOv8 provides various modes to cater to different [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) workflows. These include: +Ultralytics YOLO11 provides various modes to cater to different [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) workflows. These include: - **[Train](../modes/train.md)**: Train a model using custom datasets. - **[Val](../modes/val.md)**: Validate model performance on a validation set. @@ -366,15 +322,15 @@ Ultralytics YOLOv8 provides various modes to cater to different [machine learnin Each mode is designed to provide comprehensive functionalities for different stages of model development and deployment. -### How do I train a custom YOLOv8 model using my dataset? +### How do I train a custom YOLO11 model using my dataset? -To train a custom YOLOv8 model, you need to specify your dataset and other hyperparameters. Here's a quick example: +To train a custom YOLO11 model, you need to specify your dataset and other hyperparameters. Here's a quick example: ```python from ultralytics import YOLO # Load the YOLO model -model = YOLO("yolov8n.yaml") +model = YOLO("yolo11n.yaml") # Train the model with custom dataset model.train(data="path/to/your/dataset.yaml", epochs=10) @@ -382,15 +338,15 @@ model.train(data="path/to/your/dataset.yaml", epochs=10) For more details on training and hyperlinks to example usage, visit our [Train Mode](../modes/train.md) page. 
-### How do I export YOLOv8 models for deployment? +### How do I export YOLO11 models for deployment? -Exporting YOLOv8 models in a format suitable for deployment is straightforward with the `export` function. For example, you can export a model to ONNX format: +Exporting YOLO11 models in a format suitable for deployment is straightforward with the `export` function. For example, you can export a model to ONNX format: ```python from ultralytics import YOLO # Load the YOLO model -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") # Export the model to ONNX format model.export(format="onnx") @@ -398,15 +354,15 @@ model.export(format="onnx") For various export options, refer to the [Export Mode](../modes/export.md) documentation. -### Can I validate my YOLOv8 model on different datasets? +### Can I validate my YOLO11 model on different datasets? -Yes, validating YOLOv8 models on different datasets is possible. After training, you can use the validation mode to evaluate the performance: +Yes, validating YOLO11 models on different datasets is possible. After training, you can use the validation mode to evaluate the performance: ```python from ultralytics import YOLO -# Load a YOLOv8 model -model = YOLO("yolov8n.yaml") +# Load a YOLO11 model +model = YOLO("yolo11n.yaml") # Train the model model.train(data="coco8.yaml", epochs=5) diff --git a/docs/en/usage/simple-utilities.md b/docs/en/usage/simple-utilities.md index d8905fa81bf..c88caacb2f6 100644 --- a/docs/en/usage/simple-utilities.md +++ b/docs/en/usage/simple-utilities.md @@ -25,10 +25,6 @@ The `ultralytics` package comes with a myriad of utilities that can support, enh ## Data -### YOLO Data Explorer - -[YOLO Explorer](../datasets/explorer/index.md) was added in the `8.1.0` anniversary update and is a powerful tool you can use to better understand your dataset. One of the key functions that YOLO Explorer provides, is the ability to use text queries to find object instances in your dataset. 
- ### Auto Labeling / Annotations Dataset annotation is a very resource intensive and time-consuming process. If you have a YOLO [object detection](https://www.ultralytics.com/glossary/object-detection) model trained on a reasonable amount of data, you can use it and [SAM](../models/sam.md) to auto-annotate additional data (segmentation format). @@ -36,26 +32,45 @@ Dataset annotation is a very resource intensive and time-consuming process. If y ```{ .py .annotate } from ultralytics.data.annotator import auto_annotate -auto_annotate( # (1)! +auto_annotate( data="path/to/new/data", - det_model="yolov8n.pt", + det_model="yolo11n.pt", sam_model="mobile_sam.pt", device="cuda", output_dir="path/to/save_labels", ) ``` -1. Nothing returns from this function +This function does not return any value. For further details on how the function operates: - [See the reference section for `annotator.auto_annotate`](../reference/data/annotator.md#ultralytics.data.annotator.auto_annotate) for more insight on how the function operates. - - Use in combination with the [function `segments2boxes`](#convert-segments-to-bounding-boxes) to generate object detection bounding boxes as well +### Visualize Dataset Annotations + +This function visualizes YOLO annotations on an image before training, helping to identify and correct any wrong annotations that could lead to incorrect detection results. It draws bounding boxes, labels objects with class names, and adjusts text color based on the background's luminance for better readability. + +```{ .py .annotate } +from ultralytics.data.utils import visualize_image_annotations + +label_map = { # Define the label map with all annotated class labels. + 0: "person", + 1: "car", +} + +# Visualize +visualize_image_annotations( + "path/to/image.jpg", # Input image path. + "path/to/annotations.txt", # Annotation file path for the image. 
+ label_map, +) +``` + ### Convert Segmentation Masks into YOLO Format ![Segmentation Masks to YOLO Format](https://github.com/ultralytics/docs/releases/download/0/segmentation-masks-to-yolo-format.avif) -Use to convert a dataset of segmentation mask images to the `YOLO` segmentation format. +Use to convert a dataset of segmentation mask images to the [`YOLO`](../models/yolo11.md) segmentation format. This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format. The converted masks will be saved in the specified output directory. @@ -63,7 +78,8 @@ The converted masks will be saved in the specified output directory. ```python from ultralytics.data.converter import convert_segment_masks_to_yolo_seg -# The classes here is the total classes in the dataset, for COCO dataset we have 80 classes +# The classes here is the total classes in the dataset. +# for COCO dataset we have 80 classes. convert_segment_masks_to_yolo_seg(masks_dir="path/to/masks_dir", output_dir="path/to/output_dir", classes=80) ``` @@ -93,7 +109,7 @@ from ultralytics.utils.plotting import Annotator from ultralytics import YOLO import cv2 -model = YOLO('yolov8n.pt') # Load pretrain or fine-tune model +model = YOLO('yolo11n.pt') # Load pretrain or fine-tune model # Process the image source = cv2.imread('path/to/image.jpg') @@ -375,6 +391,91 @@ See docstring for each function or visit the `ultralytics.utils.ops` [reference Ultralytics includes an Annotator class that can be used to annotate any kind of data. It's easiest to use with [object detection bounding boxes](../modes/predict.md#boxes), [pose key points](../modes/predict.md#keypoints), and [oriented bounding boxes](../modes/predict.md#obb). +#### Ultralytics Sweep Annotation + +!!! 
example "Python Examples using YOLO11 🚀" + + === "Python" + + ```python + import cv2 + + from ultralytics import YOLO + from ultralytics.utils.plotting import Annotator, colors + + # User defined video path and model file + cap = cv2.VideoCapture("Path/to/video/file.mp4") + model = YOLO(model="yolo11s-seg.pt") # Model file i.e. yolo11s.pt or yolo11m-seg.pt + + if not cap.isOpened(): + print("Error: Could not open video.") + exit() + + # Initialize the video writer object. + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + video_writer = cv2.VideoWriter("ultralytics.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + + masks = None # Initialize variable to store masks data + f = 0 # Initialize frame count variable for enabling mouse event. + line_x = w # Store width of line. + dragging = False # Initialize bool variable for line dragging. + classes = model.names # Store model classes names for plotting. + window_name = "Ultralytics Sweep Annotator" + + + def drag_line(event, x, y, flags, param): # Mouse callback for dragging line. + global line_x, dragging + if event == cv2.EVENT_LBUTTONDOWN or (flags & cv2.EVENT_FLAG_LBUTTON): + line_x = max(0, min(x, w)) + dragging = True + + + while cap.isOpened(): # Loop over the video capture object. + ret, im0 = cap.read() + if not ret: + break + f = f + 1 # Increment frame count. + count = 0 # Re-initialize count variable on every frame for precise counts. + annotator = Annotator(im0) + results = model.track(im0, persist=True) # Track objects using track method. 
+ if f == 1: + cv2.namedWindow(window_name) + cv2.setMouseCallback(window_name, drag_line) + + if results[0].boxes.id is not None: + if results[0].masks is not None: + masks = results[0].masks.xy + track_ids = results[0].boxes.id.int().cpu().tolist() + clss = results[0].boxes.cls.cpu().tolist() + boxes = results[0].boxes.xyxy.cpu() + + for mask, box, cls, t_id in zip(masks or [None] * len(boxes), boxes, clss, track_ids): + color = colors(t_id, True) # Assign different color to each tracked object. + if mask is not None and mask.size > 0: + # If you want to overlay the masks + # mask[:, 0] = np.clip(mask[:, 0], line_x, w) + # mask_img = cv2.fillPoly(im0.copy(), [mask.astype(int)], color) + # cv2.addWeighted(mask_img, 0.5, im0, 0.5, 0, im0) + + if box[0] > line_x: + count += 1 + annotator.seg_bbox(mask=mask, mask_color=color, label=str(classes[cls])) + else: + if box[0] > line_x: + count += 1 + annotator.box_label(box=box, color=color, label=str(classes[cls])) + + annotator.sweep_annotator(line_x=line_x, line_y=h, label=f"COUNT:{count}") # Display the sweep + cv2.imshow(window_name, im0) + video_writer.write(im0) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + cap.release() # Release the video capture. + video_writer.release() # Release the video writer. + cv2.destroyAllWindows() # Destroy all opened windows. + ``` + #### Horizontal Bounding Boxes ```{ .py .annotate } @@ -459,13 +560,24 @@ image_with_obb = ann.result() #### Bounding Boxes Circle Annotation [Circle Label](https://docs.ultralytics.com/reference/utils/plotting/#ultralytics.utils.plotting.Annotator.circle_label) +

+
+ +
+ Watch: In-Depth Guide to Text & Circle Annotations with Python Live Demos | Ultralytics Annotations 🚀

+ ```python import cv2 from ultralytics import YOLO from ultralytics.utils.plotting import Annotator -model = YOLO("yolov8s.pt") +model = YOLO("yolo11s.pt") names = model.names cap = cv2.VideoCapture("path/to/video/file.mp4") @@ -504,7 +616,7 @@ import cv2 from ultralytics import YOLO from ultralytics.utils.plotting import Annotator -model = YOLO("yolov8s.pt") +model = YOLO("yolo11s.pt") names = model.names cap = cv2.VideoCapture("path/to/video/file.mp4") @@ -592,7 +704,7 @@ from ultralytics.data.annotator import auto_annotate auto_annotate( data="path/to/new/data", - det_model="yolov8n.pt", + det_model="yolo11n.pt", sam_model="mobile_sam.pt", device="cuda", output_dir="path/to/save_labels", diff --git a/docs/en/yolov5/environments/aws_quickstart_tutorial.md b/docs/en/yolov5/environments/aws_quickstart_tutorial.md index 0e5daf2fe9d..387817dcc92 100644 --- a/docs/en/yolov5/environments/aws_quickstart_tutorial.md +++ b/docs/en/yolov5/environments/aws_quickstart_tutorial.md @@ -8,7 +8,7 @@ keywords: YOLOv5, AWS, Deep Learning, Machine Learning, AWS EC2, YOLOv5 setup, D Setting up a high-performance deep learning environment can be daunting for newcomers, but fear not! 🛠️ With this guide, we'll walk you through the process of getting YOLOv5 up and running on an AWS Deep Learning instance. By leveraging the power of Amazon Web Services (AWS), even those new to [machine learning](https://www.ultralytics.com/glossary/machine-learning-ml) can get started quickly and cost-effectively. The AWS platform's scalability is perfect for both experimentation and production deployment. -Other quickstart options for YOLOv5 include our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](./google_cloud_quickstart_tutorial.md), and our Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls. 
+Other quickstart options for YOLOv5 include our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](./google_cloud_quickstart_tutorial.md), and our Docker image at [Docker Hub](https://hub.docker.com/r/ultralytics/yolov5) Docker Pulls. ## Step 1: AWS Console Sign-In diff --git a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md index 55b92316dce..9063949bc49 100644 --- a/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md +++ b/docs/en/yolov5/environments/docker_image_quickstart_tutorial.md @@ -8,11 +8,11 @@ keywords: YOLOv5, Docker, Ultralytics, setup, guide, tutorial, machine learning, This tutorial will guide you through the process of setting up and running YOLOv5 in a Docker container. -You can also explore other quickstart options for YOLOv5, such as our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](./google_cloud_quickstart_tutorial.md), and [Amazon AWS](./aws_quickstart_tutorial.md). +You can also explore other quickstart options for YOLOv5, such as our [Colab Notebook](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb) Open In Colab Open In Kaggle, [GCP Deep Learning VM](./google_cloud_quickstart_tutorial.md), and [Amazon AWS](./aws_quickstart_tutorial.md). ## Prerequisites -1. **NVIDIA Driver**: Version 455.23 or higher. Download from [Nvidia's website](https://www.nvidia.com/Download/index.aspx). +1. **NVIDIA Driver**: Version 455.23 or higher. Download from [NVIDIA's website](https://www.nvidia.com/Download/index.aspx). 2. **NVIDIA-Docker**: Allows Docker to interact with your local GPU. Installation instructions are available on the [NVIDIA-Docker GitHub repository](https://github.com/NVIDIA/nvidia-docker). 3. 
**Docker Engine - CE**: Version 19.03 or higher. Download and installation instructions can be found on the [Docker website](https://docs.docker.com/get-started/get-docker/). diff --git a/docs/en/yolov5/index.md b/docs/en/yolov5/index.md index 36050584631..180bd25e640 100644 --- a/docs/en/yolov5/index.md +++ b/docs/en/yolov5/index.md @@ -8,7 +8,7 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning,

- + Ultralytics YOLOv5 v7.0 banner

@@ -18,15 +18,15 @@ keywords: YOLOv5, Ultralytics, object detection, computer vision, deep learning,
Run on Gradient Open In Colab -Open In Kaggle +Open In Kaggle

-Welcome to the Ultralytics' YOLOv5🚀 Documentation! YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" [object detection](https://www.ultralytics.com/glossary/object-detection) model, is designed to deliver high-speed, high-accuracy results in real-time. +Welcome to the Ultralytics' YOLOv5🚀 Documentation! YOLOv5, the fifth iteration of the revolutionary "You Only Look Once" object detection model, is designed to deliver high-speed, high-accuracy results in real-time.

-Built on PyTorch, this powerful [deep learning](https://www.ultralytics.com/glossary/deep-learning-dl) framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use-cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) projects. Let's get started! +Built on PyTorch, this powerful deep learning framework has garnered immense popularity for its versatility, ease of use, and high performance. Our documentation guides you through the installation process, explains the architectural nuances of the model, showcases various use-cases, and provides a series of detailed tutorials. These resources will help you harness the full potential of YOLOv5 for your computer vision projects. Let's get started!
@@ -54,7 +54,7 @@ Here's a compilation of comprehensive tutorials that will guide you through diff Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. -- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](environments/azureml_quickstart_tutorial.md) @@ -80,14 +80,14 @@ This badge indicates that all [YOLOv5 GitHub Actions](https://github.com/ultraly space Ultralytics BiliBili space - Ultralytics Discord + Ultralytics Discord
## Connect and Contribute Your journey with YOLOv5 doesn't have to be a solitary one. Join our vibrant community on [GitHub](https://github.com/ultralytics/yolov5), connect with professionals on [LinkedIn](https://www.linkedin.com/company/ultralytics/), share your results on [Twitter](https://twitter.com/ultralytics), and find educational resources on [YouTube](https://www.youtube.com/ultralytics?sub_confirmation=1). Follow us on [TikTok](https://www.tiktok.com/@ultralytics) and [BiliBili](https://ultralytics.com/bilibili) for more engaging content. -Interested in contributing? We welcome contributions of all forms; from code improvements and bug reports to documentation updates. Check out our [contributing guidelines](../help/contributing.md/) for more information. +Interested in contributing? We welcome contributions of all forms; from code improvements and bug reports to documentation updates. Check out our [contributing guidelines](../help/contributing.md) for more information. We're excited to see the innovative ways you'll use YOLOv5. Dive in, experiment, and revolutionize your computer vision projects! 🚀 diff --git a/docs/en/yolov5/tutorials/clearml_logging_integration.md b/docs/en/yolov5/tutorials/clearml_logging_integration.md index f8ee5c348e5..17984f0f078 100644 --- a/docs/en/yolov5/tutorials/clearml_logging_integration.md +++ b/docs/en/yolov5/tutorials/clearml_logging_integration.md @@ -10,7 +10,7 @@ keywords: ClearML, YOLOv5, machine learning, experiment tracking, data versionin ## About ClearML -[ClearML](https://clear.ml/) is an [open-source](https://github.com/allegroai/clearml) toolbox designed to save you time ⏱️. +[ClearML](https://clear.ml/) is an [open-source](https://github.com/clearml/clearml) toolbox designed to save you time ⏱️. 
๐Ÿ”จ Track every YOLOv5 training run in the experiment manager @@ -102,7 +102,7 @@ Versioning your data separately from your code is generally a good idea and make ### Prepare Your Dataset -The YOLOv5 repository supports a number of different datasets by using YAML files containing their information. By default datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the YAML or with the scripts provided by yolov5, you get this folder structure: +The YOLOv5 repository supports a number of different datasets by using YAML files containing their information. By default, datasets are downloaded to the `../datasets` folder in relation to the repository root folder. So if you downloaded the `coco128` dataset using the link in the YAML or with the scripts provided by yolov5, you get this folder structure: ``` .. diff --git a/docs/en/yolov5/tutorials/comet_logging_integration.md b/docs/en/yolov5/tutorials/comet_logging_integration.md index c5f20dda93d..88e3e5a5770 100644 --- a/docs/en/yolov5/tutorials/comet_logging_integration.md +++ b/docs/en/yolov5/tutorials/comet_logging_integration.md @@ -138,7 +138,7 @@ python train.py \ ### Controlling the number of Prediction Images logged to Comet -When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable. +When logging predictions from YOLOv5, Comet will log the images associated with each set of predictions. By default, a maximum of 100 validation images are logged. You can increase or decrease this number using the `COMET_MAX_IMAGE_UPLOADS` environment variable. 
```shell env COMET_MAX_IMAGE_UPLOADS=200 python train.py \ diff --git a/docs/en/yolov5/tutorials/hyperparameter_evolution.md b/docs/en/yolov5/tutorials/hyperparameter_evolution.md index 80c0d39b524..e9830e14df9 100644 --- a/docs/en/yolov5/tutorials/hyperparameter_evolution.md +++ b/docs/en/yolov5/tutorials/hyperparameter_evolution.md @@ -153,7 +153,7 @@ We recommend a minimum of 300 generations of evolution for best results. Note th Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. -- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/model_ensembling.md b/docs/en/yolov5/tutorials/model_ensembling.md index 814c8969218..cc76cc0cdab 100644 --- a/docs/en/yolov5/tutorials/model_ensembling.md +++ b/docs/en/yolov5/tutorials/model_ensembling.md @@ -134,7 +134,7 @@ Done. (0.223s) Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md index e5f0c73007c..4869efb0177 100644 --- a/docs/en/yolov5/tutorials/model_export.md +++ b/docs/en/yolov5/tutorials/model_export.md @@ -26,20 +26,20 @@ YOLOv5 inference is officially supported in 11 formats: ๐Ÿ’ก ProTip: Export to ONNX or OpenVINO for up to 3x CPU speedup. See [CPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6613). ๐Ÿ’ก ProTip: Export to TensorRT for up to 5x GPU speedup. See [GPU Benchmarks](https://github.com/ultralytics/yolov5/pull/6963). -| Format | `export.py --include` | Model | -| :------------------------------------------------------------------------- | :-------------------- | :------------------------ | -| [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` | -| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov5s.torchscript` | -| [ONNX](https://onnx.ai/) | `onnx` | `yolov5s.onnx` | -| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov5s_openvino_model/` | -| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov5s.engine` | -| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov5s.mlmodel` | -| [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov5s_saved_model/` | -| [TensorFlow GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov5s.pb` | -| [TensorFlow Lite](https://ai.google.dev/edge/litert) | `tflite` | `yolov5s.tflite` | -| [TensorFlow Edge 
TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov5s_edgetpu.tflite` | -| [TensorFlow.js](https://www.tensorflow.org/js) | `tfjs` | `yolov5s_web_model/` | -| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov5s_paddle_model/` | +| Format | `export.py --include` | Model | +| :----------------------------------------------------------- | :-------------------- | :------------------------ | +| [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` | +| [TorchScript](../../integrations/torchscript.md) | `torchscript` | `yolov5s.torchscript` | +| [ONNX](../../integrations/onnx.md) | `onnx` | `yolov5s.onnx` | +| [OpenVINO](../../integrations/openvino.md) | `openvino` | `yolov5s_openvino_model/` | +| [TensorRT](../../integrations/tensorrt.md) | `engine` | `yolov5s.engine` | +| [CoreML](../../integrations/coreml.md) | `coreml` | `yolov5s.mlmodel` | +| [TensorFlow SavedModel](../../integrations/tf-savedmodel.md) | `saved_model` | `yolov5s_saved_model/` | +| [TensorFlow GraphDef](../../integrations/tf-graphdef.md) | `pb` | `yolov5s.pb` | +| [TensorFlow Lite](../../integrations/tflite.md) | `tflite` | `yolov5s.tflite` | +| [TensorFlow Edge TPU](../../integrations/edge-tpu.md) | `edgetpu` | `yolov5s_edgetpu.tflite` | +| [TensorFlow.js](../../integrations/tfjs.md) | `tfjs` | `yolov5s_web_model/` | +| [PaddlePaddle](../../integrations/paddlepaddle.md) | `paddle` | `yolov5s_paddle_model/` | ## Benchmarks @@ -234,7 +234,7 @@ YOLOv5 OpenVINO C++ inference examples: Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md index 8bda8772e1c..0adfb32c8ab 100644 --- a/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md +++ b/docs/en/yolov5/tutorials/model_pruning_and_sparsity.md @@ -97,7 +97,7 @@ In the results we can observe that we have achieved a **sparsity of 30%** in our Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/multi_gpu_training.md b/docs/en/yolov5/tutorials/multi_gpu_training.md index 53f3a1c1fd3..d61fab83278 100644 --- a/docs/en/yolov5/tutorials/multi_gpu_training.md +++ b/docs/en/yolov5/tutorials/multi_gpu_training.md @@ -173,7 +173,7 @@ If you went through all the above, feel free to raise an Issue by giving as much Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md index 27e26f144ff..0f464adf890 100644 --- a/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md +++ b/docs/en/yolov5/tutorials/pytorch_hub_model_loading.md @@ -361,7 +361,7 @@ model = torch.hub.load("ultralytics/yolov5", "custom", path="yolov5s_paddle_mode Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md index 55728f21e74..53f29d6f626 100644 --- a/docs/en/yolov5/tutorials/roboflow_datasets_integration.md +++ b/docs/en/yolov5/tutorials/roboflow_datasets_integration.md @@ -29,7 +29,7 @@ After uploading data to Roboflow, you can label your data and review previous la ## Versioning -You can make versions of your dataset with different preprocessing and offline augmentation options. YOLOv5 does online augmentations natively, so be intentional when layering Roboflow's offline augmentations on top. +You can make versions of your dataset with different preprocessing and offline augmentation options. YOLOv5 does online augmentations natively, so be intentional when layering Roboflow offline augmentations on top. ![Roboflow Preprocessing](https://github.com/ultralytics/docs/releases/download/0/roboflow-preprocessing.avif) @@ -60,7 +60,7 @@ The real world is messy and your model will invariably encounter situations your Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) @@ -102,4 +102,4 @@ Active learning is a machine learning strategy that iteratively improves a model ### How can I use Ultralytics environments for training YOLOv5 models on different platforms? -Ultralytics provides ready-to-use environments with pre-installed dependencies like CUDA, CUDNN, Python, and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it easier to kickstart your training projects. These environments are available on various platforms such as Google Cloud, AWS, Azure, and Docker. You can also access free GPU notebooks via [Paperspace](https://bit.ly/yolov5-paperspace-notebook), [Google Colab](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb), and [Kaggle](https://www.kaggle.com/ultralytics/yolov5). For specific setup instructions, visit the [Supported Environments](#supported-environments) section of the documentation. +Ultralytics provides ready-to-use environments with pre-installed dependencies like CUDA, CUDNN, Python, and [PyTorch](https://www.ultralytics.com/glossary/pytorch), making it easier to kickstart your training projects. These environments are available on various platforms such as Google Cloud, AWS, Azure, and Docker. You can also access free GPU notebooks via [Paperspace](https://bit.ly/yolov5-paperspace-notebook), [Google Colab](https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb), and [Kaggle](https://www.kaggle.com/models/ultralytics/yolov5). 
For specific setup instructions, visit the [Supported Environments](#supported-environments) section of the documentation. diff --git a/docs/en/yolov5/tutorials/test_time_augmentation.md b/docs/en/yolov5/tutorials/test_time_augmentation.md index 336ad3f79bc..30c53b72301 100644 --- a/docs/en/yolov5/tutorials/test_time_augmentation.md +++ b/docs/en/yolov5/tutorials/test_time_augmentation.md @@ -151,7 +151,7 @@ You can customize the TTA ops applied in the YOLOv5 `forward_augment()` method [ Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. -- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/tips_for_best_training_results.md b/docs/en/yolov5/tutorials/tips_for_best_training_results.md index 634297203ea..5e7f55eb0ea 100644 --- a/docs/en/yolov5/tutorials/tips_for_best_training_results.md +++ b/docs/en/yolov5/tutorials/tips_for_best_training_results.md @@ -18,7 +18,7 @@ We've put together a full guide for users looking to get the best results on the - **Instances per class.** โ‰ฅ 10000 instances (labeled objects) per class recommended - **Image variety.** Must be representative of deployed environment. For real-world use cases we recommend images from different times of day, different seasons, different weather, different lighting, different angles, different sources (scraped online, collected locally, different cameras) etc. 
- **Label consistency.** All instances of all classes in all images must be labelled. Partial labelling will not work. -- **Label [accuracy](https://www.ultralytics.com/glossary/accuracy).** Labels must closely enclose each object. No space should exist between an object and it's [bounding box](https://www.ultralytics.com/glossary/bounding-box). No objects should be missing a label. +- **Label [accuracy](https://www.ultralytics.com/glossary/accuracy).** Labels must closely enclose each object. No space should exist between an object and its [bounding box](https://www.ultralytics.com/glossary/bounding-box). No objects should be missing a label. - **Label verification.** View `train_batch*.jpg` on train start to verify your labels appear correct, i.e. see [example](./train_custom_data.md#local-logging) mosaic. - **Background images.** Background images are images with no objects that are added to a dataset to reduce False Positives (FP). We recommend about 0-10% background images to help reduce FPs (COCO has 1000 background images for reference, 1% of the total). No labels are required for background images. diff --git a/docs/en/yolov5/tutorials/train_custom_data.md b/docs/en/yolov5/tutorials/train_custom_data.md index aa093e4b81c..c6f9d6f2692 100644 --- a/docs/en/yolov5/tutorials/train_custom_data.md +++ b/docs/en/yolov5/tutorials/train_custom_data.md @@ -18,7 +18,7 @@ pip install -r requirements.txt # install ## Train On Custom Data - + Ultralytics active learning

@@ -77,7 +77,7 @@ Export in `YOLOv5 Pytorch` format, then copy the snippet into your training scri ### 2.1 Create `dataset.yaml` -[COCO128](https://www.kaggle.com/ultralytics/coco128) is an example small tutorial dataset composed of the first 128 images in [COCO](https://cocodataset.org/) train2017. These same 128 images are used for both training and validation to verify our training pipeline is capable of [overfitting](https://www.ultralytics.com/glossary/overfitting). [data/coco128.yaml](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), shown below, is the dataset config file that defines 1) the dataset root directory `path` and relative paths to `train` / `val` / `test` image directories (or `*.txt` files with image paths) and 2) a class `names` dictionary: +[COCO128](https://www.kaggle.com/datasets/ultralytics/coco128) is an example small tutorial dataset composed of the first 128 images in [COCO](https://cocodataset.org/) train2017. These same 128 images are used for both training and validation to verify our training pipeline is capable of [overfitting](https://www.ultralytics.com/glossary/overfitting). [data/coco128.yaml](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), shown below, is the dataset config file that defines 1) the dataset root directory `path` and relative paths to `train` / `val` / `test` image directories (or `*.txt` files with image paths) and 2) a class `names` dictionary: ```yaml # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] @@ -145,7 +145,7 @@ python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt 💡 Always train from a local dataset. Mounted or network drives like Google Drive will be very slow. -All training results are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc. For more details see the Training section of our tutorial notebook. 
Open In Colab Open In Kaggle +All training results are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc. For more details see the Training section of our tutorial notebook. Open In Colab Open In Kaggle ## 5. Visualize @@ -211,7 +211,7 @@ Once your model is trained you can use your best checkpoint `best.pt` to: Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. -- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md b/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md index 9e689ad3eb1..9a37ace1650 100644 --- a/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md +++ b/docs/en/yolov5/tutorials/transfer_learning_with_frozen_layers.md @@ -141,7 +141,7 @@ Interestingly, the more modules are frozen the less GPU memory is required to tr Ultralytics provides a range of ready-to-use environments, each pre-installed with essential dependencies such as [CUDA](https://developer.nvidia.com/cuda-zone), [CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/), and [PyTorch](https://pytorch.org/), to kickstart your projects. 
-- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle +- **Free GPU Notebooks**: Run on Gradient Open In Colab Open In Kaggle - **Google Cloud**: [GCP Quickstart Guide](../environments/google_cloud_quickstart_tutorial.md) - **Amazon**: [AWS Quickstart Guide](../environments/aws_quickstart_tutorial.md) - **Azure**: [AzureML Quickstart Guide](../environments/azureml_quickstart_tutorial.md) diff --git a/docs/mkdocs_github_authors.yaml b/docs/mkdocs_github_authors.yaml index 4f7f3b3a389..2393f0835be 100644 --- a/docs/mkdocs_github_authors.yaml +++ b/docs/mkdocs_github_authors.yaml @@ -1,12 +1,18 @@ +107626595+pderrenger@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/107626595?v=4 + username: pderrenger 116908874+jk4e@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/116908874?v=4 username: jk4e 1185102784@qq.com: - avatar: null - username: null + avatar: https://avatars.githubusercontent.com/u/61612323?v=4 + username: Laughing-q 130829914+IvorZhu331@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/130829914?v=4 username: IvorZhu331 +131249114+ServiAmirPM@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/131249114?v=4 + username: ServiAmirPM 131261051+MatthewNoyce@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/131261051?v=4 username: MatthewNoyce @@ -16,12 +22,18 @@ 1579093407@qq.com: avatar: https://avatars.githubusercontent.com/u/160490334?v=4 username: YOLOv5-Magic +16029431+jules-ai@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/16029431?v=4 + username: jules-ai 17216799+ouphi@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/17216799?v=4 username: ouphi 17316848+maianumerosky@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/17316848?v=4 username: maianumerosky +25704330+JairajJangle@users.noreply.github.com: + avatar: 
https://avatars.githubusercontent.com/u/25704330?v=4 + username: JairajJangle 32206511+Y-T-G@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/32206511?v=4 username: Y-T-G @@ -37,6 +49,9 @@ 40165666+berry-ding@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/40165666?v=4 username: berry-ding +44016758+M-Amrollahi@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/44016758?v=4 + username: M-Amrollahi 46103969+inisis@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/46103969?v=4 username: inisis @@ -46,6 +61,9 @@ 48149018+zhixuwei@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/48149018?v=4 username: zhixuwei +49172033+Lucashygi@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/49172033?v=4 + username: Lucashygi 49699333+dependabot[bot]@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/27347476?v=4 username: dependabot @@ -72,16 +90,28 @@ username: Skillnoob 79740115+0xSynapse@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/79740115?v=4 - username: 0xSynapse + username: github +8401806+wangzhaode@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/8401806?v=4 + username: wangzhaode +91465467+lalayants@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/91465467?v=4 + username: lalayants Francesco.mttl@gmail.com: avatar: https://avatars.githubusercontent.com/u/3855193?v=4 username: ambitious-octopus +Fruchtzwerg94@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/29866610?v=4 + username: Fruchtzwerg94 abirami.vina@gmail.com: avatar: https://avatars.githubusercontent.com/u/25847604?v=4 username: abirami-vina ahmelsamahy@gmail.com: avatar: https://avatars.githubusercontent.com/u/10195309?v=4 username: Ahelsamahy +alexis.barou@ultralytics.com: + avatar: 
https://avatars.githubusercontent.com/u/195105218?v=4 + username: LexBarou andrei.kochin@intel.com: avatar: https://avatars.githubusercontent.com/u/72827868?v=4 username: andrei-kochin @@ -91,6 +121,12 @@ ayush.chaurarsia@gmail.com: chr043416@gmail.com: avatar: https://avatars.githubusercontent.com/u/62513924?v=4 username: RizwanMunawar +davis.justin@mssm.org: + avatar: https://avatars.githubusercontent.com/u/23462437?v=4 + username: justincdavis +francesco.mttl@gmail.com: + avatar: https://avatars.githubusercontent.com/u/3855193?v=4 + username: ambitious-octopus glenn.jocher@ultralytics.com: avatar: https://avatars.githubusercontent.com/u/26833433?v=4 username: glenn-jocher @@ -109,6 +145,9 @@ lakshantha@ultralytics.com: lakshanthad@yahoo.com: avatar: https://avatars.githubusercontent.com/u/20147381?v=4 username: lakshanthad +makei05@outlook.de: + avatar: https://avatars.githubusercontent.com/u/78843978?v=4 + username: Skillnoob matthewnoyce@icloud.com: avatar: https://avatars.githubusercontent.com/u/131261051?v=4 username: MatthewNoyce @@ -128,8 +167,8 @@ rulosanti@gmail.com: avatar: null username: null shuizhuyuanluo@126.com: - avatar: null - username: null + avatar: https://avatars.githubusercontent.com/u/171016?v=4 + username: nihui sometimesocrazy@gmail.com: avatar: null username: null @@ -145,3 +184,9 @@ web@ultralytics.com: xinwang614@gmail.com: avatar: https://avatars.githubusercontent.com/u/17264618?v=4 username: GreatV +zhaode.wzd@alibaba-inc.com: + avatar: https://avatars.githubusercontent.com/u/8401806?v=4 + username: wangzhaode +zhushuoyu0501@gmail.com: + avatar: null + username: null diff --git a/docs/model_data.py b/docs/model_data.py new file mode 100644 index 00000000000..1ae003799ed --- /dev/null +++ b/docs/model_data.py @@ -0,0 +1,93 @@ +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license + +data = { + "YOLO11": { + "n": {"size": 640, "map": 39.5, "cpu": 56.1, "t4": 1.5, "params": 2.6, "flops": 6.5}, + "s": 
{"size": 640, "map": 47.0, "cpu": 90.0, "t4": 2.5, "params": 9.4, "flops": 21.5}, + "m": {"size": 640, "map": 51.5, "cpu": 183.2, "t4": 4.7, "params": 20.1, "flops": 68.0}, + "l": {"size": 640, "map": 53.4, "cpu": 238.6, "t4": 6.2, "params": 25.3, "flops": 86.9}, + "x": {"size": 640, "map": 54.7, "cpu": 462.8, "t4": 11.3, "params": 56.9, "flops": 194.9}, + }, + "YOLOv10": { + "n": {"size": 640, "map": 39.5, "cpu": "", "t4": 1.56, "params": 2.3, "flops": 6.7}, + "s": {"size": 640, "map": 46.7, "cpu": "", "t4": 2.66, "params": 7.2, "flops": 21.6}, + "m": {"size": 640, "map": 51.3, "cpu": "", "t4": 5.48, "params": 15.4, "flops": 59.1}, + "b": {"size": 640, "map": 52.7, "cpu": "", "t4": 6.54, "params": 24.4, "flops": 92.0}, + "l": {"size": 640, "map": 53.3, "cpu": "", "t4": 8.33, "params": 29.5, "flops": 120.3}, + "x": {"size": 640, "map": 54.4, "cpu": "", "t4": 12.2, "params": 56.9, "flops": 160.4}, + }, + "YOLOv9": { + "t": {"size": 640, "map": 38.3, "cpu": "", "t4": 2.3, "params": 2.0, "flops": 7.7}, + "s": {"size": 640, "map": 46.8, "cpu": "", "t4": 3.54, "params": 7.1, "flops": 26.4}, + "m": {"size": 640, "map": 51.4, "cpu": "", "t4": 6.43, "params": 20.0, "flops": 76.3}, + "c": {"size": 640, "map": 53.0, "cpu": "", "t4": 7.16, "params": 25.3, "flops": 102.1}, + "e": {"size": 640, "map": 55.6, "cpu": "", "t4": 16.77, "params": 57.3, "flops": 189.0}, + }, + "YOLOv8": { + "n": {"size": 640, "map": 37.3, "cpu": 80.4, "t4": 1.47, "params": 3.2, "flops": 8.7}, + "s": {"size": 640, "map": 44.9, "cpu": 128.4, "t4": 2.66, "params": 11.2, "flops": 28.6}, + "m": {"size": 640, "map": 50.2, "cpu": 234.7, "t4": 5.86, "params": 25.9, "flops": 78.9}, + "l": {"size": 640, "map": 52.9, "cpu": 375.2, "t4": 9.06, "params": 43.7, "flops": 165.2}, + "x": {"size": 640, "map": 53.9, "cpu": 479.1, "t4": 14.37, "params": 68.2, "flops": 257.8}, + }, + "YOLOv7": { + "l": {"size": 640, "map": 51.4, "cpu": "", "t4": 6.84, "params": 36.9, "flops": 104.7}, + "x": {"size": 640, "map": 53.1, 
"cpu": "", "t4": 11.57, "params": 71.3, "flops": 189.9}, + }, + "YOLOv6-3.0": { + "n": {"size": 640, "map": 37.5, "cpu": "", "t4": 1.17, "params": 4.7, "flops": 11.4}, + "s": {"size": 640, "map": 45.0, "cpu": "", "t4": 2.66, "params": 18.5, "flops": 45.3}, + "m": {"size": 640, "map": 50.0, "cpu": "", "t4": 5.28, "params": 34.9, "flops": 85.8}, + "l": {"size": 640, "map": 52.8, "cpu": "", "t4": 8.95, "params": 59.6, "flops": 150.7}, + }, + "YOLOv5": { + "n": {"size": 640, "map": 28.0, "cpu": 73.6, "t4": 1.12, "params": 2.6, "flops": 7.7}, + "s": {"size": 640, "map": 37.4, "cpu": 120.7, "t4": 1.92, "params": 9.1, "flops": 24.0}, + "m": {"size": 640, "map": 45.4, "cpu": 233.9, "t4": 4.03, "params": 25.1, "flops": 64.2}, + "l": {"size": 640, "map": 49.0, "cpu": 408.4, "t4": 6.61, "params": 53.2, "flops": 135.0}, + "x": {"size": 640, "map": 50.7, "cpu": 763.2, "t4": 11.89, "params": 97.2, "flops": 246.4}, + }, + "PP-YOLOE+": { + "t": {"size": 640, "map": 39.9, "cpu": "", "t4": 2.84, "params": 4.85, "flops": 19.15}, + "s": {"size": 640, "map": 43.7, "cpu": "", "t4": 2.62, "params": 7.93, "flops": 17.36}, + "m": {"size": 640, "map": 49.8, "cpu": "", "t4": 5.56, "params": 23.43, "flops": 49.91}, + "l": {"size": 640, "map": 52.9, "cpu": "", "t4": 8.36, "params": 52.20, "flops": 110.07}, + "x": {"size": 640, "map": 54.7, "cpu": "", "t4": 14.3, "params": 98.42, "flops": 206.59}, + }, + "DAMO-YOLO": { + "t": {"size": 640, "map": 42.0, "cpu": "", "t4": 2.32, "params": 8.5, "flops": 18.1}, + "s": {"size": 640, "map": 46.0, "cpu": "", "t4": 3.45, "params": 16.3, "flops": 37.8}, + "m": {"size": 640, "map": 49.2, "cpu": "", "t4": 5.09, "params": 28.2, "flops": 61.8}, + "l": {"size": 640, "map": 50.8, "cpu": "", "t4": 7.18, "params": 42.1, "flops": 97.3}, + }, + "YOLOX": { + "nano": {"size": 416, "map": 25.8, "cpu": "", "t4": "", "params": 0.91, "flops": 1.08}, + "tiny": {"size": 416, "map": 32.8, "cpu": "", "t4": "", "params": 5.06, "flops": 6.45}, + "s": {"size": 640, "map": 40.5, 
"cpu": "", "t4": 2.56, "params": 9.0, "flops": 26.8}, + "m": {"size": 640, "map": 46.9, "cpu": "", "t4": 5.43, "params": 25.3, "flops": 73.8}, + "l": {"size": 640, "map": 49.7, "cpu": "", "t4": 9.04, "params": 54.2, "flops": 155.6}, + "x": {"size": 640, "map": 51.1, "cpu": "", "t4": 16.1, "params": 99.1, "flops": 281.9}, + }, + "RTDETRv2": { + "s": {"size": 640, "map": 48.1, "cpu": "", "t4": 5.03, "params": 20, "flops": 60}, + "m": {"size": 640, "map": 51.9, "cpu": "", "t4": 7.51, "params": 36, "flops": 100}, + "l": {"size": 640, "map": 53.4, "cpu": "", "t4": 9.76, "params": 42, "flops": 136}, + "x": {"size": 640, "map": 54.3, "cpu": "", "t4": 15.03, "params": 76, "flops": 259}, + }, + "EfficientDet": { + "d0": {"size": 640, "map": 34.6, "cpu": 10.2, "t4": 3.92, "params": 3.9, "flops": 2.54}, + "d1": {"size": 640, "map": 40.5, "cpu": 13.5, "t4": 7.31, "params": 6.6, "flops": 6.10}, + "d2": {"size": 640, "map": 43.0, "cpu": 17.7, "t4": 10.92, "params": 8.1, "flops": 11.0}, + "d3": {"size": 640, "map": 47.5, "cpu": 28.0, "t4": 19.59, "params": 12.0, "flops": 24.9}, + "d4": {"size": 640, "map": 49.7, "cpu": 42.8, "t4": 33.55, "params": 20.7, "flops": 55.2}, + "d5": {"size": 640, "map": 51.5, "cpu": 72.5, "t4": 67.86, "params": 33.7, "flops": 130.0}, + "d6": {"size": 640, "map": 52.6, "cpu": 92.8, "t4": 89.29, "params": 51.9, "flops": 226.0}, + "d7": {"size": 640, "map": 53.7, "cpu": 122.0, "t4": 128.07, "params": 51.9, "flops": 325.0}, + }, +} + +if __name__ == "__main__": + import json + + with open("model_data.json", "w") as f: + json.dump(data, f) diff --git a/docs/overrides/assets/favicon.ico b/docs/overrides/assets/favicon.ico deleted file mode 100644 index 7aa5066187ae0bb3179a5bc13c282e481871404d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9662 zcmds-3vd)w7KSg80{UJSQmQ_$D3L?75q9Q6FqRT4cW7P#@d5IAb5z%E`MTnS1 zS1~FoYIHH`t|TELJVFve5)uRwV;~6xh$tEmBJv0+i_+B8l*j*c_nn!{OwS~w2dfNq zxJqZxcVCfli4YeEA^IcmD=`(pzdsv4nF&fF?nkUZR3i=} 
z4k4-#X^49e-OdI}N7evSSHv8|Q3OQ^-cwR$qKfOt8nHSTEWHt1qA-WsQxV>Ceyq6K zGq26wa0cvpKf8XbJz!nd%=S*O>phfaCid*wo_x7^4!GyLz^?Z&$4$0qbAdY^(d;5W zU-vTFx^5lES~Fc_*Pp>8xA>yX1NQtF@MlC@*Nx-Z34ah^KNbW21~>Sk%>i!qSZ@sY zf6PvZkG6cLkAs`9U9DL+$JUko<74j0zi0m3s}ya#Ls9uu>D@M4dMlog-tCV|?`IE7 zPw52d$seseIRhQFch>Wtb*ZA_`y|!9Ea_mLK|fR&bab0RO5 zk{Z`b`gV<^Z?Yxrc?dP%$_+Zc)1YHtfW6Y7@3(>- zzdKS2_Dzz$D>Uff2C%OKd#=TPAXC!UpGx}b6G>mDOWKtxy`>YBXVqoubgcE?yw8v) zYK;`;#Yo3@fV~PiN8a1ABkzUyOdQzv+w2Yhm9#HS(%yHlkEPPH`*k^I*Q+unz0j$B z#>nQq#@8oz``Ld8VVB4|^y0w&74mMfe<-Qp1F*jb_T`f5-;@M)0y}GQXU&W9%1#$b zdxBF9MjT=x*u7!dKeIJc0=*PTI#dA72A$l6yw^Z4)zJFq(9CvdhS|%Y_Y&yEWasPH&wd~R z`<~FeYjy%VRWFqE#WOOs>ghAPPqW$cH2ZM}_QNJSG*e*JAf0z7_Py_-{=5y%EQ4lV zGj&gO|B+O)1bKf!(vD=Xqc(t@KL4kr%BN&n#oV49^0iY9G7i_OFYh7jTpyA5!^P0d zMra20@gVABBiF|qNe8l^nU&DY|Dc(VpqUlW`uos&81~vQ>{Whtf`<@l?b421577Do z&ijeFw)HWzwnt}gaA2=nDk+40fuyQ`A@B2%_a`M)%mw>nlD5r~wDn;*y*+yX_wbYT zh6n7;$a^~UJ{5Il@X-p{`}?gjzNt)Jc%(!Qh2E2p_blZ7wCPJ>@@}%HS@jW~MfGn$ zGpVRCuYmm}Xy)J0%wlL}5%dy)9sW&QX2{d!)6~F7djPjY6ysW*aiUiCh;*(#2KHXi z%slwg(Li4ctnHc|UezJ?t;oC0PUX`il}%Pvr4yB-KQ!+)`tlw$n;vuf#&gf#4fomH zJ9p*R!9QR>51yLNo*K-)8+i|A-+{bWbKakYW}b#-{sGO*gI*qoUS>ltkAOW1d4JIC zr)cxvC4rqv#;cjZYlnNCk)YzhdK)4gu^-Wj;9iACe$CI=#$+b=Ys3DJ3<36;@Fg#M zYLG9P^|2-edeQYUD!b%f(9ixi^jr6;rlPwe-z$S_G@>4%d#gwzKf@Urm;==I$zb;e zcxq^E4`8ndn|J7Cs?0;3X=`KY@r2r4Wa!0Y=lfCQ{cc+`w?i*up!Hjzmr;uHMj-DuC|Y-&qBYkj${nHxVV&U! zoi{hfq53m`FV%A|)7F=CZHHbaqCQ%+-C`dHy*se4)$D&#wE8M_7uaXVh(Ag5!&BEd zvqLjM?1f?2*CFq1*as=fxdPwgB&d`a@h{N)sO>BK{Q&z?h^PAUj@qv4Bi9(p8xFl( z5B8zZ%+=5go*k_IBiIKj${wKRf_-tU`Qb~;*_W*P$iC#SkJ?jhUuwg?-hn+E?08;5 zS^dnKA1nR%d;q=gLO;8sr$%PSvoX$l_T^yj5B5uN%_f@PXWkH_{QLY7z3;fPDeOxY zdog+$pQi@+Qa*espgt1nBdr+%&44}U${=>D<|JluU!0&$xV(oZ^rgN&9rdvnYWwNZ z@pgT5@YHbZs4-6L4H*eN%}WgIsV?*Tyj@?z=-bo7d1@G6vhr@#M~i(xfM)!)e!iX? 
z?ED=r`@S3XpFhix?{(3)C!&}6$?RoNAGwza=SyqZmzdqEk6dGDRX^z6|D2Q=`8#kc z`SvF8dOE@l{)sv-!6W+ic}~9M^VI0<+^dfbp4+}hT-wP!q=dV-)N2)vHR8|@qy^_a zj4zpaM}6cP6P#0@ZuGo*Ty?SJK6(Ly_Za-XNUuFOe?GrI+E2kPDh|EOOYkMHlP{UI z9na>2*!jM7cC`8VIL;e}s6s?y*2gMwp0C$w&-{E2`mj;()Y?E#_4!gLJOBTR!R?)! z&qH11`}dKEw-B`meSg5?P6YSd@QyZ@QAd^#x!0&TT&ttuOYefcmcIkw??mji^cur* z`t@?0m#DQ5kq|_U-K{aXi#77^5WfhqC~))&F@j&~u~mpZ{Q6AC7W@5;LUiYO%7i$F zM<&K!SRMdEU^y)8A6xlAA0d9`H=K)8snHs{0HiyghxQ9&i?|3A3ehN#19Qa5U4$s( zgBX*=ImwYmyvaPSN1R_~pC3A6e$3lwuhYUyG>UUun6FigXk}inSo90;gOKH965?0S C#gL)^ diff --git a/docs/overrides/javascript/benchmark.js b/docs/overrides/javascript/benchmark.js new file mode 100644 index 00000000000..ea65d377ead --- /dev/null +++ b/docs/overrides/javascript/benchmark.js @@ -0,0 +1,229 @@ +// YOLO models chart --------------------------------------------------------------------------------------------------- +const data = { + YOLO11: { + n: { speed: 1.55, mAP: 39.5 }, + s: { speed: 2.63, mAP: 47.0 }, + m: { speed: 5.27, mAP: 51.4 }, + l: { speed: 6.84, mAP: 53.2 }, + x: { speed: 12.49, mAP: 54.7 }, + }, + YOLOv10: { + n: { speed: 1.56, mAP: 39.5 }, + s: { speed: 2.66, mAP: 46.7 }, + m: { speed: 5.48, mAP: 51.3 }, + b: { speed: 6.54, mAP: 52.7 }, + l: { speed: 8.33, mAP: 53.3 }, + x: { speed: 12.2, mAP: 54.4 }, + }, + YOLOv9: { + t: { speed: 2.3, mAP: 37.8 }, + s: { speed: 3.54, mAP: 46.5 }, + m: { speed: 6.43, mAP: 51.5 }, + c: { speed: 7.16, mAP: 52.8 }, + e: { speed: 16.77, mAP: 55.1 }, + }, + YOLOv8: { + n: { speed: 1.47, mAP: 37.3 }, + s: { speed: 2.66, mAP: 44.9 }, + m: { speed: 5.86, mAP: 50.2 }, + l: { speed: 9.06, mAP: 52.9 }, + x: { speed: 14.37, mAP: 53.9 }, + }, + YOLOv7: { l: { speed: 6.84, mAP: 51.4 }, x: { speed: 11.57, mAP: 53.1 } }, + "YOLOv6-3.0": { + n: { speed: 1.17, mAP: 37.5 }, + s: { speed: 2.66, mAP: 45.0 }, + m: { speed: 5.28, mAP: 50.0 }, + l: { speed: 8.95, mAP: 52.8 }, + }, + YOLOv5: { + n: { speed: 1.12, mAP: 28.0 }, + s: { speed: 1.92, mAP: 37.4 }, + m: { speed: 4.03, 
mAP: 45.4 }, + l: { speed: 6.61, mAP: 49.0 }, + x: { speed: 11.89, mAP: 50.7 }, + }, + "PP-YOLOE+": { + t: { speed: 2.84, mAP: 39.9 }, + s: { speed: 2.62, mAP: 43.7 }, + m: { speed: 5.56, mAP: 49.8 }, + l: { speed: 8.36, mAP: 52.9 }, + x: { speed: 14.3, mAP: 54.7 }, + }, + "DAMO-YOLO": { + t: { speed: 2.32, mAP: 42.0 }, + s: { speed: 3.45, mAP: 46.0 }, + m: { speed: 5.09, mAP: 49.2 }, + l: { speed: 7.18, mAP: 50.8 }, + }, + YOLOX: { + s: { speed: 2.56, mAP: 40.5 }, + m: { speed: 5.43, mAP: 46.9 }, + l: { speed: 9.04, mAP: 49.7 }, + x: { speed: 16.1, mAP: 51.1 }, + }, + RTDETRv2: { + s: { speed: 5.03, mAP: 48.1 }, + m: { speed: 7.51, mAP: 51.9 }, + l: { speed: 9.76, mAP: 53.4 }, + x: { speed: 15.03, mAP: 54.3 }, + }, + EfficientDet: { + d0: { speed: 3.92, mAP: 33.8 }, + d1: { speed: 7.31, mAP: 39.6 }, + d2: { speed: 10.92, mAP: 43.0 }, + d3: { speed: 19.59, mAP: 45.8 }, + // d4: { speed: 33.55, mAP: 49.4 }, + // d5: { speed: 67.86, mAP: 50.7 }, + // d6: { speed: 89.29, mAP: 51.7 }, + // d7: { speed: 128.07, mAP: 53.7 }, + // d8: { speed: 157.57, mAP: 55.1 } + }, +}; + +let modelComparisonChart = null; // chart variable will hold the reference to the current chart instance. + +// Function to lighten a hex color by a specified amount. +function lightenHexColor(color, amount = 0.5) { + const r = parseInt(color.slice(1, 3), 16); + const g = parseInt(color.slice(3, 5), 16); + const b = parseInt(color.slice(5, 7), 16); + const newR = Math.min(255, Math.round(r + (255 - r) * amount)); + const newG = Math.min(255, Math.round(g + (255 - g) * amount)); + const newB = Math.min(255, Math.round(b + (255 - b) * amount)); + return `#${newR.toString(16).padStart(2, "0")}${newG.toString(16).padStart(2, "0")}${newB.toString(16).padStart(2, "0")}`; +} + +// Function to update the benchmarks chart. +function updateChart(initialDatasets = []) { + if (modelComparisonChart) { + modelComparisonChart.destroy(); + } // If a chart instance already exists, destroy it. 
+ + // Define a specific color map for models. + const colorMap = { + YOLO11: "#0b23a9", + YOLOv10: "#ff7f0e", + YOLOv9: "#2ca02c", + YOLOv8: "#d62728", + YOLOv7: "#9467bd", + "YOLOv6-3.0": "#8c564b", + YOLOv5: "#e377c2", + "PP-YOLOE+": "#7f7f7f", + "DAMO-YOLO": "#bcbd22", + YOLOX: "#17becf", + RTDETRv2: "#eccd22", + EfficientDet: "#000000", + }; + + // Always include all models in the dataset creation + const datasets = Object.keys(data).map((algorithm, i) => { + const baseColor = + colorMap[algorithm] || `hsl(${Math.random() * 360}, 70%, 50%)`; + const lineColor = + Object.keys(data).indexOf(algorithm) === 0 + ? baseColor + : lightenHexColor(baseColor, 0.6); + + return { + label: algorithm, + data: Object.entries(data[algorithm]).map(([version, point]) => ({ + x: point.speed, + y: point.mAP, + version: version.toUpperCase(), + })), + fill: false, + borderColor: lineColor, + tension: 0.2, + pointRadius: Object.keys(data).indexOf(algorithm) === 0 ? 7 : 4, + pointHoverRadius: Object.keys(data).indexOf(algorithm) === 0 ? 9 : 6, + pointBackgroundColor: lineColor, + pointBorderColor: "#ffffff", + borderWidth: i === 0 ? 3 : 1.5, + hidden: + initialDatasets.length > 0 && !initialDatasets.includes(algorithm), + }; + }); + + // Create a new chart instance. + modelComparisonChart = new Chart( + document.getElementById("modelComparisonChart").getContext("2d"), + { + type: "line", + data: { datasets }, + options: { + //aspectRatio: 2.5, // higher is wider + plugins: { + legend: { + display: true, + position: "right", + align: "start", // start, end, center + labels: { color: "#808080" }, + onClick: (e, legendItem, legend) => { + const index = legendItem.datasetIndex; + const ci = legend.chart; + const meta = ci.getDatasetMeta(index); + meta.hidden = + meta.hidden === null ? !ci.data.datasets[index].hidden : null; + ci.update(); + }, + }, // Configure the legend. 
+ tooltip: { + callbacks: { + label: (tooltipItem) => { + const { dataset, dataIndex } = tooltipItem; + const point = dataset.data[dataIndex]; + return `${dataset.label}${point.version.toLowerCase()}: Speed = ${point.x}ms/img, mAP50-95 = ${point.y}`; // Custom tooltip label. + }, + }, + mode: "nearest", + intersect: false, + }, // Configure the tooltip. + }, + interaction: { mode: "nearest", axis: "x", intersect: false }, // Configure the interaction mode. + scales: { + x: { + type: "linear", + position: "bottom", + title: { + display: true, + text: "Latency T4 TensorRT10 FP16 (ms/img)", + color: "#808080", + }, + grid: { color: "#e0e0e0" }, + ticks: { color: "#808080" }, + min: 0, + max: 18, + }, + y: { + title: { display: true, text: "COCO mAP 50-95", color: "#808080" }, + grid: { color: "#e0e0e0" }, + ticks: { color: "#808080" }, + min: 36, + max: 56, + }, + }, + }, + }, + ); +} + +function initChart(activeModels) { + updateChart(activeModels); +} + +document$.subscribe(function () { + (function initializeApp() { + if (typeof Chart !== "undefined") { + // Get active models from page config or use default + // e.g. + const pageConfig = document + .getElementById("modelComparisonChart") + .getAttribute("active-models"); + const activeModels = pageConfig ? 
JSON.parse(pageConfig) : []; + initChart(activeModels); + } else { + setTimeout(initializeApp, 50); // Retry every 50 ms + } + })(); +}); diff --git a/docs/overrides/javascript/extra.js b/docs/overrides/javascript/extra.js index 3233a644119..2de7572e76b 100644 --- a/docs/overrides/javascript/extra.js +++ b/docs/overrides/javascript/extra.js @@ -1,69 +1,145 @@ -// Function that applies light/dark theme based on the user's preference -const applyAutoTheme = () => { - // Determine the user's preferred color scheme - const prefersLight = window.matchMedia("(prefers-color-scheme: light)").matches; - const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; +// Apply theme colors based on dark/light mode +const applyTheme = (isDark) => { + document.body.setAttribute( + "data-md-color-scheme", + isDark ? "slate" : "default", + ); + document.body.setAttribute( + "data-md-color-primary", + isDark ? "black" : "indigo", + ); +}; - // Apply the appropriate attributes based on the user's preference - if (prefersLight) { - document.body.setAttribute("data-md-color-scheme", "default"); - document.body.setAttribute("data-md-color-primary", "indigo"); - } else if (prefersDark) { - document.body.setAttribute("data-md-color-scheme", "slate"); - document.body.setAttribute("data-md-color-primary", "black"); +// Check and apply appropriate theme based on system/user preference +const checkTheme = () => { + const palette = JSON.parse(localStorage.getItem(".__palette") || "{}"); + if (palette.index === 0) { + // Auto mode is selected + applyTheme(window.matchMedia("(prefers-color-scheme: dark)").matches); } }; -// Function that checks and applies light/dark theme based on the user's preference (if auto theme is enabled) -function checkAutoTheme() { - // Array of supported language codes -> each language has its own palette (stored in local storage) - const supportedLangCodes = ["en", "zh", "ko", "ja", "ru", "de", "fr", "es", "pt", "it", "tr", "vi", "nl"]; - // Get the 
URL path - const path = window.location.pathname; - // Extract the language code from the URL (assuming it's in the format /xx/...) - const langCode = path.split("/")[1]; - // Check if the extracted language code is in the supported languages - const isValidLangCode = supportedLangCodes.includes(langCode); - // Construct the local storage key based on the language code if valid, otherwise default to the root key - const localStorageKey = isValidLangCode ? `/${langCode}/.__palette` : "/.__palette"; - // Retrieve the palette from local storage using the constructed key - const palette = localStorage.getItem(localStorageKey); - if (palette) { - // Check if the palette's index is 0 (auto theme) - const paletteObj = JSON.parse(palette); - if (paletteObj && paletteObj.index === 0) { - applyAutoTheme(); - } - } -} +// Watch for system theme changes +window + .matchMedia("(prefers-color-scheme: dark)") + .addEventListener("change", checkTheme); + +// Initialize theme handling on page load +document.addEventListener("DOMContentLoaded", () => { + // Watch for theme toggle changes + document + .getElementById("__palette_1") + ?.addEventListener( + "change", + (e) => e.target.checked && setTimeout(checkTheme), + ); + // Initial theme check + checkTheme(); +}); + +// Inkeep -------------------------------------------------------------------------------------------------------------- +document.addEventListener("DOMContentLoaded", () => { + const enableSearchBar = true; + + const inkeepScript = document.createElement("script"); + inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.18/dist/embed.js"; + inkeepScript.type = "module"; + inkeepScript.defer = true; + document.head.appendChild(inkeepScript); -// Run function when the script loads -checkAutoTheme(); + if (enableSearchBar) { + const containerDiv = document.createElement("div"); + containerDiv.style.transform = "scale(0.7)"; + containerDiv.style.transformOrigin = "left center"; -// Re-run the function when the 
user's preference changes (when the user changes their system theme) -window.matchMedia("(prefers-color-scheme: light)").addEventListener("change", checkAutoTheme); -window.matchMedia("(prefers-color-scheme: dark)").addEventListener("change", checkAutoTheme); + const inkeepDiv = document.createElement("div"); + inkeepDiv.id = "inkeepSearchBar"; + containerDiv.appendChild(inkeepDiv); -// Re-run the function when the palette changes (e.g. user switched from dark theme to auto theme) -// ! We can't use window.addEventListener("storage", checkAutoTheme) because it will NOT be triggered on the current tab -// ! So we have to use the following workaround: -// Get the palette input for auto theme -var autoThemeInput = document.getElementById("__palette_1"); -if (autoThemeInput) { - // Add a click event listener to the input - autoThemeInput.addEventListener("click", function () { - // Check if the auto theme is selected - if (autoThemeInput.checked) { - // Re-run the function after a short delay (to ensure that the palette has been updated) - setTimeout(applyAutoTheme); + const headerElement = document.querySelector(".md-header__inner"); + const searchContainer = headerElement.querySelector(".md-header__source"); + + if (headerElement && searchContainer) { + headerElement.insertBefore(containerDiv, searchContainer); } - }); -} + } -// Add iframe navigation -window.onhashchange = function() { - window.parent.postMessage({ - type: 'navigation', - hash: window.location.pathname + window.location.search + window.location.hash - }, '*'); -}; + // configure and initialize the widget + const addInkeepWidget = (componentType, targetElementId) => { + const inkeepWidget = Inkeep().embed({ + componentType, + ...(componentType !== "ChatButton" + ? 
{ targetElement: targetElementId } + : {}), + colorModeSync: { + observedElement: document.documentElement, + isDarkModeCallback: (el) => { + const currentTheme = el.getAttribute("data-color-mode"); + return currentTheme === "dark"; + }, + colorModeAttribute: "data-color-mode-scheme", + }, + properties: { + chatButtonType: "PILL", + fixedPositionXOffset: "1rem", + fixedPositionYOffset: "3rem", + chatButtonBgColor: "#E1FF25", + baseSettings: { + apiKey: "13dfec2e75982bc9bae3199a08e13b86b5fbacd64e9b2f89", + integrationId: "cm1shscmm00y26sj83lgxzvkw", + organizationId: "org_e3869az6hQZ0mXdF", + primaryBrandColor: "#E1FF25", + organizationDisplayName: "Ultralytics", + theme: { + stylesheetUrls: ["/stylesheets/style.css"], + }, + }, + modalSettings: { + // optional settings + }, + searchSettings: { + placeholder: "Search", + }, + aiChatSettings: { + chatSubjectName: "Ultralytics", + botAvatarSrcUrl: + "https://storage.googleapis.com/organization-image-assets/ultralytics-botAvatarSrcUrl-1729379860806.svg", + quickQuestions: [ + "What's new in Ultralytics YOLO11?", + "How can I get started with Ultralytics HUB?", + "How does Ultralytics Enterprise Licensing work?", + ], + getHelpCallToActions: [ + { + name: "Ask on Ultralytics GitHub", + url: "https://github.com/ultralytics/ultralytics", + icon: { + builtIn: "FaGithub", + }, + }, + { + name: "Ask on Ultralytics Discourse", + url: "https://community.ultralytics.com/", + icon: { + builtIn: "FaDiscourse", + }, + }, + { + name: "Ask on Ultralytics Discord", + url: "https://discord.com/invite/ultralytics", + icon: { + builtIn: "FaDiscord", + }, + }, + ], + }, + }, + }); + }; + inkeepScript.addEventListener("load", () => { + const widgetContainer = document.getElementById("inkeepSearchBar"); + + addInkeepWidget("ChatButton"); + widgetContainer && addInkeepWidget("SearchBar", "#inkeepSearchBar"); + }); +}); diff --git a/docs/overrides/javascript/giscus.js b/docs/overrides/javascript/giscus.js new file mode 100644 index 
00000000000..b57e4437d02 --- /dev/null +++ b/docs/overrides/javascript/giscus.js @@ -0,0 +1,85 @@ +// Giscus functionality +function loadGiscus() { + const giscusContainer = document.getElementById("giscus-container"); + if (!giscusContainer || giscusContainer.querySelector("script")) { + return; + } + + const script = document.createElement("script"); + script.src = "https://giscus.app/client.js"; + script.setAttribute("data-repo", "ultralytics/ultralytics"); + script.setAttribute("data-repo-id", "R_kgDOH-jzvQ"); + script.setAttribute("data-category", "Docs"); + script.setAttribute("data-category-id", "DIC_kwDOH-jzvc4CWLkL"); + script.setAttribute("data-mapping", "pathname"); + script.setAttribute("data-strict", "1"); + script.setAttribute("data-reactions-enabled", "1"); + script.setAttribute("data-emit-metadata", "0"); + script.setAttribute("data-input-position", "top"); + script.setAttribute("data-theme", "preferred_color_scheme"); + script.setAttribute("data-lang", "en"); + script.setAttribute("data-loading", "lazy"); + script.setAttribute("crossorigin", "anonymous"); + script.setAttribute("async", ""); + + giscusContainer.appendChild(script); + + // Synchronize Giscus theme with palette + var palette = __md_get("__palette"); + if (palette && typeof palette.color === "object") { + var theme = palette.color.scheme === "slate" ? "dark" : "light"; + script.setAttribute("data-theme", theme); + } + + // Register event handlers for theme changes + var ref = document.querySelector("[data-md-component=palette]"); + if (ref) { + ref.addEventListener("change", function () { + var palette = __md_get("__palette"); + if (palette && typeof palette.color === "object") { + var theme = palette.color.scheme === "slate" ? 
"dark" : "light"; + + // Instruct Giscus to change theme + var frame = document.querySelector(".giscus-frame"); + if (frame) { + frame.contentWindow.postMessage( + { giscus: { setConfig: { theme } } }, + "https://giscus.app", + ); + } + } + }); + } +} + +// Use Intersection Observer to load Giscus when the container is visible +function setupGiscusLoader() { + const giscusContainer = document.getElementById("giscus-container"); + + if (giscusContainer) { + const observer = new IntersectionObserver( + (entries) => { + entries.forEach((entry) => { + if (entry.isIntersecting) { + loadGiscus(); + observer.unobserve(entry.target); + } + }); + }, + { threshold: 0.1 }, + ); // Trigger when 10% of the element is visible + + observer.observe(giscusContainer); + } +} + +// Hook into MkDocs' navigation system +if (typeof document$ !== "undefined") { + document$.subscribe(() => { + // This function is called on every page load/change + setupGiscusLoader(); + }); +} else { + console.warn("MkDocs document$ not found. Falling back to DOMContentLoaded."); + document.addEventListener("DOMContentLoaded", setupGiscusLoader); +} diff --git a/docs/overrides/main.html b/docs/overrides/main.html index 18305746f05..233aec092ed 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -30,7 +30,7 @@
diff --git a/docs/overrides/partials/comments.html b/docs/overrides/partials/comments.html index a99f4f814b8..fdfce5d651b 100644 --- a/docs/overrides/partials/comments.html +++ b/docs/overrides/partials/comments.html @@ -1,51 +1,7 @@ {% if page.meta.comments %}

{{ lang.t("meta.comments") }}

- - + +
- - {% endif %} diff --git a/docs/overrides/partials/source-file.html b/docs/overrides/partials/source-file.html deleted file mode 100644 index 84e2ab1f7da..00000000000 --- a/docs/overrides/partials/source-file.html +++ /dev/null @@ -1,26 +0,0 @@ -{% import "partials/language.html" as lang with context %} - - - -
-
- - - - {% if page.meta.git_revision_date_localized %} - 📅 {{ lang.t("source.file.date.updated") }}: - {{ page.meta.git_revision_date_localized }} - {% if page.meta.git_creation_date_localized %} -
- 🎂 {{ lang.t("source.file.date.created") }}: - {{ page.meta.git_creation_date_localized }} - {% endif %} - - - {% elif page.meta.revision_date %} - 📅 {{ lang.t("source.file.date.updated") }}: - {{ page.meta.revision_date }} - {% endif %} -
-
diff --git a/docs/overrides/stylesheets/style.css b/docs/overrides/stylesheets/style.css index a9a89d9013e..5c9f3c22df2 100644 --- a/docs/overrides/stylesheets/style.css +++ b/docs/overrides/stylesheets/style.css @@ -76,7 +76,6 @@ div.highlight { .banner-wrapper { justify-content: space-between; gap: 16px; - padding: 16px; } @@ -121,7 +120,6 @@ div.highlight { .banner-wrapper > .banner-button-wrapper, .banner-wrapper > .banner-button-wrapper > .banner-button-wrapper { padding: 2px; - background-color: rgba(222, 255, 56, 0.2); } @@ -131,13 +129,10 @@ div.highlight { .banner-wrapper > .banner-button-wrapper > .banner-button-wrapper > button { cursor: pointer; - min-width: 132px; padding: 10px; - font-weight: 500; color: #111f68; - background-color: rgb(222, 255, 56); } @@ -156,13 +151,11 @@ div.highlight { .banner-wrapper { gap: 32px; - padding: 12px; } .banner-wrapper > .banner-content-wrapper { gap: 24px; - margin: 0 auto; } } @@ -217,6 +210,13 @@ div.highlight { height: 50px; border-radius: 50%; margin-right: 3px; + background-color: #f0f0f0; /* Placeholder color */ + opacity: 0; /* Start fully transparent */ + transition: opacity 0.3s ease-in-out; +} + +.author-link .hover-item[src] { + opacity: 1; /* Fade in when src is set (image loaded) */ } .hover-item:hover { @@ -264,3 +264,16 @@ div.highlight { } } /* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */ + +/* Inkeep ----------------------------------------------------------------------------------------------------------- */ +.ikp-floating-button { + color: #111f68; +} +#inkeepSearchBar { + transition: all 0.2s ease-in-out; +} +#inkeepSearchBar:hover { + transform: scale(1.1); + filter: brightness(1.2); +} +/* Inkeep ----------------------------------------------------------------------------------------------------------- */ diff --git a/examples/README.md b/examples/README.md index 931bdc634c7..ee06d337b62 100644 --- a/examples/README.md +++ 
b/examples/README.md @@ -1,6 +1,6 @@ -## Ultralytics YOLOv8 Example Applications +## Ultralytics Examples -This repository features a collection of real-world applications and walkthroughs, provided as either Python files or notebooks. Explore the examples below to see how YOLOv8 can be integrated into various applications. +This directory features a collection of real-world applications and walkthroughs, provided as either Python files or notebooks. Explore the examples below to see how YOLO can be integrated into various applications. ### Ultralytics YOLO Example Applications @@ -8,18 +8,21 @@ This repository features a collection of real-world applications and walkthrough | ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | ----------------------------------------------------------------------------------------- | | [YOLO ONNX Detection Inference with C++](./YOLOv8-CPP-Inference) | C++/ONNX | [Justas Bartnykas](https://github.com/JustasBart) | | [YOLO OpenCV ONNX Detection Python](./YOLOv8-OpenCV-ONNX-Python) | OpenCV/Python/ONNX | [Farid Inawan](https://github.com/frdteknikelektro) | -| [YOLOv8 .NET ONNX ImageSharp](https://github.com/dme-compunet/YOLOv8) | C#/ONNX/ImageSharp | [Compunet](https://github.com/dme-compunet) | +| [YOLO C# ONNX-Runtime](https://github.com/dme-compunet/YoloSharp) | .NET/ONNX-Runtime | [Compunet](https://github.com/dme-compunet) | | [YOLO .Net ONNX Detection C#](https://www.nuget.org/packages/Yolov8.Net) | C# .Net | [Samuel Stainback](https://github.com/sstainba) | | [YOLOv8 on NVIDIA Jetson(TensorRT and DeepStream)](https://wiki.seeedstudio.com/YOLOv8-DeepStream-TRT-Jetson/) | Python | [Lakshantha](https://github.com/lakshanthad) | | [YOLOv8 ONNXRuntime Python](./YOLOv8-ONNXRuntime) | Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) | +| [RTDETR ONNXRuntime Python](./RTDETR-ONNXRuntime-Python) | 
Python/ONNXRuntime | [Semih Demirel](https://github.com/semihhdemirel) | | [YOLOv8 ONNXRuntime CPP](./YOLOv8-ONNXRuntime-CPP) | C++/ONNXRuntime | [DennisJcy](https://github.com/DennisJcy), [Onuralp Sezer](https://github.com/onuralpszr) | | [RTDETR ONNXRuntime C#](https://github.com/Kayzwer/yolo-cs/blob/master/RTDETR.cs) | C#/ONNX | [Kayzwer](https://github.com/Kayzwer) | | [YOLOv8 SAHI Video Inference](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) | | [YOLOv8 Region Counter](https://github.com/RizwanMunawar/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py) | Python | [Muhammad Rizwan Munawar](https://github.com/RizwanMunawar) | | [YOLOv8 Segmentation ONNXRuntime Python](./YOLOv8-Segmentation-ONNXRuntime-Python) | Python/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) | | [YOLOv8 LibTorch CPP](./YOLOv8-LibTorch-CPP-Inference) | C++/LibTorch | [Myyura](https://github.com/Myyura) | -| [YOLOv8 OpenCV INT8 TFLite Python](./YOLOv8-OpenCV-int8-tflite-Python) | Python | [Wamiq Raza](https://github.com/wamiqraza) | +| [YOLOv8 OpenCV INT8 TFLite Python](./YOLOv8-TFLite-Python) | Python | [Wamiq Raza](https://github.com/wamiqraza) | | [YOLOv8 All Tasks ONNXRuntime Rust](./YOLOv8-ONNXRuntime-Rust) | Rust/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) | +| [YOLOv8 OpenVINO CPP](./YOLOv8-OpenVINO-CPP-Inference) | C++/OpenVINO | [Erlangga Yudi Pradana](https://github.com/rlggyp) | +| [YOLOv5-YOLO11 ONNXRuntime Rust](./YOLO-Series-ONNXRuntime-Rust) | Rust/ONNXRuntime | [jamjamjon](https://github.com/jamjamjon) | ### How to Contribute diff --git a/examples/RTDETR-ONNXRuntime-Python/README.md b/examples/RTDETR-ONNXRuntime-Python/README.md new file mode 100644 index 00000000000..1861da8295d --- /dev/null +++ b/examples/RTDETR-ONNXRuntime-Python/README.md @@ -0,0 +1,43 @@ +# RTDETR - ONNX Runtime + +This 
project implements RTDETR using ONNX Runtime. + +## Installation + +To run this project, you need to install the required dependencies. The following instructions will guide you through the installation process. + +### Installing Required Dependencies + +You can install the required dependencies by running the following command: + +```bash +pip install -r requirements.txt +``` + +### Installing `onnxruntime-gpu` + +If you have an NVIDIA GPU and want to leverage GPU acceleration, you can install the onnxruntime-gpu package using the following command: + +```bash +pip install onnxruntime-gpu +``` + +Note: Make sure you have the appropriate GPU drivers installed on your system. + +### Installing `onnxruntime` (CPU version) + +If you don't have an NVIDIA GPU or prefer to use the CPU version of onnxruntime, you can install the onnxruntime package using the following command: + +```bash +pip install onnxruntime +``` + +### Usage + +After successfully installing the required packages, you can run the RTDETR implementation using the following command: + +```bash +python main.py --model rtdetr-l.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5 +``` + +Make sure to replace rtdetr-l.onnx with the path to your RTDETR ONNX model file, image.jpg with the path to your input image, and adjust the confidence threshold (conf-thres) and IoU threshold (iou-thres) values as needed. 
diff --git a/examples/RTDETR-ONNXRuntime-Python/main.py b/examples/RTDETR-ONNXRuntime-Python/main.py new file mode 100644 index 00000000000..d794a7d648b --- /dev/null +++ b/examples/RTDETR-ONNXRuntime-Python/main.py @@ -0,0 +1,222 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +import argparse + +import cv2 +import numpy as np +import onnxruntime as ort +import torch + +from ultralytics.utils import ASSETS, yaml_load +from ultralytics.utils.checks import check_requirements, check_yaml + + +class RTDETR: + """RTDETR object detection model class for handling inference and visualization.""" + + def __init__(self, model_path, img_path, conf_thres=0.5, iou_thres=0.5): + """ + Initializes the RTDETR object with the specified parameters. + + Args: + model_path: Path to the ONNX model file. + img_path: Path to the input image. + conf_thres: Confidence threshold for object detection. + iou_thres: IoU threshold for non-maximum suppression + """ + self.model_path = model_path + self.img_path = img_path + self.conf_thres = conf_thres + self.iou_thres = iou_thres + + # Set up the ONNX runtime session with CUDA and CPU execution providers + self.session = ort.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + self.model_input = self.session.get_inputs() + self.input_width = self.model_input[0].shape[2] + self.input_height = self.model_input[0].shape[3] + + # Load class names from the COCO dataset YAML file + self.classes = yaml_load(check_yaml("coco8.yaml"))["names"] + + # Generate a color palette for drawing bounding boxes + self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) + + def draw_detections(self, box, score, class_id): + """ + Draws bounding boxes and labels on the input image based on the detected objects. + + Args: + box: Detected bounding box. + score: Corresponding detection score. + class_id: Class ID for the detected object. 
+ + Returns: + None + """ + # Extract the coordinates of the bounding box + x1, y1, x2, y2 = box + + # Retrieve the color for the class ID + color = self.color_palette[class_id] + + # Draw the bounding box on the image + cv2.rectangle(self.img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) + + # Create the label text with class name and score + label = f"{self.classes[class_id]}: {score:.2f}" + + # Calculate the dimensions of the label text + (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + + # Calculate the position of the label text + label_x = x1 + label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10 + + # Draw a filled rectangle as the background for the label text + cv2.rectangle( + self.img, + (int(label_x), int(label_y - label_height)), + (int(label_x + label_width), int(label_y + label_height)), + color, + cv2.FILLED, + ) + + # Draw the label text on the image + cv2.putText( + self.img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA + ) + + def preprocess(self): + """ + Preprocesses the input image before performing inference. + + Returns: + image_data: Preprocessed image data ready for inference. 
+ """ + # Read the input image using OpenCV + self.img = cv2.imread(self.img_path) + + # Get the height and width of the input image + self.img_height, self.img_width = self.img.shape[:2] + + # Convert the image color space from BGR to RGB + img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB) + + # Resize the image to match the input shape + img = cv2.resize(img, (self.input_width, self.input_height)) + + # Normalize the image data by dividing it by 255.0 + image_data = np.array(img) / 255.0 + + # Transpose the image to have the channel dimension as the first dimension + image_data = np.transpose(image_data, (2, 0, 1)) # Channel first + + # Expand the dimensions of the image data to match the expected input shape + image_data = np.expand_dims(image_data, axis=0).astype(np.float32) + + # Return the preprocessed image data + return image_data + + def bbox_cxcywh_to_xyxy(self, boxes): + """ + Converts bounding boxes from (center x, center y, width, height) format to (x_min, y_min, x_max, y_max) format. + + Args: + boxes (numpy.ndarray): An array of shape (N, 4) where each row represents + a bounding box in (cx, cy, w, h) format. + + Returns: + numpy.ndarray: An array of shape (N, 4) where each row represents + a bounding box in (x_min, y_min, x_max, y_max) format. + """ + # Calculate half width and half height of the bounding boxes + half_width = boxes[:, 2] / 2 + half_height = boxes[:, 3] / 2 + + # Calculate the coordinates of the bounding boxes + x_min = boxes[:, 0] - half_width + y_min = boxes[:, 1] - half_height + x_max = boxes[:, 0] + half_width + y_max = boxes[:, 1] + half_height + + # Return the bounding boxes in (x_min, y_min, x_max, y_max) format + return np.column_stack((x_min, y_min, x_max, y_max)) + + def postprocess(self, model_output): + """ + Postprocesses the model output to extract detections and draw them on the input image. + + Args: + model_output: Output of the model inference. + + Returns: + np.array: Annotated image with detections. 
+ """ + # Squeeze the model output to remove unnecessary dimensions + outputs = np.squeeze(model_output[0]) + + # Extract bounding boxes and scores from the model output + boxes = outputs[:, :4] + scores = outputs[:, 4:] + + # Get the class labels and scores for each detection + labels = np.argmax(scores, axis=1) + scores = np.max(scores, axis=1) + + # Apply confidence threshold to filter out low-confidence detections + mask = scores > self.conf_thres + boxes, scores, labels = boxes[mask], scores[mask], labels[mask] + + # Convert bounding boxes to (x_min, y_min, x_max, y_max) format + boxes = self.bbox_cxcywh_to_xyxy(boxes) + + # Scale bounding boxes to match the original image dimensions + boxes[:, 0::2] *= self.img_width + boxes[:, 1::2] *= self.img_height + + # Draw detections on the image + for box, score, label in zip(boxes, scores, labels): + self.draw_detections(box, score, label) + + # Return the annotated image + return self.img + + def main(self): + """ + Executes the detection on the input image using the ONNX model. + + Returns: + np.array: Output image with annotations. 
+ """ + # Preprocess the image for model input + image_data = self.preprocess() + + # Run the model inference + model_output = self.session.run(None, {self.model_input[0].name: image_data}) + + # Process and return the model output + return self.postprocess(model_output) + + +if __name__ == "__main__": + # Set up argument parser for command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="rtdetr-l.onnx", help="Path to the ONNX model file.") + parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to the input image.") + parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold for object detection.") + parser.add_argument("--iou-thres", type=float, default=0.5, help="IoU threshold for non-maximum suppression.") + args = parser.parse_args() + + # Check for dependencies and set up ONNX runtime + check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime") + + # Create the detector instance with specified parameters + detection = RTDETR(args.model, args.img, args.conf_thres, args.iou_thres) + + # Perform detection and get the output image + output_image = detection.main() + + # Display the annotated output image + cv2.namedWindow("Output", cv2.WINDOW_NORMAL) + cv2.imshow("Output", output_image) + cv2.waitKey(0) diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml b/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml new file mode 100644 index 00000000000..048ece887df --- /dev/null +++ b/examples/YOLO-Series-ONNXRuntime-Rust/Cargo.toml @@ -0,0 +1,14 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +[package] +name = "YOLO-ONNXRuntime-Rust" +version = "0.1.0" +edition = "2021" +authors = ["Jamjamjon "] + +[dependencies] +anyhow = "1.0.92" +clap = "4.5.20" +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +usls = { version = "0.0.19", features = ["auto"] } diff --git 
a/examples/YOLO-Series-ONNXRuntime-Rust/README.md b/examples/YOLO-Series-ONNXRuntime-Rust/README.md new file mode 100644 index 00000000000..0b6fabe20d9 --- /dev/null +++ b/examples/YOLO-Series-ONNXRuntime-Rust/README.md @@ -0,0 +1,94 @@ +# YOLO-Series ONNXRuntime Rust Demo for Core YOLO Tasks + +This repository provides a Rust demo for key YOLO-Series tasks such as `Classification`, `Segmentation`, `Detection`, `Pose Detection`, and `OBB` using ONNXRuntime. It supports various YOLO models (v5 - 11) across multiple vision tasks. + +## Introduction + +- This example leverages the latest versions of both ONNXRuntime and YOLO models. +- We utilize the [usls](https://github.com/jamjamjon/usls/tree/main) crate to streamline YOLO model inference, providing efficient data loading, visualization, and optimized inference performance. + +## Features + +- **Extensive Model Compatibility**: Supports `YOLOv5`, `YOLOv6`, `YOLOv7`, `YOLOv8`, `YOLOv9`, `YOLOv10`, `YOLO11`, `YOLO-world`, `RTDETR`, and others, covering a wide range of YOLO versions. +- **Versatile Task Coverage**: Includes `Classification`, `Segmentation`, `Detection`, `Pose`, and `OBB`. +- **Precision Flexibility**: Works with `FP16` and `FP32` ONNX models. +- **Execution Providers**: Accelerated support for `CPU`, `CUDA`, `CoreML`, and `TensorRT`. +- **Dynamic Input Shapes**: Dynamically adjusts to variable `batch`, `width`, and `height` dimensions for flexible model input. +- **Flexible Data Loading**: The `DataLoader` handles images, folders, videos, and video streams. +- **Real-Time Display and Video Export**: `Viewer` provides real-time frame visualization and video export functions, similar to OpenCV’s `imshow()` and `imwrite()`. +- **Enhanced Annotation and Visualization**: The `Annotator` facilitates comprehensive result rendering, with support for bounding boxes (HBB), oriented bounding boxes (OBB), polygons, masks, keypoints, and text labels. + +## Setup Instructions + +### 1. ONNXRuntime Linking + +
+You have two options to link the ONNXRuntime library: + +- **Option 1: Manual Linking** + + - For detailed setup, consult the [ONNX Runtime linking documentation](https://ort.pyke.io/setup/linking). + - **Linux or macOS**: + 1. Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases). + 2. Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: + ```shell + export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0 + ``` + +- **Option 2: Automatic Download** + - Use the `--features auto` flag to handle downloading automatically: + ```shell + cargo run -r --example yolo --features auto + ``` + +
+ +### 2. \[Optional\] Install CUDA, CuDNN, and TensorRT + +- The CUDA execution provider requires CUDA version `12.x`. +- The TensorRT execution provider requires both CUDA `12.x` and TensorRT `10.x`. + +### 3. \[Optional\] Install ffmpeg + +To view video frames and save video inferences, install `rust-ffmpeg`. For instructions, see: +[https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies](https://github.com/zmwangx/rust-ffmpeg/wiki/Notes-on-building#dependencies) + +## Get Started + +```Shell +# customized +cargo run -r -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8 + +# Classify +cargo run -r -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5 +cargo run -r -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8 +cargo run -r -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLO11 + +# Detect +cargo run -r -- --task detect --ver v5 --scale n # YOLOv5 +cargo run -r -- --task detect --ver v6 --scale n # YOLOv6 +cargo run -r -- --task detect --ver v7 --scale t # YOLOv7 +cargo run -r -- --task detect --ver v8 --scale n # YOLOv8 +cargo run -r -- --task detect --ver v9 --scale t # YOLOv9 +cargo run -r -- --task detect --ver v10 --scale n # YOLOv10 +cargo run -r -- --task detect --ver v11 --scale n # YOLO11 +cargo run -r -- --task detect --ver rtdetr --scale l # RTDETR + +# Pose +cargo run -r -- --task pose --ver v8 --scale n # YOLOv8-Pose +cargo run -r -- --task pose --ver v11 --scale n # YOLO11-Pose + +# Segment +cargo run -r -- --task segment --ver v5 --scale n # YOLOv5-Segment +cargo run -r -- --task segment --ver v8 --scale n # YOLOv8-Segment +cargo run -r -- --task segment --ver v11 --scale n # YOLO11-Segment +cargo run -r -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # FastSAM + +# OBB +cargo run -r -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb +cargo run -r -- --ver v11 --task
obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLO11-Obb +``` + +**`cargo run -- --help` for more options** + +For more details, please refer to [usls-yolo](https://github.com/jamjamjon/usls/tree/main/examples/yolo). diff --git a/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs b/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs new file mode 100644 index 00000000000..3c71a253108 --- /dev/null +++ b/examples/YOLO-Series-ONNXRuntime-Rust/src/main.rs @@ -0,0 +1,236 @@ +use anyhow::Result; +use clap::Parser; + +use usls::{ + models::YOLO, Annotator, DataLoader, Device, Options, Viewer, Vision, YOLOScale, YOLOTask, + YOLOVersion, COCO_SKELETONS_16, +}; + +#[derive(Parser, Clone)] +#[command(author, version, about, long_about = None)] +pub struct Args { + /// Path to the ONNX model + #[arg(long)] + pub model: Option, + + /// Input source path + #[arg(long, default_value_t = String::from("../../ultralytics/assets/bus.jpg"))] + pub source: String, + + /// YOLO Task + #[arg(long, value_enum, default_value_t = YOLOTask::Detect)] + pub task: YOLOTask, + + /// YOLO Version + #[arg(long, value_enum, default_value_t = YOLOVersion::V8)] + pub ver: YOLOVersion, + + /// YOLO Scale + #[arg(long, value_enum, default_value_t = YOLOScale::N)] + pub scale: YOLOScale, + + /// Batch size + #[arg(long, default_value_t = 1)] + pub batch_size: usize, + + /// Minimum input width + #[arg(long, default_value_t = 224)] + pub width_min: isize, + + /// Input width + #[arg(long, default_value_t = 640)] + pub width: isize, + + /// Maximum input width + #[arg(long, default_value_t = 1024)] + pub width_max: isize, + + /// Minimum input height + #[arg(long, default_value_t = 224)] + pub height_min: isize, + + /// Input height + #[arg(long, default_value_t = 640)] + pub height: isize, + + /// Maximum input height + #[arg(long, default_value_t = 1024)] + pub height_max: isize, + + /// Number of classes + #[arg(long, default_value_t = 80)] + pub nc: usize, + + /// Class confidence 
+ #[arg(long)] + pub confs: Vec, + + /// Enable TensorRT support + #[arg(long)] + pub trt: bool, + + /// Enable CUDA support + #[arg(long)] + pub cuda: bool, + + /// Enable CoreML support + #[arg(long)] + pub coreml: bool, + + /// Use TensorRT half precision + #[arg(long)] + pub half: bool, + + /// Device ID to use + #[arg(long, default_value_t = 0)] + pub device_id: usize, + + /// Enable performance profiling + #[arg(long)] + pub profile: bool, + + /// Disable contour drawing, for saving time + #[arg(long)] + pub no_contours: bool, + + /// Show result + #[arg(long)] + pub view: bool, + + /// Do not save output + #[arg(long)] + pub nosave: bool, +} + +fn main() -> Result<()> { + let args = Args::parse(); + + // logger + if args.profile { + tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .init(); + } + + // model path + let path = match &args.model { + None => format!( + "yolo/{}-{}-{}.onnx", + args.ver.name(), + args.scale.name(), + args.task.name() + ), + Some(x) => x.to_string(), + }; + + // saveout + let saveout = match &args.model { + None => format!( + "{}-{}-{}", + args.ver.name(), + args.scale.name(), + args.task.name() + ), + Some(x) => { + let p = std::path::PathBuf::from(&x); + p.file_stem().unwrap().to_str().unwrap().to_string() + } + }; + + // device + let device = if args.cuda { + Device::Cuda(args.device_id) + } else if args.trt { + Device::Trt(args.device_id) + } else if args.coreml { + Device::CoreML(args.device_id) + } else { + Device::Cpu(args.device_id) + }; + + // build options + let options = Options::new() + .with_model(&path)? 
+ .with_yolo_version(args.ver) + .with_yolo_task(args.task) + .with_device(device) + .with_trt_fp16(args.half) + .with_ixx(0, 0, (1, args.batch_size as _, 4).into()) + .with_ixx(0, 2, (args.height_min, args.height, args.height_max).into()) + .with_ixx(0, 3, (args.width_min, args.width, args.width_max).into()) + .with_confs(if args.confs.is_empty() { + &[0.2, 0.15] + } else { + &args.confs + }) + .with_nc(args.nc) + .with_find_contours(!args.no_contours) // find contours or not + // .with_names(&COCO_CLASS_NAMES_80) // detection class names + // .with_names2(&COCO_KEYPOINTS_17) // keypoints class names + // .exclude_classes(&[0]) + // .retain_classes(&[0, 5]) + .with_profile(args.profile); + + // build model + let mut model = YOLO::new(options)?; + + // build dataloader + let dl = DataLoader::new(&args.source)? + .with_batch(model.batch() as _) + .build()?; + + // build annotator + let annotator = Annotator::default() + .with_skeletons(&COCO_SKELETONS_16) + .without_masks(true) // no masks plotting when doing segment task + .with_bboxes_thickness(3) + .with_keypoints_name(false) // enable keypoints names + .with_saveout_subs(&["YOLO"]) + .with_saveout(&saveout); + + // build viewer + let mut viewer = if args.view { + Some(Viewer::new().with_delay(5).with_scale(1.).resizable(true)) + } else { + None + }; + + // run & annotate + for (xs, _paths) in dl { + let ys = model.forward(&xs, args.profile)?; + let images_plotted = annotator.plot(&xs, &ys, !args.nosave)?; + + // show image + match &mut viewer { + Some(viewer) => viewer.imshow(&images_plotted)?, + None => continue, + } + + // check out window and key event + match &mut viewer { + Some(viewer) => { + if !viewer.is_open() || viewer.is_key_pressed(usls::Key::Escape) { + break; + } + } + None => continue, + } + + // write video + if !args.nosave { + match &mut viewer { + Some(viewer) => viewer.write_batch(&images_plotted)?, + None => continue, + } + } + } + + // finish video write + if !args.nosave { + if let 
Some(viewer) = &mut viewer { + viewer.finish_write()?; + } + } + + Ok(()) +} diff --git a/examples/YOLOv8-Action-Recognition/action_recognition.py b/examples/YOLOv8-Action-Recognition/action_recognition.py index aad74375a57..38b6a252693 100644 --- a/examples/YOLOv8-Action-Recognition/action_recognition.py +++ b/examples/YOLOv8-Action-Recognition/action_recognition.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import argparse import time @@ -263,7 +263,7 @@ def crop_and_pad(frame, box, margin_percent): def run( - weights: str = "yolov8n.pt", + weights: str = "yolo11n.pt", device: str = "", source: str = "https://www.youtube.com/watch?v=dQw4w9WgXcQ", output_path: Optional[str] = None, @@ -279,7 +279,7 @@ def run( Run action recognition on a video source using YOLO for object detection and a video classifier. Args: - weights (str): Path to the YOLO model weights. Defaults to "yolov8n.pt". + weights (str): Path to the YOLO model weights. Defaults to "yolo11n.pt". device (str): Device to run the model on. Use 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon, or 'cpu'. Defaults to auto-detection. source (str): Path to mp4 video file or YouTube URL. Defaults to a sample YouTube video. output_path (Optional[str], optional): Path to save the output video. Defaults to None. @@ -421,7 +421,7 @@ def run( def parse_opt(): """Parse command line arguments.""" parser = argparse.ArgumentParser() - parser.add_argument("--weights", type=str, default="yolov8n.pt", help="ultralytics detector model path") + parser.add_argument("--weights", type=str, default="yolo11n.pt", help="ultralytics detector model path") parser.add_argument("--device", default="", help='cuda device, i.e. 
0 or 0,1,2,3 or cpu/mps, "" for auto-detection') parser.add_argument( "--source", diff --git a/examples/YOLOv8-CPP-Inference/README.md b/examples/YOLOv8-CPP-Inference/README.md index 5bb2586dd63..243d448e366 100644 --- a/examples/YOLOv8-CPP-Inference/README.md +++ b/examples/YOLOv8-CPP-Inference/README.md @@ -1,6 +1,6 @@ # YOLOv8/YOLOv5 Inference C++ -This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV's DNN API. +This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV DNN API. ## Usage @@ -27,13 +27,13 @@ make To export YOLOv8 models: -```commandline +```bash yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12 ``` To export YOLOv5 models: -```commandline +```bash python3 export.py --weights yolov5s.pt --img 480 640 --include onnx --opset 12 ``` @@ -45,6 +45,6 @@ yolov5s.onnx: ![image](https://user-images.githubusercontent.com/40023722/217357005-07464492-d1da-42e3-98a7-fc753f87d5e6.png) -This repository utilizes OpenCV's DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case. +This repository utilizes OpenCV DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case. The **main** branch version uses Qt as a GUI wrapper. The primary focus here is the **Inference** class file, which demonstrates how to transpose YOLOv8 models to work as YOLOv5 models. 
diff --git a/examples/YOLOv8-LibTorch-CPP-Inference/README.md b/examples/YOLOv8-LibTorch-CPP-Inference/README.md index 930c3cd2225..1380071ee4a 100644 --- a/examples/YOLOv8-LibTorch-CPP-Inference/README.md +++ b/examples/YOLOv8-LibTorch-CPP-Inference/README.md @@ -30,6 +30,6 @@ make To export YOLOv8 models: -```commandline +```bash yolo export model=yolov8s.pt imgsz=640 format=torchscript ``` diff --git a/examples/YOLOv8-LibTorch-CPP-Inference/main.cc b/examples/YOLOv8-LibTorch-CPP-Inference/main.cc index b68b7f7e4bf..0937b56828e 100644 --- a/examples/YOLOv8-LibTorch-CPP-Inference/main.cc +++ b/examples/YOLOv8-LibTorch-CPP-Inference/main.cc @@ -226,6 +226,7 @@ int main() { cv::Mat image = cv::imread("/path/to/bus.jpg"); cv::Mat input_image; letterbox(image, input_image, {640, 640}); + cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB); torch::Tensor image_tensor = torch::from_blob(input_image.data, {input_image.rows, input_image.cols, 3}, torch::kByte).to(device); image_tensor = image_tensor.toType(torch::kFloat32).div(255); diff --git a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp index a65391f5d7d..168df490c27 100644 --- a/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp +++ b/examples/YOLOv8-ONNXRuntime-CPP/inference.cpp @@ -107,11 +107,11 @@ char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) { iouThreshold = iParams.iouThreshold; imgSize = iParams.imgSize; modelType = iParams.modelType; + cudaEnable = iParams.cudaEnable; env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo"); Ort::SessionOptions sessionOption; if (iParams.cudaEnable) { - cudaEnable = iParams.cudaEnable; OrtCUDAProviderOptions cudaOption; cudaOption.device_id = 0; sessionOption.AppendExecutionProvider_CUDA(cudaOption); diff --git a/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml index 8ac747e7e34..8eb421a86a1 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml +++ 
b/examples/YOLOv8-ONNXRuntime-Rust/Cargo.toml @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license [package] name = "yolov8-rs" @@ -9,11 +9,11 @@ edition = "2021" [dependencies] clap = { version = "4.2.4", features = ["derive"] } -image = { version = "0.24.7", default-features = false, features = ["jpeg", "png", "webp-encoder"] } -imageproc = { version = "0.23.0", default-features = false } -ndarray = { version = "0.15.6" } -ort = { version = "1.16.3", default-features = false, features = ["load-dynamic", "copy-dylibs", "half"] } -rusttype = { version = "0.9", default-features = false } +image = { version = "0.25.2"} +imageproc = { version = "0.25.0"} +ndarray = { version = "0.16" } +ort = { version = "2.0.0-rc.5", features = ["cuda", "tensorrt", "load-dynamic", "copy-dylibs", "half"]} +rusttype = { version = "0.9.3" } anyhow = { version = "1.0.75" } regex = { version = "1.5.4" } rand = { version = "0.8.5" } @@ -21,3 +21,4 @@ chrono = { version = "0.4.30" } half = { version = "2.3.1" } dirs = { version = "5.0.1" } ureq = { version = "2.9.1" } +ab_glyph = "0.2.29" diff --git a/examples/YOLOv8-ONNXRuntime-Rust/README.md b/examples/YOLOv8-ONNXRuntime-Rust/README.md index 48a3017ce81..ec09edbf655 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/README.md +++ b/examples/YOLOv8-ONNXRuntime-Rust/README.md @@ -5,9 +5,9 @@ This repository provides a Rust demo for performing YOLOv8 tasks like `Classific ## Recently Updated - Add YOLOv8-OBB demo -- Update ONNXRuntime to 1.17.x +- Update ONNXRuntime to 1.19.x -Newly updated YOLOv8 example code is located in this repository (https://github.com/jamjamjon/usls/tree/main/examples/yolo) +Newly updated YOLOv8 example code is located in [this repository](https://github.com/jamjamjon/usls/tree/main/examples/yolo) ## Features @@ -22,25 +22,16 @@ Newly updated YOLOv8 example code is located in this repository (https://github. 
Please follow the Rust official installation. (https://www.rust-lang.org/tools/install) -### 2. Install ONNXRuntime +### 2. ONNXRuntime Linking -This repository use `ort` crate, which is ONNXRuntime wrapper for Rust. (https://docs.rs/ort/latest/ort/) +- #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking). -You can follow the instruction with `ort` doc or simply do this: - -- step1: Download ONNXRuntime(https://github.com/microsoft/onnxruntime/releases) -- setp2: Set environment variable `PATH` for linking. - -On ubuntu, You can do like this: - -```bash -vim ~/.bashrc - -# Add the path of ONNXRUntime lib -export LD_LIBRARY_PATH=/home/qweasd/Documents/onnxruntime-linux-x64-gpu-1.16.3/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - -source ~/.bashrc -``` +- #### For Linux or macOS Users: + - Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases). + - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: + ```shell + export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0 + ``` ### 3. \[Optional\] Install CUDA & CuDNN & TensorRT @@ -96,13 +87,13 @@ cargo run --release -- --cuda --device_id 0 --model --source Set `--batch` to do multi-batch-size inference. -If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify min/max/opt batch for dynamic batch input.(https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input).(Note that the ONNX model should exported with dynamic shapes) +If you're using `--trt`, you can also set `--batch-min` and `--batch-max` to explicitly specify min/max/opt batch for dynamic batch input.(https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input).(Note that the ONNX model should be exported with dynamic shapes.) 
```bash cargo run --release -- --cuda --batch 2 --model --source ``` -Set `--height` and `--width` to do dynamic image size inference. (Note that the ONNX model should exported with dynamic shapes) +Set `--height` and `--width` to do dynamic image size inference. (Note that the ONNX model should be exported with dynamic shapes.) ```bash cargo run --release -- --cuda --width 480 --height 640 --model --source diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs index 2ba0dd49ec1..b5bc05a585a 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/cli.rs @@ -15,7 +15,7 @@ pub struct Args { /// device id #[arg(long, default_value_t = 0)] - pub device_id: u32, + pub device_id: i32, /// using TensorRT EP #[arg(long)] diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs index 1af7f7c5e12..0084535ee57 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/lib.rs @@ -117,3 +117,44 @@ pub fn check_font(font: &str) -> rusttype::Font<'static> { let buffer = std::fs::read(font_path).unwrap(); rusttype::Font::try_from_vec(buffer).unwrap() } + +use ab_glyph::FontArc; +pub fn load_font() -> FontArc { + use std::path::Path; + let font_path = Path::new("./font/Arial.ttf"); + match font_path.try_exists() { + Ok(true) => { + let buffer = std::fs::read(font_path).unwrap(); + FontArc::try_from_vec(buffer).unwrap() + } + Ok(false) => { + std::fs::create_dir_all("./font").unwrap(); + println!("Downloading font..."); + let source_url = "https://ultralytics.com/assets/Arial.ttf"; + let resp = ureq::get(source_url) + .timeout(std::time::Duration::from_secs(500)) + .call() + .unwrap_or_else(|err| panic!("> Failed to download font: {source_url}: {err:?}")); + + // read to buffer + let mut buffer = vec![]; + let total_size = resp + .header("Content-Length") + .and_then(|s| s.parse::().ok()) + .unwrap(); + let 
_reader = resp + .into_reader() + .take(total_size) + .read_to_end(&mut buffer) + .unwrap(); + // save + let mut fd = std::fs::File::create(font_path).unwrap(); + fd.write_all(&buffer).unwrap(); + println!("Font saved at: {:?}", font_path.display()); + FontArc::try_from_vec(buffer).unwrap() + } + Err(e) => { + panic!("Failed to load font {}", e); + } + } +} diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs index 8dd1567990c..fd3845ced08 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/main.rs @@ -6,7 +6,7 @@ fn main() -> Result<(), Box> { let args = Args::parse(); // 1. load image - let x = image::io::Reader::open(&args.source)? + let x = image::ImageReader::open(&args.source)? .with_guessed_format()? .decode()?; diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs index 1c0e5e494d8..95b2bdfffaa 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/model.rs @@ -1,5 +1,6 @@ #![allow(clippy::type_complexity)] +use ab_glyph::FontArc; use anyhow::Result; use image::{DynamicImage, GenericImageView, ImageBuffer}; use ndarray::{s, Array, Axis, IxDyn}; @@ -7,7 +8,7 @@ use rand::{thread_rng, Rng}; use std::path::PathBuf; use crate::{ - check_font, gen_time_string, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend, + gen_time_string, load_font, non_max_suppression, Args, Batch, Bbox, Embedding, OrtBackend, OrtConfig, OrtEP, Point2, YOLOResult, YOLOTask, SKELETON, }; @@ -36,9 +37,9 @@ impl YOLOv8 { let ep = if config.trt { OrtEP::Trt(config.device_id) } else if config.cuda { - OrtEP::Cuda(config.device_id) + OrtEP::CUDA(config.device_id) } else { - OrtEP::Cpu + OrtEP::CPU }; // batch @@ -330,12 +331,19 @@ impl YOLOv8 { // coefs * proto -> mask let coefs = Array::from_shape_vec((1, nm), coefs)?; // (n, nm) - let proto = proto.to_owned().into_shape((nm, nh * 
nw))?; // (nm, nh*nw) - let mask = coefs.dot(&proto).into_shape((nh, nw, 1))?; // (nh, nw, n) + + let proto = proto.to_owned(); + let proto = proto.to_shape((nm, nh * nw))?; // (nm, nh*nw) + let mask = coefs.dot(&proto); // (nh, nw, n) + let mask = mask.to_shape((nh, nw, 1))?; // build image from ndarray let mask_im: ImageBuffer, Vec> = - match ImageBuffer::from_raw(nw as u32, nh as u32, mask.into_raw_vec()) { + match ImageBuffer::from_raw( + nw as u32, + nh as u32, + mask.to_owned().into_raw_vec_and_offset().0, + ) { Some(image) => image, None => panic!("can not create image from ndarray"), }; @@ -410,7 +418,7 @@ impl YOLOv8 { skeletons: Option<&[(usize, usize)]>, ) { // check font then load - let font = check_font("Arial.ttf"); + let font: FontArc = load_font(); for (_idb, (img0, y)) in xs0.iter().zip(ys.iter()).enumerate() { let mut img = img0.to_rgb8(); @@ -422,12 +430,13 @@ impl YOLOv8 { let legend_size = img.width().max(img.height()) / scale; let x = img.width() / 20; let y = img.height() / 20 + i as u32 * legend_size; + imageproc::drawing::draw_text_mut( &mut img, image::Rgb([0, 255, 0]), x as i32, y as i32, - rusttype::Scale::uniform(legend_size as f32 - 1.), + legend_size as f32, &font, &legend, ); @@ -454,7 +463,7 @@ impl YOLOv8 { image::Rgb(self.color_palette[bbox.id()].into()), bbox.xmin() as i32, (bbox.ymin() - legend_size as f32) as i32, - rusttype::Scale::uniform(legend_size as f32 - 1.), + legend_size as f32, &font, &legend, ); @@ -551,7 +560,7 @@ impl YOLOv8 { None => String::from(""), }, self.engine.ep(), - if let OrtEP::Cpu = self.engine.ep() { + if let OrtEP::CPU = self.engine.ep() { "" } else { "(May still fall back to CPU)" diff --git a/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs index 857baaebae0..d88208dead3 100644 --- a/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs +++ b/examples/YOLOv8-ONNXRuntime-Rust/src/ort_backend.rs @@ -2,11 +2,13 @@ use anyhow::Result; use 
clap::ValueEnum; use half::f16; use ndarray::{Array, CowArray, IxDyn}; -use ort::execution_providers::{CUDAExecutionProviderOptions, TensorRTExecutionProviderOptions}; -use ort::tensor::TensorElementDataType; -use ort::{Environment, ExecutionProvider, Session, SessionBuilder, Value}; +use ort::{ + CPUExecutionProvider, CUDAExecutionProvider, ExecutionProvider, ExecutionProviderDispatch, + TensorRTExecutionProvider, +}; +use ort::{Session, SessionBuilder}; +use ort::{TensorElementType, ValueType}; use regex::Regex; - #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] pub enum YOLOTask { // YOLO tasks @@ -19,9 +21,9 @@ pub enum YOLOTask { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum OrtEP { // ONNXRuntime execution provider - Cpu, - Cuda(u32), - Trt(u32), + CPU, + CUDA(i32), + Trt(i32), } #[derive(Debug)] @@ -44,8 +46,9 @@ impl Default for Batch { #[derive(Debug, Default)] pub struct OrtInputs { // ONNX model inputs attrs - pub shapes: Vec>, - pub dtypes: Vec, + pub shapes: Vec>, + //pub dtypes: Vec, + pub dtypes: Vec, pub names: Vec, pub sizes: Vec>, } @@ -56,12 +59,19 @@ impl OrtInputs { let mut dtypes = Vec::new(); let mut names = Vec::new(); for i in session.inputs.iter() { - let shape: Vec = i + /* let shape: Vec = i .dimensions() .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) .collect(); - shapes.push(shape); - dtypes.push(i.input_type); + shapes.push(shape); */ + if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type { + dtypes.push(ty.clone()); + let shape = dimensions.clone(); + shapes.push(shape); + } else { + panic!("ไธๆ”ฏๆŒ็š„ๆ•ฐๆฎๆ ผๅผ, {} - {}", file!(), line!()); + } + //dtypes.push(i.input_type); names.push(i.name.clone()); } Self { @@ -97,12 +107,14 @@ pub struct OrtBackend { impl OrtBackend { pub fn build(args: OrtConfig) -> Result { // build env & session - let env = Environment::builder() - .with_name("YOLOv8") - .with_log_level(ort::LoggingLevel::Verbose) - .build()? 
- .into_arc(); - let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?; + // in version 2.x environment is removed + /* let env = ort::EnvironmentBuilder + ::with_name("YOLOv8") + .build()? + .into_arc(); */ + let sessionbuilder = SessionBuilder::new()?; + let session = sessionbuilder.commit_from_file(&args.f)?; + //let session = SessionBuilder::new(&env)?.with_model_from_file(&args.f)?; // get inputs let mut inputs = OrtInputs::new(&session); @@ -142,16 +154,19 @@ impl OrtBackend { // build provider let (ep, provider) = match args.ep { - OrtEP::Cuda(device_id) => Self::set_ep_cuda(device_id), + OrtEP::CUDA(device_id) => Self::set_ep_cuda(device_id), OrtEP::Trt(device_id) => Self::set_ep_trt(device_id, args.trt_fp16, &batch, &inputs), - _ => (OrtEP::Cpu, ExecutionProvider::CPU(Default::default())), + _ => ( + OrtEP::CPU, + ExecutionProviderDispatch::from(CPUExecutionProvider::default()), + ), }; // build session again with the new provider - let session = SessionBuilder::new(&env)? + let session = SessionBuilder::new()? // .with_optimization_level(ort::GraphOptimizationLevel::Level3)? .with_execution_providers([provider])? 
- .with_model_from_file(args.f)?; + .commit_from_file(args.f)?; // task: using given one or guessing let task = match args.task { @@ -185,57 +200,58 @@ impl OrtBackend { pub fn fetch_inputs_from_session( session: &Session, - ) -> (Vec>, Vec, Vec) { + ) -> (Vec>, Vec, Vec) { // get inputs attrs from ONNX model let mut shapes = Vec::new(); let mut dtypes = Vec::new(); let mut names = Vec::new(); for i in session.inputs.iter() { - let shape: Vec = i - .dimensions() - .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) - .collect(); - shapes.push(shape); - dtypes.push(i.input_type); + if let ort::ValueType::Tensor { ty, dimensions } = &i.input_type { + dtypes.push(ty.clone()); + let shape = dimensions.clone(); + shapes.push(shape); + } else { + panic!("ไธๆ”ฏๆŒ็š„ๆ•ฐๆฎๆ ผๅผ, {} - {}", file!(), line!()); + } names.push(i.name.clone()); } (shapes, dtypes, names) } - pub fn set_ep_cuda(device_id: u32) -> (OrtEP, ExecutionProvider) { - // set CUDA - if ExecutionProvider::CUDA(Default::default()).is_available() { + pub fn set_ep_cuda(device_id: i32) -> (OrtEP, ExecutionProviderDispatch) { + let cuda_provider = CUDAExecutionProvider::default().with_device_id(device_id); + if let Ok(true) = cuda_provider.is_available() { ( - OrtEP::Cuda(device_id), - ExecutionProvider::CUDA(CUDAExecutionProviderOptions { - device_id, - ..Default::default() - }), + OrtEP::CUDA(device_id), + ExecutionProviderDispatch::from(cuda_provider), //PlantForm::CUDA(cuda_provider) ) } else { println!("> CUDA is not available! 
Using CPU."); - (OrtEP::Cpu, ExecutionProvider::CPU(Default::default())) + ( + OrtEP::CPU, + ExecutionProviderDispatch::from(CPUExecutionProvider::default()), //PlantForm::CPU(CPUExecutionProvider::default()) + ) } } pub fn set_ep_trt( - device_id: u32, + device_id: i32, fp16: bool, batch: &Batch, inputs: &OrtInputs, - ) -> (OrtEP, ExecutionProvider) { + ) -> (OrtEP, ExecutionProviderDispatch) { // set TensorRT - if ExecutionProvider::TensorRT(Default::default()).is_available() { - let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]); + let trt_provider = TensorRTExecutionProvider::default().with_device_id(device_id); - // dtype match checking - if inputs.dtypes[0] == TensorElementDataType::Float16 && !fp16 { + //trt_provider. + if let Ok(true) = trt_provider.is_available() { + let (height, width) = (inputs.sizes[0][0], inputs.sizes[0][1]); + if inputs.dtypes[0] == TensorElementType::Float16 && !fp16 { panic!( "Dtype mismatch! Expected: Float32, got: {:?}. You should use `--fp16`", inputs.dtypes[0] ); } - // dynamic shape: input_tensor_1:dim_1xdim_2x...,input_tensor_2:dim_3xdim_4x...,... let mut opt_string = String::new(); let mut min_string = String::new(); @@ -251,17 +267,16 @@ impl OrtBackend { let _ = opt_string.pop(); let _ = min_string.pop(); let _ = max_string.pop(); + + let trt_provider = trt_provider + .with_profile_opt_shapes(opt_string) + .with_profile_min_shapes(min_string) + .with_profile_max_shapes(max_string) + .with_fp16(fp16) + .with_timing_cache(true); ( OrtEP::Trt(device_id), - ExecutionProvider::TensorRT(TensorRTExecutionProviderOptions { - device_id, - fp16_enable: fp16, - timing_cache_enable: true, - profile_min_shapes: min_string, - profile_max_shapes: max_string, - profile_opt_shapes: opt_string, - ..Default::default() - }), + ExecutionProviderDispatch::from(trt_provider), ) } else { println!("> TensorRT is not available! 
Try using CUDA..."); @@ -283,8 +298,8 @@ impl OrtBackend { pub fn run(&self, xs: Array, profile: bool) -> Result>> { // ORT inference match self.dtype() { - TensorElementDataType::Float16 => self.run_fp16(xs, profile), - TensorElementDataType::Float32 => self.run_fp32(xs, profile), + TensorElementType::Float16 => self.run_fp16(xs, profile), + TensorElementType::Float32 => self.run_fp32(xs, profile), _ => todo!(), } } @@ -300,14 +315,13 @@ impl OrtBackend { // h2d let t = std::time::Instant::now(); let xs = CowArray::from(xs); - let xs = vec![Value::from_array(self.session.allocator(), &xs)?]; if profile { println!("[ORT H2D]: {:?}", t.elapsed()); } // run let t = std::time::Instant::now(); - let ys = self.session.run(xs)?; + let ys = self.session.run(ort::inputs![xs.view()]?)?; if profile { println!("[ORT Inference]: {:?}", t.elapsed()); } @@ -315,21 +329,22 @@ impl OrtBackend { // d2h Ok(ys .iter() - .map(|x| { + .map(|(_k, v)| { // d2h let t = std::time::Instant::now(); - let x = x.try_extract::<_>().unwrap().view().clone().into_owned(); + let v = v.try_extract_tensor().unwrap(); + //let v = v.try_extract::<_>().unwrap().view().clone().into_owned(); if profile { println!("[ORT D2H]: {:?}", t.elapsed()); } // f16->f32 let t_ = std::time::Instant::now(); - let x = x.mapv(f16::to_f32); + let v = v.mapv(f16::to_f32); if profile { println!("[ORT f16->f32]: {:?}", t_.elapsed()); } - x + v }) .collect::>>()) } @@ -338,14 +353,13 @@ impl OrtBackend { // h2d let t = std::time::Instant::now(); let xs = CowArray::from(xs); - let xs = vec![Value::from_array(self.session.allocator(), &xs)?]; if profile { println!("[ORT H2D]: {:?}", t.elapsed()); } // run let t = std::time::Instant::now(); - let ys = self.session.run(xs)?; + let ys = self.session.run(ort::inputs![xs.view()]?)?; if profile { println!("[ORT Inference]: {:?}", t.elapsed()); } @@ -353,39 +367,44 @@ impl OrtBackend { // d2h Ok(ys .iter() - .map(|x| { + .map(|(_k, v)| { let t = std::time::Instant::now(); - let x = 
x.try_extract::<_>().unwrap().view().clone().into_owned(); + let v = v.try_extract_tensor::().unwrap().into_owned(); + //let x = x.try_extract::<_>().unwrap().view().clone().into_owned(); if profile { println!("[ORT D2H]: {:?}", t.elapsed()); } - x + v }) .collect::>>()) } - pub fn output_shapes(&self) -> Vec> { + pub fn output_shapes(&self) -> Vec> { let mut shapes = Vec::new(); - for o in &self.session.outputs { - let shape: Vec<_> = o - .dimensions() - .map(|x| if let Some(x) = x { x as i32 } else { -1i32 }) - .collect(); - shapes.push(shape); + for output in &self.session.outputs { + if let ValueType::Tensor { ty: _, dimensions } = &output.output_type { + let shape = dimensions.clone(); + shapes.push(shape); + } else { + panic!("not support data format, {} - {}", file!(), line!()); + } } shapes } - pub fn output_dtypes(&self) -> Vec { + pub fn output_dtypes(&self) -> Vec { let mut dtypes = Vec::new(); - self.session - .outputs - .iter() - .for_each(|x| dtypes.push(x.output_type)); + for output in &self.session.outputs { + if let ValueType::Tensor { ty, dimensions: _ } = &output.output_type { + dtypes.push(ty.clone()); + } else { + panic!("not support data format, {} - {}", file!(), line!()); + } + } dtypes } - pub fn input_shapes(&self) -> &Vec> { + pub fn input_shapes(&self) -> &Vec> { &self.inputs.shapes } @@ -393,11 +412,11 @@ impl OrtBackend { &self.inputs.names } - pub fn input_dtypes(&self) -> &Vec { + pub fn input_dtypes(&self) -> &Vec { &self.inputs.dtypes } - pub fn dtype(&self) -> TensorElementDataType { + pub fn dtype(&self) -> TensorElementType { self.input_dtypes()[0] } diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py index 71b251d37a3..d1e18a404c1 100644 --- a/examples/YOLOv8-ONNXRuntime/main.py +++ b/examples/YOLOv8-ONNXRuntime/main.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import argparse diff --git 
a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py index c58b9ced5df..e9e095dd462 100644 --- a/examples/YOLOv8-OpenCV-ONNX-Python/main.py +++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import argparse diff --git a/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md b/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md deleted file mode 100644 index ea14e4440ec..00000000000 --- a/examples/YOLOv8-OpenCV-int8-tflite-Python/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# YOLOv8 - Int8-TFLite Runtime - -Welcome to the YOLOv8 Int8 TFLite Runtime for efficient and optimized object detection project. This README provides comprehensive instructions for installing and using our YOLOv8 implementation. - -## Installation - -Ensure a smooth setup by following these steps to install necessary dependencies. - -### Installing Required Dependencies - -Install all required dependencies with this simple command: - -```bash -pip install -r requirements.txt -``` - -### Installing `tflite-runtime` - -To load TFLite models, install the `tflite-runtime` package using: - -```bash -pip install tflite-runtime -``` - -### Installing `tensorflow-gpu` (For NVIDIA GPU Users) - -Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`: - -```bash -pip install tensorflow-gpu -``` - -**Note:** Ensure you have compatible GPU drivers installed on your system. - -### Installing `tensorflow` (CPU Version) - -For CPU usage or non-NVIDIA GPUs, install TensorFlow with: - -```bash -pip install tensorflow -``` - -## Usage - -Follow these instructions to run YOLOv8 after successful installation. - -Convert the YOLOv8 model to Int8 TFLite format: - -```bash -yolo export model=yolov8n.pt imgsz=640 format=tflite int8 -``` - -Locate the Int8 TFLite model in `yolov8n_saved_model`. 
Choose `best_full_integer_quant` or verify quantization at [Netron](https://netron.app/). Then, execute the following in your terminal: - -```bash -python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf-thres 0.5 --iou-thres 0.5 -``` - -Replace `best_full_integer_quant.tflite` with your model file's path, `image.jpg` with your input image, and adjust the confidence (conf-thres) and IoU thresholds (iou-thres) as necessary. - -### Output - -The output is displayed as annotated images, showcasing the model's detection capabilities: - -![image](https://github.com/wamiqraza/Attribute-recognition-and-reidentification-Market1501-dataset/blob/main/img/bus.jpg) diff --git a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py b/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py deleted file mode 100644 index 70bccfa1865..00000000000 --- a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py +++ /dev/null @@ -1,298 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -import argparse - -import cv2 -import numpy as np -from tflite_runtime import interpreter as tflite - -from ultralytics.utils import ASSETS, yaml_load -from ultralytics.utils.checks import check_yaml - -# Declare as global variables, can be updated based trained model image size -img_width = 640 -img_height = 640 - - -class LetterBox: - """Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models.""" - - def __init__( - self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32 - ): - """Initializes LetterBox with parameters for reshaping and transforming image while maintaining aspect ratio.""" - self.new_shape = new_shape - self.auto = auto - self.scaleFill = scaleFill - self.scaleup = scaleup - self.stride = stride - self.center = center # Put the image in the middle or top-left - - def __call__(self, labels=None, image=None): - """Return updated labels and image with added border.""" - if labels is 
None: - labels = {} - img = labels.get("img") if image is None else image - shape = img.shape[:2] # current shape [height, width] - new_shape = labels.pop("rect_shape", self.new_shape) - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not self.scaleup: # only scale down, do not scale up (for better val mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if self.auto: # minimum rectangle - dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding - elif self.scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - if self.center: - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1)) - img = cv2.copyMakeBorder( - img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) - ) # add border - if labels.get("ratio_pad"): - labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation - - if len(labels): - labels = self._update_labels(labels, ratio, dw, dh) - labels["img"] = img - labels["resized_shape"] = new_shape - return labels - else: - return img - - def _update_labels(self, labels, ratio, padw, padh): - """Update labels.""" - labels["instances"].convert_bbox(format="xyxy") - labels["instances"].denormalize(*labels["img"].shape[:2][::-1]) - labels["instances"].scale(*ratio) - labels["instances"].add_padding(padw, padh) - return labels - - -class Yolov8TFLite: - 
"""Class for performing object detection using YOLOv8 model converted to TensorFlow Lite format.""" - - def __init__(self, tflite_model, input_image, confidence_thres, iou_thres): - """ - Initializes an instance of the Yolov8TFLite class. - - Args: - tflite_model: Path to the TFLite model. - input_image: Path to the input image. - confidence_thres: Confidence threshold for filtering detections. - iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression. - """ - self.tflite_model = tflite_model - self.input_image = input_image - self.confidence_thres = confidence_thres - self.iou_thres = iou_thres - - # Load the class names from the COCO dataset - self.classes = yaml_load(check_yaml("coco8.yaml"))["names"] - - # Generate a color palette for the classes - self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3)) - - def draw_detections(self, img, box, score, class_id): - """ - Draws bounding boxes and labels on the input image based on the detected objects. - - Args: - img: The input image to draw detections on. - box: Detected bounding box. - score: Corresponding detection score. - class_id: Class ID for the detected object. 
- - Returns: - None - """ - # Extract the coordinates of the bounding box - x1, y1, w, h = box - - # Retrieve the color for the class ID - color = self.color_palette[class_id] - - # Draw the bounding box on the image - cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2) - - # Create the label text with class name and score - label = f"{self.classes[class_id]}: {score:.2f}" - - # Calculate the dimensions of the label text - (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) - - # Calculate the position of the label text - label_x = x1 - label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10 - - # Draw a filled rectangle as the background for the label text - cv2.rectangle( - img, - (int(label_x), int(label_y - label_height)), - (int(label_x + label_width), int(label_y + label_height)), - color, - cv2.FILLED, - ) - - # Draw the label text on the image - cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA) - - def preprocess(self): - """ - Preprocesses the input image before performing inference. - - Returns: - image_data: Preprocessed image data ready for inference. - """ - # Read the input image using OpenCV - self.img = cv2.imread(self.input_image) - - print("image before", self.img) - # Get the height and width of the input image - self.img_height, self.img_width = self.img.shape[:2] - - letterbox = LetterBox(new_shape=[img_width, img_height], auto=False, stride=32) - image = letterbox(image=self.img) - image = [image] - image = np.stack(image) - image = image[..., ::-1].transpose((0, 3, 1, 2)) - img = np.ascontiguousarray(image) - # n, h, w, c - image = img.astype(np.float32) - return image / 255 - - def postprocess(self, input_image, output): - """ - Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs. - - Args: - input_image (numpy.ndarray): The input image. 
- output (numpy.ndarray): The output of the model. - - Returns: - numpy.ndarray: The input image with detections drawn on it. - """ - boxes = [] - scores = [] - class_ids = [] - for pred in output: - pred = np.transpose(pred) - for box in pred: - x, y, w, h = box[:4] - x1 = x - w / 2 - y1 = y - h / 2 - boxes.append([x1, y1, w, h]) - idx = np.argmax(box[4:]) - scores.append(box[idx + 4]) - class_ids.append(idx) - - indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres) - - for i in indices: - # Get the box, score, and class ID corresponding to the index - box = boxes[i] - gain = min(img_width / self.img_width, img_height / self.img_height) - pad = ( - round((img_width - self.img_width * gain) / 2 - 0.1), - round((img_height - self.img_height * gain) / 2 - 0.1), - ) - box[0] = (box[0] - pad[0]) / gain - box[1] = (box[1] - pad[1]) / gain - box[2] = box[2] / gain - box[3] = box[3] / gain - score = scores[i] - class_id = class_ids[i] - if score > 0.25: - print(box, score, class_id) - # Draw the detection on the input image - self.draw_detections(input_image, box, score, class_id) - - return input_image - - def main(self): - """ - Performs inference using a TFLite model and returns the output image with drawn detections. - - Returns: - output_img: The output image with drawn detections. 
- """ - # Create an interpreter for the TFLite model - interpreter = tflite.Interpreter(model_path=self.tflite_model) - self.model = interpreter - interpreter.allocate_tensors() - - # Get the model inputs - input_details = interpreter.get_input_details() - output_details = interpreter.get_output_details() - - # Store the shape of the input for later use - input_shape = input_details[0]["shape"] - self.input_width = input_shape[1] - self.input_height = input_shape[2] - - # Preprocess the image data - img_data = self.preprocess() - img_data = img_data - # img_data = img_data.cpu().numpy() - # Set the input tensor to the interpreter - print(input_details[0]["index"]) - print(img_data.shape) - img_data = img_data.transpose((0, 2, 3, 1)) - - scale, zero_point = input_details[0]["quantization"] - img_data_int8 = (img_data / scale + zero_point).astype(np.int8) - interpreter.set_tensor(input_details[0]["index"], img_data_int8) - - # Run inference - interpreter.invoke() - - # Get the output tensor from the interpreter - output = interpreter.get_tensor(output_details[0]["index"]) - scale, zero_point = output_details[0]["quantization"] - output = (output.astype(np.float32) - zero_point) * scale - - output[:, [0, 2]] *= img_width - output[:, [1, 3]] *= img_height - print(output) - # Perform post-processing on the outputs to obtain output image. - return self.postprocess(self.img, output) - - -if __name__ == "__main__": - # Create an argument parser to handle command-line arguments - parser = argparse.ArgumentParser() - parser.add_argument( - "--model", type=str, default="yolov8n_full_integer_quant.tflite", help="Input your TFLite model." 
- ) - parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.") - parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold") - parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold") - args = parser.parse_args() - - # Create an instance of the Yolov8TFLite class with the specified arguments - detection = Yolov8TFLite(args.model, args.img, args.conf_thres, args.iou_thres) - - # Perform object detection and obtain the output image - output_image = detection.main() - - # Display the output image in a window - cv2.imshow("Output", output_image) - - # Wait for a key press to exit - cv2.waitKey(0) diff --git a/examples/YOLOv8-OpenVINO-CPP-Inference/README.md b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md index e668a0e7e75..6c6c794dea3 100644 --- a/examples/YOLOv8-OpenVINO-CPP-Inference/README.md +++ b/examples/YOLOv8-OpenVINO-CPP-Inference/README.md @@ -50,7 +50,7 @@ Once built, you can run inference on an image using the following command: To use your YOLOv8 model with OpenVINO, you need to export it first. Use the command below to export the model: -```commandline +```bash yolo export model=yolov8s.pt imgsz=640 format=openvino ``` diff --git a/examples/YOLOv8-Region-Counter/readme.md b/examples/YOLOv8-Region-Counter/readme.md index a0811359eac..3ed06799107 100644 --- a/examples/YOLOv8-Region-Counter/readme.md +++ b/examples/YOLOv8-Region-Counter/readme.md @@ -1,7 +1,14 @@ # Regions Counting Using YOLOv8 (Inference on Video) -- Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. These regions can be adjusted interactively using a Left Mouse Click, and the counting process occurs in real time. -- Regions can be adjusted to suit the user's preferences and requirements. 
+> **Region Counter** is now part of **[Ultralytics Solutions](https://docs.ultralytics.com/solutions/)**, offering improved features and regular updates. + +🔗 **[Explore Object Counting in Regions Here](https://docs.ultralytics.com/guides/region-counting/)** + +> 🔔 **Notice:** + +> The GitHub example will remain available but **will no longer be actively maintained**. For the latest updates and improvements, please use the official [link](https://docs.ultralytics.com/guides/region-counting/). Thank you! + +Region counting is a method employed to tally the objects within a specified area, allowing for more sophisticated analyses when multiple regions are considered. These regions can be adjusted interactively using a Left Mouse Click, and the counting process occurs in real time. Regions can be adjusted to suit the user's preferences and requirements.

@@ -73,7 +80,7 @@ Region counting is a computational method utilized to ascertain the quantity of **2. Is Friendly Region Plotting Supported by the Region Counter?** -The Region Counter offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code: +Region counting offers the capability to create regions in various formats, such as polygons and rectangles. You have the flexibility to modify region attributes, including coordinates, colors, and other details, as demonstrated in the following code: ```python from shapely.geometry import Polygon diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py index a6c739b7e58..915804ec2a9 100644 --- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py +++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license import argparse from collections import defaultdict @@ -91,7 +91,7 @@ def mouse_callback(event, x, y, flags, param): def run( - weights="yolov8n.pt", + weights="yolo11n.pt", source=None, device="cpu", view_img=False, @@ -132,17 +132,19 @@ def run( model.to("cuda") if device == "0" else model.to("cpu") # Extract classes names - names = model.model.names + names = model.names # Video setup videocapture = cv2.VideoCapture(source) - frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4)) - fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v") + frame_width = int(videocapture.get(3)) + frame_height = int(videocapture.get(4)) + fps = int(videocapture.get(5)) + fourcc = cv2.VideoWriter_fourcc(*"mp4v") # Output setup save_dir = increment_path(Path("ultralytics_rc_output") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True) - video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height)) + video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.avi"), fourcc, fps, (frame_width, frame_height)) # Iterate over video frames while videocapture.isOpened(): @@ -183,7 +185,7 @@ def run( region_color = region["region_color"] region_text_color = region["text_color"] - polygon_coords = np.array(region["polygon"].exterior.coords, dtype=np.int32) + polygon_coordinates = np.array(region["polygon"].exterior.coords, dtype=np.int32) centroid_x, centroid_y = int(region["polygon"].centroid.x), int(region["polygon"].centroid.y) text_size, _ = cv2.getTextSize( @@ -201,7 +203,7 @@ def run( cv2.putText( frame, region_label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, region_text_color, line_thickness ) - cv2.polylines(frame, [polygon_coords], isClosed=True, color=region_color, thickness=region_thickness) + cv2.polylines(frame, [polygon_coordinates], isClosed=True, color=region_color, thickness=region_thickness) if view_img: if vid_frame_count == 1: @@ -227,7 +229,7 @@ def run( def parse_opt(): """Parse command line arguments.""" parser = argparse.ArgumentParser() - parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path") + parser.add_argument("--weights", type=str, default="yolo11n.pt", help="initial weights path") parser.add_argument("--device", default="", help="cuda device, i.e. 
0 or 0,1,2,3 or cpu") parser.add_argument("--source", type=str, required=True, help="video file path") parser.add_argument("--view-img", action="store_true", help="show results") @@ -241,9 +243,9 @@ def parse_opt(): return parser.parse_args() -def main(opt): +def main(options): """Main function.""" - run(**vars(opt)) + run(**vars(options)) if __name__ == "__main__": diff --git a/examples/YOLOv8-SAHI-Inference-Video/readme.md b/examples/YOLOv8-SAHI-Inference-Video/readme.md index 525aca5ac02..4dc169b3e17 100644 --- a/examples/YOLOv8-SAHI-Inference-Video/readme.md +++ b/examples/YOLOv8-SAHI-Inference-Video/readme.md @@ -1,11 +1,11 @@ -# YOLOv8 with SAHI (Inference on Video) +# YOLO11 with SAHI (Inference on Video) -[SAHI](https://docs.ultralytics.com/guides/sahi-tiled-inference/) is designed to optimize object detection algorithms for large-scale and high-resolution imagery. It partitions images into manageable slices, performs object detection on each slice, and then stitches the results back together. This tutorial will guide you through the process of running YOLOv8 inference on video files with the aid of SAHI. +[SAHI](https://docs.ultralytics.com/guides/sahi-tiled-inference/) is designed to optimize object detection algorithms for large-scale and high-resolution imagery. It partitions images into manageable slices, performs object detection on each slice, and then stitches the results back together. This tutorial will guide you through the process of running YOLO11 inference on video files with the aid of SAHI. 
## Table of Contents - [Step 1: Install the Required Libraries](#step-1-install-the-required-libraries) -- [Step 2: Run the Inference with SAHI using Ultralytics YOLOv8](#step-2-run-the-inference-with-sahi-using-ultralytics-yolov8) +- [Step 2: Run the Inference with SAHI using Ultralytics YOLO11](#step-2-run-the-inference-with-sahi-using-ultralytics-yolo11) - [Usage Options](#usage-options) - [FAQ](#faq) @@ -18,13 +18,13 @@ Clone the repository, install dependencies and `cd` to this local directory for git clone https://github.com/ultralytics/ultralytics # Install dependencies -pip install sahi ultralytics +pip install -U sahi ultralytics # cd to local directory cd ultralytics/examples/YOLOv8-SAHI-Inference-Video ``` -## Step 2: Run the Inference with SAHI using Ultralytics YOLOv8 +## Step 2: Run the Inference with SAHI using Ultralytics YOLO11 Here are the basic commands for running the inference: @@ -33,14 +33,14 @@ Here are the basic commands for running the inference: python yolov8_sahi.py --source "path/to/video.mp4" --save-img #if you want to change model file -python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n.pt" +python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolo11n.pt" ``` ## Usage Options - `--source`: Specifies the path to the video file you want to run inference on. - `--save-img`: Flag to save the detection results as images. -- `--weights`: Specifies a different YOLOv8 model file (e.g., `yolov8n.pt`, `yolov8s.pt`, `yolov8m.pt`, `yolov8l.pt`, `yolov8x.pt`). +- `--weights`: Specifies a different YOLO11 model file (e.g., `yolo11n.pt`, `yolo11s.pt`, `yolo11m.pt`, `yolo11l.pt`, `yolo11x.pt`). ## FAQ @@ -48,9 +48,9 @@ python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n SAHI stands for Slicing Aided Hyper Inference. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images.
The library source code is available on [GitHub](https://github.com/obss/sahi). -**2. Why use SAHI with YOLOv8?** +**2. Why use SAHI with YOLO11?** -SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLOv8, especially when working with high-resolution videos. +SAHI can handle large-scale images by slicing them into smaller, more manageable sizes without compromising the detection quality. This makes it a great companion to YOLO11, especially when working with high-resolution videos. **3. How do I debug issues?** @@ -66,4 +66,4 @@ Yes, you can specify different YOLO model weights using the `--weights` option. **5. Where can I find more information?** -For a full guide to YOLOv8 with SAHI see [https://docs.ultralytics.com/guides/sahi-tiled-inference](https://docs.ultralytics.com/guides/sahi-tiled-inference/). +For a full guide to YOLO11 with SAHI see [https://docs.ultralytics.com/guides/sahi-tiled-inference](https://docs.ultralytics.com/guides/sahi-tiled-inference/). 
diff --git a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py index 4243cc35bb9..69872dcc9e4 100644 --- a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py +++ b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import argparse from pathlib import Path @@ -6,32 +6,37 @@ import cv2 from sahi import AutoDetectionModel from sahi.predict import get_sliced_prediction -from sahi.utils.yolov8 import download_yolov8s_model +from sahi.utils.ultralytics import download_yolo11n_model from ultralytics.utils.files import increment_path from ultralytics.utils.plotting import Annotator, colors class SAHIInference: - """Runs YOLOv8 and SAHI for object detection on video with options to view, save, and track results.""" + """Runs Ultralytics YOLO11 and SAHI for object detection on video with options to view, save, and track results.""" def __init__(self): - """Initializes the SAHIInference class for performing sliced inference using SAHI with YOLOv8 models.""" + """Initializes the SAHIInference class for performing sliced inference using SAHI with YOLO11 models.""" self.detection_model = None def load_model(self, weights): - """Loads a YOLOv8 model with specified weights for object detection using SAHI.""" - yolov8_model_path = f"models/{weights}" - download_yolov8s_model(yolov8_model_path) + """Loads a YOLO11 model with specified weights for object detection using SAHI.""" + yolo11_model_path = f"models/{weights}" + download_yolo11n_model(yolo11_model_path) self.detection_model = AutoDetectionModel.from_pretrained( - model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu" + model_type="ultralytics", model_path=yolo11_model_path, device="cpu" ) def inference( - self, weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False, 
track=False + self, + weights="yolo11n.pt", + source="test.mp4", + view_img=False, + save_img=False, + exist_ok=False, ): """ - Run object detection on a video using YOLOv8 and SAHI. + Run object detection on a video using YOLO11 and SAHI. Args: weights (str): Model weights path. @@ -39,7 +44,6 @@ def inference( view_img (bool): Show results. save_img (bool): Save results. exist_ok (bool): Overwrite existing files. - track (bool): Enable object tracking with SAHI """ # Video setup cap = cv2.VideoCapture(source) @@ -50,8 +54,8 @@ def inference( save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok) save_dir.mkdir(parents=True, exist_ok=True) video_writer = cv2.VideoWriter( - str(save_dir / f"{Path(source).stem}.mp4"), - cv2.VideoWriter_fourcc(*"mp4v"), + str(save_dir / f"{Path(source).stem}.avi"), + cv2.VideoWriter_fourcc(*"MJPG"), int(cap.get(5)), (frame_width, frame_height), ) @@ -64,12 +68,10 @@ def inference( break annotator = Annotator(frame) # Initialize annotator for plotting detection and tracking results results = get_sliced_prediction( - frame, + frame[..., ::-1], self.detection_model, slice_height=512, slice_width=512, - overlap_height_ratio=0.2, - overlap_width_ratio=0.2, ) detection_data = [ (det.category.name, det.category.id, (det.bbox.minx, det.bbox.miny, det.bbox.maxx, det.bbox.maxy)) @@ -93,7 +95,7 @@ def inference( def parse_opt(self): """Parse command line arguments.""" parser = argparse.ArgumentParser() - parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path") + parser.add_argument("--weights", type=str, default="yolo11n.pt", help="initial weights path") parser.add_argument("--source", type=str, required=True, help="video file path") parser.add_argument("--view-img", action="store_true", help="show results") parser.add_argument("--save-img", action="store_true", help="save results") diff --git a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py 
b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py index c1779deaa06..b8e2e7d55d8 100644 --- a/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py +++ b/examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license +# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license import argparse diff --git a/examples/YOLOv8-TFLite-Python/README.md b/examples/YOLOv8-TFLite-Python/README.md new file mode 100644 index 00000000000..0156759fdba --- /dev/null +++ b/examples/YOLOv8-TFLite-Python/README.md @@ -0,0 +1,55 @@ +# YOLOv8 - TFLite Runtime + +This example shows how to run inference with a YOLOv8 TFLite model. It supports FP32, FP16 and INT8 models. + +## Installation + +### Installing `tflite-runtime` + +To load TFLite models, install the `tflite-runtime` package using: + +```bash +pip install tflite-runtime +``` + +### Installing `tensorflow-gpu` (For NVIDIA GPU Users) + +Leverage GPU acceleration with NVIDIA GPUs by installing `tensorflow-gpu`: + +```bash +pip install tensorflow-gpu +``` + +**Note:** Ensure you have compatible GPU drivers installed on your system. + +### Installing `tensorflow` (CPU Version) + +For CPU usage or non-NVIDIA GPUs, install TensorFlow with: + +```bash +pip install tensorflow +``` + +## Usage + +Follow these instructions to run YOLOv8 after successful installation. + +Convert the YOLOv8 model to TFLite format: + +```bash +yolo export model=yolov8n.pt imgsz=640 format=tflite int8 +``` + +Locate the TFLite model in `yolov8n_saved_model`. Then, execute the following in your terminal: + +```bash +python main.py --model yolov8n_full_integer_quant.tflite --img image.jpg --conf 0.25 --iou 0.45 --metadata "metadata.yaml" +``` + +Replace `yolov8n_full_integer_quant.tflite` with the TFLite model path, `image.jpg` with the input image path, `metadata.yaml` with the one generated by `ultralytics` during export, and adjust the confidence (conf) and IoU thresholds (iou) as necessary.
+ +### Output + +The output would show the detections along with the class labels and confidences of each detected object. + +![image](https://github.com/wamiqraza/Attribute-recognition-and-reidentification-Market1501-dataset/blob/main/img/bus.jpg) diff --git a/examples/YOLOv8-TFLite-Python/main.py b/examples/YOLOv8-TFLite-Python/main.py new file mode 100644 index 00000000000..00c40303285 --- /dev/null +++ b/examples/YOLOv8-TFLite-Python/main.py @@ -0,0 +1,221 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +import argparse +from typing import Tuple, Union + +import cv2 +import numpy as np +import tensorflow as tf +import yaml + +from ultralytics.utils import ASSETS + +try: + from tflite_runtime.interpreter import Interpreter +except ImportError: + import tensorflow as tf + + Interpreter = tf.lite.Interpreter + + +class YOLOv8TFLite: + """ + YOLOv8TFLite. + + A class for performing object detection using the YOLOv8 model with TensorFlow Lite. + + Attributes: + model (str): Path to the TensorFlow Lite model file. + conf (float): Confidence threshold for filtering detections. + iou (float): Intersection over Union threshold for non-maximum suppression. + metadata (Optional[str]): Path to the metadata file, if any. + + Methods: + detect(img_path: str) -> np.ndarray: + Performs inference and returns the output image with drawn detections. + """ + + def __init__(self, model: str, conf: float = 0.25, iou: float = 0.45, metadata: Union[str, None] = None): + """ + Initializes an instance of the YOLOv8TFLite class. + + Args: + model (str): Path to the TFLite model. + conf (float, optional): Confidence threshold for filtering detections. Defaults to 0.25. + iou (float, optional): IoU (Intersection over Union) threshold for non-maximum suppression. Defaults to 0.45. + metadata (Union[str, None], optional): Path to the metadata file or None if not used. Defaults to None. 
+ """ + self.conf = conf + self.iou = iou + if metadata is None: + self.classes = {i: i for i in range(1000)} + else: + with open(metadata) as f: + self.classes = yaml.safe_load(f)["names"] + np.random.seed(42) + self.color_palette = np.random.uniform(128, 255, size=(len(self.classes), 3)) + + self.model = Interpreter(model_path=model) + self.model.allocate_tensors() + + input_details = self.model.get_input_details()[0] + + self.in_width, self.in_height = input_details["shape"][1:3] + self.in_index = input_details["index"] + self.in_scale, self.in_zero_point = input_details["quantization"] + self.int8 = input_details["dtype"] == np.int8 + + output_details = self.model.get_output_details()[0] + self.out_index = output_details["index"] + self.out_scale, self.out_zero_point = output_details["quantization"] + + def letterbox(self, img: np.ndarray, new_shape: Tuple = (640, 640)) -> Tuple[np.ndarray, Tuple[float, float]]: + """Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models.""" + shape = img.shape[:2] # current shape [height, width] + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + + # Compute padding + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2 # wh padding + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) + + return img, (top / img.shape[0], left / img.shape[1]) + + def draw_detections(self, img: np.ndarray, box: np.ndarray, score: np.float32, class_id: int) -> None: + """ + Draws bounding boxes and labels on the input image based on the detected objects. 
+ + Args: + img (np.ndarray): The input image to draw detections on. + box (np.ndarray): Detected bounding box in the format [x1, y1, width, height]. + score (np.float32): Corresponding detection score. + class_id (int): Class ID for the detected object. + + Returns: + None + """ + x1, y1, w, h = box + color = self.color_palette[class_id] + + cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2) + + label = f"{self.classes[class_id]}: {score:.2f}" + + (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + + label_x = x1 + label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10 + + cv2.rectangle( + img, + (int(label_x), int(label_y - label_height)), + (int(label_x + label_width), int(label_y + label_height)), + color, + cv2.FILLED, + ) + + cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA) + + def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, Tuple[float, float]]: + """ + Preprocesses the input image before performing inference. + + Args: + img (np.ndarray): The input image to be preprocessed. + + Returns: + Tuple[np.ndarray, Tuple[float, float]]: A tuple containing: + - The preprocessed image (np.ndarray). + - A tuple of two float values representing the padding applied (top/bottom, left/right). + """ + img, pad = self.letterbox(img, (self.in_width, self.in_height)) + img = img[..., ::-1][None] # N,H,W,C for TFLite + img = np.ascontiguousarray(img) + img = img.astype(np.float32) + return img / 255, pad + + def postprocess(self, img: np.ndarray, outputs: np.ndarray, pad: Tuple[float, float]) -> np.ndarray: + """ + Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs. + + Args: + img (numpy.ndarray): The input image. + outputs (numpy.ndarray): The output of the model. + pad (Tuple[float, float]): Padding used by letterbox. + + Returns: + numpy.ndarray: The input image with detections drawn on it. 
+ """ + outputs[:, 0] -= pad[1] + outputs[:, 1] -= pad[0] + outputs[:, :4] *= max(img.shape) + + outputs = outputs.transpose(0, 2, 1) + outputs[..., 0] -= outputs[..., 2] / 2 + outputs[..., 1] -= outputs[..., 3] / 2 + + for out in outputs: + scores = out[:, 4:].max(-1) + keep = scores > self.conf + boxes = out[keep, :4] + scores = scores[keep] + class_ids = out[keep, 4:].argmax(-1) + + indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf, self.iou).flatten() + + [self.draw_detections(img, boxes[i], scores[i], class_ids[i]) for i in indices] + + return img + + def detect(self, img_path: str) -> np.ndarray: + """ + Performs inference using a TFLite model and returns the output image with drawn detections. + + Args: + img_path (str): The path to the input image file. + + Returns: + np.ndarray: The output image with drawn detections. + """ + img = cv2.imread(img_path) + x, pad = self.preprocess(img) + if self.int8: + x = (x / self.in_scale + self.in_zero_point).astype(np.int8) + self.model.set_tensor(self.in_index, x) + + self.model.invoke() + + y = self.model.get_tensor(self.out_index) + + if self.int8: + y = (y.astype(np.float32) - self.out_zero_point) * self.out_scale + + return self.postprocess(img, y, pad) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", + type=str, + default="yolov8n_saved_model/yolov8n_full_integer_quant.tflite", + help="Path to TFLite model.", + ) + parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image") + parser.add_argument("--conf", type=float, default=0.25, help="Confidence threshold") + parser.add_argument("--iou", type=float, default=0.45, help="NMS IoU threshold") + parser.add_argument("--metadata", type=str, default="yolov8n_saved_model/metadata.yaml", help="Metadata yaml") + args = parser.parse_args() + + detector = YOLOv8TFLite(args.model, args.conf, args.iou, args.metadata) + result = detector.detect(str(ASSETS / "bus.jpg")) + + 
cv2.imshow("Output", result) + cv2.waitKey(0) diff --git a/examples/heatmaps.ipynb b/examples/heatmaps.ipynb index c8064cc7f4a..4f34da35a4c 100644 --- a/examples/heatmaps.ipynb +++ b/examples/heatmaps.ipynb @@ -13,17 +13,17 @@ "\n", " [ไธญๆ–‡](https://docs.ultralytics.com/zh/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/)\n", "\n", - " \"Ultralytics\n", + " \"Ultralytics\n", " \"Run\n", " \"Open\n", - " \"Open\n", + " \"Open\n", " \"Discord\"\n", "\n", - "Welcome to the Ultralytics YOLOv8 ๐Ÿš€ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "Welcome to the Ultralytics YOLO11 ๐Ÿš€ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n", "\n", - "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", + "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. 
They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", "\n", - "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Heatmap Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Heatmap Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", "\n", "

" ] @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { @@ -56,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ultralytics YOLOv8.2.17 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", + "Ultralytics 8.2.17 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n" ] } @@ -76,14 +76,14 @@ "source": [ "# Introduction to Heatmaps\n", "\n", - "A heatmap generated with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.\n", + "A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) transforms complex data into a vibrant, color-coded matrix. 
This visual tool employs a spectrum of colors to represent varying data values, where warmer hues indicate higher intensities and cooler tones signify lower values. Heatmaps excel in visualizing intricate data patterns, correlations, and anomalies, offering an accessible and engaging approach to data interpretation across diverse domains.\n", "\n", "## Real World Applications\n", "\n", "| Transportation | Retail |\n", "|:-----------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------:|\n", - "| ![Ultralytics YOLOv8 Transportation Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/288d7053-622b-4452-b4e4-1f41aeb764aa) | ![Ultralytics YOLOv8 Retail Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/edef75ad-50a7-4c0a-be4a-a66cdfc12802) |\n", - "| Ultralytics YOLOv8 Transportation Heatmap | Ultralytics YOLOv8 Retail Heatmap |\n" + "| ![Ultralytics YOLO11 Transportation Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/288d7053-622b-4452-b4e4-1f41aeb764aa) | ![Ultralytics YOLO11 Retail Heatmap](https://github.com/RizwanMunawar/ultralytics/assets/62513924/edef75ad-50a7-4c0a-be4a-a66cdfc12802) |\n", + "| Ultralytics YOLO11 Transportation Heatmap | Ultralytics YOLO11 Retail Heatmap |\n" ] }, { @@ -96,10 +96,7 @@ "source": [ "import cv2\n", "\n", - "from ultralytics import YOLO, solutions\n", - "\n", - "# Load YOLO model\n", - "model = YOLO(\"yolov8n.pt\")\n", + "from ultralytics import solutions\n", "\n", "# Open video file\n", "cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n", @@ -113,10 +110,9 @@ "\n", "# Initialize heatmap object\n", "heatmap_obj = solutions.Heatmap(\n", - " colormap=cv2.COLORMAP_PARULA,\n", - " view_img=True,\n", - " shape=\"circle\",\n", - " names=model.names,\n", + " 
colormap=cv2.COLORMAP_PARULA, # Color of the heatmap\n", + " show=True, # Display the image during processing\n", + " model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n", ")\n", "\n", "while cap.isOpened():\n", @@ -125,11 +121,8 @@ " print(\"Video frame is empty or video processing has been successfully completed.\")\n", " break\n", "\n", - " # Perform tracking on the current frame\n", - " tracks = model.track(im0, persist=True, show=False)\n", - "\n", " # Generate heatmap on the frame\n", - " im0 = heatmap_obj.generate_heatmap(im0, tracks)\n", + " im0 = heatmap_obj.generate_heatmap(im0)\n", "\n", " # Write the frame to the output video\n", " video_writer.write(im0)\n", @@ -161,15 +154,15 @@ "- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n", "- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n", "\n", - "## YOLOv8 ๐Ÿš€ Resources\n", + "## YOLO11 ๐Ÿš€ Resources\n", "\n", - "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n", + "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. 
Here are some essential resources to help you get started with YOLO11:\n", "\n", - "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", - "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n", + "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", + "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n", "- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n", "\n", - "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." + "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." 
] } ], diff --git a/examples/hub.ipynb b/examples/hub.ipynb index e86b795e5eb..05657155dfe 100644 --- a/examples/hub.ipynb +++ b/examples/hub.ipynb @@ -13,7 +13,7 @@ "\n", "[ไธญๆ–‡](https://docs.ultralytics.com/zh/hub/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/hub/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/hub/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/hub/) | [Deutsch](https://docs.ultralytics.com/de/hub/) | [Franรงais](https://docs.ultralytics.com/fr/hub/) | [Espaรฑol](https://docs.ultralytics.com/es/hub/) | [Portuguรชs](https://docs.ultralytics.com/pt/hub/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/hub/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/hub/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/hub/)\n", "\n", - " \"CI\n", + " \"CI\n", " \"Open\n", "\n", " \"Discord\"\n", @@ -36,7 +36,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { @@ -54,7 +54,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ultralytics YOLOv8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", + "Ultralytics 8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 
(T4, 15102MiB)\n", "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 28.8/78.2 GB disk)\n" ] } diff --git a/examples/object_counting.ipynb b/examples/object_counting.ipynb index 988bc3269d7..b1f0c523f29 100644 --- a/examples/object_counting.ipynb +++ b/examples/object_counting.ipynb @@ -13,17 +13,17 @@ "\n", " [ไธญๆ–‡](https://docs.ultralytics.com/zh/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/)\n", "\n", - " \"Ultralytics\n", + " \"Ultralytics\n", " \"Run\n", " \"Open\n", - " \"Open\n", + " \"Open\n", " \"Discord\"\n", "\n", - "Welcome to the Ultralytics YOLOv8 ๐Ÿš€ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "Welcome to the Ultralytics YOLO11 ๐Ÿš€ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n", "\n", - "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", + "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. 
They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", "\n", - "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Object Counting Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Object Counting Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", "\n", "" ] @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { @@ -56,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ultralytics YOLOv8.2.18 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", + "Ultralytics 8.2.18 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n" ] } @@ -74,11 +74,11 @@ "id": "m7VkxQ2aeg7k" }, "source": [ - "# Object Counting using Ultralytics YOLOv8 ๐Ÿš€\n", + "# 
Object Counting using Ultralytics YOLO11 ๐Ÿš€\n", "\n", "## What is Object Counting?\n", "\n", - "Object counting with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLOv8 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and deep learning capabilities.\n", + "Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) involves accurate identification and counting of specific objects in videos and camera streams. YOLO11 excels in real-time applications, providing efficient and precise object counting for various scenarios like crowd analysis and surveillance, thanks to its state-of-the-art algorithms and deep learning capabilities.\n", "\n", "## Advantages of Object Counting?\n", "\n", @@ -90,8 +90,8 @@ "\n", "| Logistics | Aquaculture |\n", "|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------:|\n", - "| ![Conveyor Belt Packets Counting Using Ultralytics YOLOv8](https://github.com/RizwanMunawar/ultralytics/assets/62513924/70e2d106-510c-4c6c-a57a-d34a765aa757) | ![Fish Counting in Sea using Ultralytics YOLOv8](https://github.com/RizwanMunawar/ultralytics/assets/62513924/c60d047b-3837-435f-8d29-bb9fc95d2191) |\n", - "| Conveyor Belt Packets Counting Using Ultralytics YOLOv8 | Fish Counting in Sea using Ultralytics YOLOv8 |\n" + "| ![Conveyor Belt Packets Counting Using Ultralytics YOLO11](https://github.com/RizwanMunawar/ultralytics/assets/62513924/70e2d106-510c-4c6c-a57a-d34a765aa757) | ![Fish Counting in Sea using Ultralytics 
YOLO11](https://github.com/RizwanMunawar/ultralytics/assets/62513924/c60d047b-3837-435f-8d29-bb9fc95d2191) |\n", + "| Conveyor Belt Packets Counting Using Ultralytics YOLO11 | Fish Counting in Sea using Ultralytics YOLO11 |\n" ] }, { @@ -104,10 +104,7 @@ "source": [ "import cv2\n", "\n", - "from ultralytics import YOLO, solutions\n", - "\n", - "# Load the pre-trained YOLOv8 model\n", - "model = YOLO(\"yolov8n.pt\")\n", + "from ultralytics import solutions\n", "\n", "# Open the video file\n", "cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n", @@ -119,19 +116,15 @@ "# Define points for a line or region of interest in the video frame\n", "line_points = [(20, 400), (1080, 400)] # Line coordinates\n", "\n", - "# Specify classes to count, for example: person (0) and car (2)\n", - "classes_to_count = [0, 2] # Class IDs for person and car\n", - "\n", "# Initialize the video writer to save the output video\n", "video_writer = cv2.VideoWriter(\"object_counting_output.avi\", cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (w, h))\n", "\n", "# Initialize the Object Counter with visualization options and other parameters\n", "counter = solutions.ObjectCounter(\n", - " view_img=True, # Display the image during processing\n", - " reg_pts=line_points, # Region of interest points\n", - " names=model.names, # Class names from the YOLO model\n", - " draw_tracks=True, # Draw tracking lines for objects\n", - " line_thickness=2, # Thickness of the lines drawn\n", + " show=True, # Display the image during processing\n", + " region=line_points, # Region of interest points\n", + " model=\"yolo11n.pt\", # Ultralytics YOLO11 model file\n", + " line_width=2, # Thickness of the lines and bounding boxes\n", ")\n", "\n", "# Process video frames in a loop\n", @@ -141,11 +134,8 @@ " print(\"Video frame is empty or video processing has been successfully completed.\")\n", " break\n", "\n", - " # Perform object tracking on the current frame, filtering by specified classes\n", - " tracks = 
model.track(im0, persist=True, show=False, classes=classes_to_count)\n", - "\n", " # Use the Object Counter to count objects in the frame and get the annotated image\n", - " im0 = counter.start_counting(im0, tracks)\n", + " im0 = counter.count(im0)\n", "\n", " # Write the annotated frame to the output video\n", " video_writer.write(im0)\n", @@ -179,15 +169,15 @@ "- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n", "- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n", "\n", - "## YOLOv8 ๐Ÿš€ Resources\n", + "## YOLO11 ๐Ÿš€ Resources\n", "\n", - "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n", + "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. 
Here are some essential resources to help you get started with YOLO11:\n", "\n", - "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", - "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n", + "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", + "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n", "- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n", "\n", - "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." + "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." 
] } ], diff --git a/examples/object_tracking.ipynb b/examples/object_tracking.ipynb index af43cc51745..f89c34ddeae 100644 --- a/examples/object_tracking.ipynb +++ b/examples/object_tracking.ipynb @@ -13,17 +13,17 @@ "\n", " [ไธญๆ–‡](https://docs.ultralytics.com/zh/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/)\n", "\n", - " \"Ultralytics\n", + " \"Ultralytics\n", " \"Run\n", " \"Open\n", - " \"Open\n", + " \"Open\n", " \"Discord\"\n", "\n", - "Welcome to the Ultralytics YOLOv8 ๐Ÿš€ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "Welcome to the Ultralytics YOLO11 ๐Ÿš€ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n", "\n", - "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", + "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. 
They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", "\n", - "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Tracking Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Tracking Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", "\n", "" ] @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { @@ -56,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Ultralytics YOLOv8.2.17 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", + "Ultralytics 8.2.17 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 29.8/78.2 GB disk)\n" ] } @@ -76,7 +76,7 @@ "source": [ "# Ultralytics Object Tracking\n", "\n", - "[Ultralytics 
YOLOv8](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.\n", + "[Ultralytics YOLO11](https://github.com/ultralytics/ultralytics/) instance segmentation involves identifying and outlining individual objects in an image, providing a detailed understanding of spatial distribution. Unlike semantic segmentation, it uniquely labels and precisely delineates each object, crucial for tasks like object detection and medical imaging.\n", "\n", "There are two types of instance segmentation tracking available in the Ultralytics package:\n", "\n", @@ -144,7 +144,7 @@ "track_history = defaultdict(lambda: [])\n", "\n", "# Load the YOLO model with segmentation capabilities\n", - "model = YOLO(\"yolov8n-seg.pt\")\n", + "model = YOLO(\"yolo11n-seg.pt\")\n", "\n", "# Open the video file\n", "cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n", @@ -176,7 +176,7 @@ "\n", " # Annotate each mask with its corresponding tracking ID and color\n", " for mask, track_id in zip(masks, track_ids):\n", - " annotator.seg_bbox(mask=mask, mask_color=colors(track_id, True), track_label=str(track_id))\n", + " annotator.seg_bbox(mask=mask, mask_color=colors(int(track_id), True), label=str(track_id))\n", "\n", " # Write the annotated frame to the output video\n", " out.write(im0)\n", @@ -214,15 +214,15 @@ "- [About Us](https://ultralytics.com/about): Discover our mission, vision, and the story behind Ultralytics.\n", "- [Join Our Team](https://ultralytics.com/work): Explore career opportunities and join our team of talented professionals.\n", "\n", - "## YOLOv8 ๐Ÿš€ Resources\n", + "## YOLO11 ๐Ÿš€ Resources\n", "\n", - "YOLOv8 is the latest evolution in the YOLO series, offering state-of-the-art performance in 
object detection and image segmentation. Here are some essential resources to help you get started with YOLOv8:\n", + "YOLO11 is the latest evolution in the YOLO series, offering state-of-the-art performance in object detection and image segmentation. Here are some essential resources to help you get started with YOLO11:\n", "\n", - "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLOv8 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", - "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLOv8, including installation guides, tutorials, and detailed API references.\n", + "- [GitHub](https://github.com/ultralytics/ultralytics): Access the YOLO11 repository on GitHub, where you can find the source code, contribute to the project, and report issues.\n", + "- [Docs](https://docs.ultralytics.com/): Explore the official documentation for YOLO11, including installation guides, tutorials, and detailed API references.\n", "- [Discord](https://ultralytics.com/discord): Join our Discord community to connect with other users, share your projects, and get help from the Ultralytics team.\n", "\n", - "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLOv8. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." + "These resources are designed to help you leverage the full potential of Ultralytics' offerings and YOLO11. Whether you're a beginner or an experienced developer, you'll find the information and support you need to succeed." 
] } ], diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 1ecb2d98095..1d19aeee37d 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -3,7 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "name": "YOLOv8 Tutorial", + "name": "YOLO11 Tutorial", "provenance": [], "toc_visible": true }, @@ -27,21 +27,28 @@ "\n", " [ไธญๆ–‡](https://docs.ultralytics.com/zh/) | [ํ•œ๊ตญ์–ด](https://docs.ultralytics.com/ko/) | [ๆ—ฅๆœฌ่ชž](https://docs.ultralytics.com/ja/) | [ะ ัƒััะบะธะน](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Franรงais](https://docs.ultralytics.com/fr/) | [Espaรฑol](https://docs.ultralytics.com/es/) | [Portuguรชs](https://docs.ultralytics.com/pt/) | [Tรผrkรงe](https://docs.ultralytics.com/tr/) | [Tiแบฟng Viแป‡t](https://docs.ultralytics.com/vi/) | [ุงู„ุนุฑุจูŠุฉ](https://docs.ultralytics.com/ar/)\n", "\n", - " \"Ultralytics\n", + " \"Ultralytics\n", " \"Run\n", " \"Open\n", - " \"Open\n", + " \"Open\n", "\n", " \"Discord\"\n", " \"Ultralytics\n", " \"Ultralytics\n", "\n", - "Welcome to the Ultralytics YOLOv8 ๐Ÿš€ notebook! YOLOv8 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n", + "Welcome to the Ultralytics YOLO11 ๐Ÿš€ notebook! YOLO11 is the latest version of the YOLO (You Only Look Once) AI models developed by Ultralytics. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLO11 and understand its features and capabilities.\n", "\n", - "YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. 
They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", + "YOLO11 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n", "\n", - "We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", + "We hope that the resources in this notebook will help you get the most out of YOLO11. Please browse the YOLO11 Docs for details, raise an issue on GitHub for support, and join our Discord community for questions and discussions!\n", "\n", + " \n", + " \"Ultralytics\n", + " \n", + "

\n", + " Watch: How to Train\n", + " Ultralytics\n", + " YOLO11 Model on Custom Dataset using Google Colab Notebook ๐Ÿš€

\n", "" ] }, @@ -55,7 +62,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { @@ -65,21 +72,21 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "96335d4c-20a9-4864-f7a4-bb2eb0077a9d" + "outputId": "2e992f9f-90bb-4668-de12-fed629975285" }, "source": [ "%pip install ultralytics\n", "import ultralytics\n", "ultralytics.checks()" ], - "execution_count": null, + "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Ultralytics YOLOv8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", - "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 28.8/78.2 GB disk)\n" + "Ultralytics 8.3.2 ๐Ÿš€ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "Setup complete โœ… (2 CPUs, 12.7 GB RAM, 41.1/112.6 GB disk)\n" ] } ] @@ -92,7 +99,7 @@ "source": [ "# 1. Predict\n", "\n", - "YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. 
See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/train/).\n" + "YOLO11 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, e.g. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLO11 Predict Docs](https://docs.ultralytics.com/modes/predict/).\n" ] }, { @@ -102,27 +109,27 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "84f32db2-80b0-4f35-9a2a-a56d11f7863f" + "outputId": "e3ebec6f-658a-4803-d80c-e07d12908767" }, "source": [ - "# Run inference on an image with YOLOv8n\n", - "!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'" + "# Run inference on an image with YOLO11n\n", + "!yolo predict model=yolo11n.pt source='https://ultralytics.com/images/zidane.jpg'" ], - "execution_count": null, + "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n", - "100% 6.23M/6.23M [00:00<00:00, 83.2MB/s]\n", - "Ultralytics YOLOv8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", - "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n", + "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...\n", + "100% 5.35M/5.35M [00:00<00:00, 72.7MB/s]\n", + "Ultralytics 8.3.2 ๐Ÿš€ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n", "\n", "Downloading https://ultralytics.com/images/zidane.jpg to 'zidane.jpg'...\n", - "100% 165k/165k [00:00<00:00, 11.1MB/s]\n", - "image 1/1 /content/zidane.jpg: 384x640 2 persons, 1
tie, 21.4ms\n", - "Speed: 1.9ms preprocess, 21.4ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)\n", + "100% 49.2k/49.2k [00:00<00:00, 5.37MB/s]\n", + "image 1/1 /content/zidane.jpg: 384x640 2 persons, 1 tie, 63.4ms\n", + "Speed: 14.5ms preprocess, 63.4ms inference, 820.9ms postprocess per image at shape (1, 3, 384, 640)\n", "Results saved to \u001b[1mruns/detect/predict\u001b[0m\n", "๐Ÿ’ก Learn more at https://docs.ultralytics.com/modes/predict\n" ] @@ -146,7 +153,7 @@ }, "source": [ "# 2. Val\n", - "Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information." + "Validate a model's accuracy on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset's `val` or `test` splits. The latest YOLO11 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLO11 Val Docs](https://docs.ultralytics.com/modes/val/) for more information." 
] }, { @@ -167,43 +174,43 @@ "cell_type": "code", "metadata": { "id": "X58w8JLpMnjH", - "outputId": "bed10d45-ceb6-4b6f-86b7-9428208b142a", + "outputId": "af2a5deb-029b-466d-96a4-bd3e406987fa", "colab": { "base_uri": "https://localhost:8080/" } }, "source": [ - "# Validate YOLOv8n on COCO8 val\n", - "!yolo val model=yolov8n.pt data=coco8.yaml" + "# Validate YOLO11n on COCO8 val\n", + "!yolo val model=yolo11n.pt data=coco8.yaml" ], - "execution_count": null, + "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Ultralytics YOLOv8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", - "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs\n", + "Ultralytics 8.3.2 ๐Ÿš€ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLO11n summary (fused): 238 layers, 2,616,248 parameters, 0 gradients, 6.5 GFLOPs\n", "\n", "Dataset 'coco8.yaml' images not found โš ๏ธ, missing path '/content/datasets/coco8/images/val'\n", "Downloading https://ultralytics.com/assets/coco8.zip to '/content/datasets/coco8.zip'...\n", - "100% 433k/433k [00:00<00:00, 14.2MB/s]\n", - "Unzipping /content/datasets/coco8.zip to /content/datasets/coco8...: 100% 25/25 [00:00<00:00, 1093.93file/s]\n", - "Dataset download success โœ… (1.3s), saved to \u001b[1m/content/datasets\u001b[0m\n", + "100% 433k/433k [00:00<00:00, 15.8MB/s]\n", + "Unzipping /content/datasets/coco8.zip to /content/datasets/coco8...: 100% 25/25 [00:00<00:00, 1188.35file/s]\n", + "Dataset download success โœ… (1.4s), saved to \u001b[1m/content/datasets\u001b[0m\n", "\n", "Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...\n", - "100% 755k/755k [00:00<00:00, 17.4MB/s]\n", - "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 
4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 157.00it/s]\n", + "100% 755k/755k [00:00<00:00, 17.7MB/s]\n", + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco8/labels/val... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 142.04it/s]\n", "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco8/labels/val.cache\n", - " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:06<00:00, 6.89s/it]\n", - " all 4 17 0.621 0.833 0.888 0.63\n", - " person 4 10 0.721 0.5 0.519 0.269\n", - " dog 4 1 0.37 1 0.995 0.597\n", - " horse 4 2 0.751 1 0.995 0.631\n", - " elephant 4 2 0.505 0.5 0.828 0.394\n", - " umbrella 4 1 0.564 1 0.995 0.995\n", - " potted plant 4 1 0.814 1 0.995 0.895\n", - "Speed: 0.3ms preprocess, 4.9ms inference, 0.0ms loss, 1.3ms postprocess per image\n", + " Class Images Instances Box(P R mAP50 mAP50-95): 100% 1/1 [00:04<00:00, 4.75s/it]\n", + " all 4 17 0.57 0.85 0.847 0.632\n", + " person 3 10 0.557 0.6 0.585 0.272\n", + " dog 1 1 0.548 1 0.995 0.697\n", + " horse 1 2 0.531 1 0.995 0.674\n", + " elephant 1 2 0.371 0.5 0.516 0.256\n", + " umbrella 1 1 0.569 1 0.995 0.995\n", + " potted plant 1 1 0.847 1 0.995 0.895\n", + "Speed: 1.0ms preprocess, 73.8ms inference, 0.0ms loss, 561.4ms postprocess per image\n", "Results saved to \u001b[1mruns/detect/val\u001b[0m\n", "๐Ÿ’ก Learn more at https://docs.ultralytics.com/modes/val\n" ] @@ -220,13 +227,13 @@ "\n", "

\n", "\n", - "Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information." + "Train YOLO11 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLO11 Train Docs](https://docs.ultralytics.com/modes/train/) for more information." ] }, { "cell_type": "code", "source": [ - "#@title Select YOLOv8 ๐Ÿš€ logger {run: 'auto'}\n", + "#@title Select YOLO11 ๐Ÿš€ logger {run: 'auto'}\n", "logger = 'Comet' #@param ['Comet', 'TensorBoard']\n", "\n", "if logger == 'Comet':\n", @@ -246,64 +253,62 @@ "cell_type": "code", "metadata": { "id": "1NcFxRcFdJ_O", - "outputId": "9f60c6cb-fa9c-4785-cb7a-71d40abeaf38", + "outputId": "952f35f7-666f-4121-fbdf-2b3a33b28081", "colab": { "base_uri": "https://localhost:8080/" } }, "source": [ - "# Train YOLOv8n on COCO8 for 3 epochs\n", - "!yolo train model=yolov8n.pt data=coco8.yaml epochs=3 imgsz=640" + "# Train YOLO11n on COCO8 for 3 epochs\n", + "!yolo train model=yolo11n.pt data=coco8.yaml epochs=3 imgsz=640" ], - "execution_count": null, + "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Ultralytics YOLOv8.2.3 ๐Ÿš€ Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (T4, 15102MiB)\n", - "\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolov8n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, 
cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train\n", + "Ultralytics 8.3.2 ๐Ÿš€ Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "\u001b[34m\u001b[1mengine/trainer: \u001b[0mtask=detect, mode=train, model=yolo11n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, 
visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=True, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, copy_paste_mode=flip, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train3\n", "\n", " from n params module arguments \n", " 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] \n", " 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] \n", - " 2 -1 1 7360 ultralytics.nn.modules.block.C2f [32, 32, 1, True] \n", - " 3 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] \n", - " 4 -1 2 49664 ultralytics.nn.modules.block.C2f [64, 64, 2, True] \n", - " 5 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] \n", - " 6 -1 2 197632 ultralytics.nn.modules.block.C2f [128, 128, 2, True] \n", + " 2 -1 1 6640 ultralytics.nn.modules.block.C3k2 [32, 64, 1, False, 0.25] \n", + " 3 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n", + " 4 -1 1 26080 ultralytics.nn.modules.block.C3k2 [64, 128, 1, False, 0.25] \n", + " 5 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n", + " 6 -1 1 87040 ultralytics.nn.modules.block.C3k2 [128, 128, 1, True] \n", " 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] \n", - " 8 -1 1 460288 ultralytics.nn.modules.block.C2f [256, 256, 1, True] \n", + " 8 -1 1 346112 
ultralytics.nn.modules.block.C3k2 [256, 256, 1, True] \n", " 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] \n", - " 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", - " 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", - " 12 -1 1 148224 ultralytics.nn.modules.block.C2f [384, 128, 1] \n", - " 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", - " 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", - " 15 -1 1 37248 ultralytics.nn.modules.block.C2f [192, 64, 1] \n", - " 16 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n", - " 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", - " 18 -1 1 123648 ultralytics.nn.modules.block.C2f [192, 128, 1] \n", - " 19 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n", - " 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", - " 21 -1 1 493056 ultralytics.nn.modules.block.C2f [384, 256, 1] \n", - " 22 [15, 18, 21] 1 897664 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n", - "Model summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs\n", + " 10 -1 1 249728 ultralytics.nn.modules.block.C2PSA [256, 256, 1] \n", + " 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 12 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 13 -1 1 111296 ultralytics.nn.modules.block.C3k2 [384, 128, 1, False] \n", + " 14 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n", + " 15 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 16 -1 1 32096 ultralytics.nn.modules.block.C3k2 [256, 64, 1, False] \n", + " 17 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] \n", + " 18 [-1, 13] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 19 -1 1 86720 ultralytics.nn.modules.block.C3k2 [192, 128, 1, False] \n", + " 20 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] \n", + " 21 [-1, 10] 1 0 ultralytics.nn.modules.conv.Concat [1] \n", + " 
22 -1 1 378880 ultralytics.nn.modules.block.C3k2 [384, 256, 1, True] \n", + " 23 [16, 19, 22] 1 464912 ultralytics.nn.modules.head.Detect [80, [64, 128, 256]] \n", + "YOLO11n summary: 319 layers, 2,624,080 parameters, 2,624,064 gradients, 6.6 GFLOPs\n", "\n", - "Transferred 355/355 items from pretrained weights\n", + "Transferred 499/499 items from pretrained weights\n", "\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/detect/train', view at http://localhost:6006/\n", - "Freezing layer 'model.22.dfl.conv.weight'\n", - "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...\n", + "Freezing layer 'model.23.dfl.conv.weight'\n", + "\u001b[34m\u001b[1mAMP: \u001b[0mrunning Automatic Mixed Precision (AMP) checks with YOLO11n...\n", "\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed โœ…\n", - "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00<00:00, 837.19it/s]\n", - "\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco8/labels/train.cache\n", - "\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n", - "/usr/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", + "\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100% 4/4 [00:00\n" ], @@ -463,7 +471,7 @@ "source": [ "## 1. Detection\n", "\n", - "YOLOv8 _detection_ models have no suffix and are the default YOLOv8 models, i.e. `yolov8n.pt` and are pretrained on COCO. 
See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n" + "YOLO11 _detection_ models have no suffix and are the default YOLO11 models, i.e. `yolo11n.pt` and are pretrained on COCO. See [Detection Docs](https://docs.ultralytics.com/tasks/detect/) for full details.\n" ], "metadata": { "id": "yq26lwpYK1lq" @@ -472,10 +480,10 @@ { "cell_type": "code", "source": [ - "# Load YOLOv8n, train it on COCO128 for 3 epochs and predict an image with it\n", + "# Load YOLO11n, train it on COCO128 for 3 epochs and predict an image with it\n", "from ultralytics import YOLO\n", "\n", - "model = YOLO('yolov8n.pt') # load a pretrained YOLOv8n detection model\n", + "model = YOLO('yolo11n.pt') # load a pretrained YOLO detection model\n", "model.train(data='coco8.yaml', epochs=3) # train the model\n", "model('https://ultralytics.com/images/bus.jpg') # predict on an image" ], @@ -490,7 +498,7 @@ "source": [ "## 2. Segmentation\n", "\n", - "YOLOv8 _segmentation_ models use the `-seg` suffix, i.e. `yolov8n-seg.pt` and are pretrained on COCO. See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n" + "YOLO11 _segmentation_ models use the `-seg` suffix, i.e. `yolo11n-seg.pt` and are pretrained on COCO. 
See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for full details.\n" ], "metadata": { "id": "7ZW58jUzK66B" @@ -499,10 +507,10 @@ { "cell_type": "code", "source": [ - "# Load YOLOv8n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n", + "# Load YOLO11n-seg, train it on COCO128-seg for 3 epochs and predict an image with it\n", "from ultralytics import YOLO\n", "\n", - "model = YOLO('yolov8n-seg.pt') # load a pretrained YOLOv8n segmentation model\n", + "model = YOLO('yolo11n-seg.pt') # load a pretrained YOLO segmentation model\n", "model.train(data='coco8-seg.yaml', epochs=3) # train the model\n", "model('https://ultralytics.com/images/bus.jpg') # predict on an image" ], @@ -517,7 +525,7 @@ "source": [ "## 3. Classification\n", "\n", - "YOLOv8 _classification_ models use the `-cls` suffix, i.e. `yolov8n-cls.pt` and are pretrained on ImageNet. See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n" + "YOLO11 _classification_ models use the `-cls` suffix, i.e. `yolo11n-cls.pt` and are pretrained on ImageNet. See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for full details.\n" ], "metadata": { "id": "ax3p94VNK9zR" @@ -526,10 +534,10 @@ { "cell_type": "code", "source": [ - "# Load YOLOv8n-cls, train it on mnist160 for 3 epochs and predict an image with it\n", + "# Load YOLO11n-cls, train it on mnist160 for 3 epochs and predict an image with it\n", "from ultralytics import YOLO\n", "\n", - "model = YOLO('yolov8n-cls.pt') # load a pretrained YOLOv8n classification model\n", + "model = YOLO('yolo11n-cls.pt') # load a pretrained YOLO classification model\n", "model.train(data='mnist160', epochs=3) # train the model\n", "model('https://ultralytics.com/images/bus.jpg') # predict on an image" ], @@ -544,7 +552,7 @@ "source": [ "## 4. Pose\n", "\n", - "YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. 
See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details." + "YOLO11 _pose_ models use the `-pose` suffix, i.e. `yolo11n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details." ], "metadata": { "id": "SpIaFLiO11TG" @@ -553,10 +561,10 @@ { "cell_type": "code", "source": [ - "# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n", + "# Load YOLO11n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n", "from ultralytics import YOLO\n", "\n", - "model = YOLO('yolov8n-pose.pt') # load a pretrained YOLOv8n pose model\n", + "model = YOLO('yolo11n-pose.pt') # load a pretrained YOLO pose model\n", "model.train(data='coco8-pose.yaml', epochs=3) # train the model\n", "model('https://ultralytics.com/images/bus.jpg') # predict on an image" ], @@ -571,7 +579,7 @@ "source": [ "## 4. Oriented Bounding Boxes (OBB)\n", "\n", - "YOLOv8 _OBB_ models use the `-obb` suffix, i.e. `yolov8n-obb.pt` and are pretrained on the DOTA dataset. See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for full details." + "YOLO11 _OBB_ models use the `-obb` suffix, i.e. `yolo11n-obb.pt` and are pretrained on the DOTA dataset. See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for full details." 
], "metadata": { "id": "cf5j_T9-B5F0" @@ -580,12 +588,12 @@ { "cell_type": "code", "source": [ - "# Load YOLOv8n-obb, train it on DOTA8 for 3 epochs and predict an image with it\n", + "# Load YOLO11n-obb, train it on DOTA8 for 3 epochs and predict an image with it\n", "from ultralytics import YOLO\n", "\n", - "model = YOLO('yolov8n-obb.pt') # load a pretrained YOLOv8n OBB model\n", - "model.train(data='coco8-dota.yaml', epochs=3) # train the model\n", - "model('https://ultralytics.com/images/bus.jpg') # predict on an image" + "model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n", + "model.train(data='dota8.yaml', epochs=3) # train the model\n", + "model('https://ultralytics.com/images/boats.jpg') # predict on an image" ], "metadata": { "id": "IJNKClOOB5YS" @@ -646,7 +654,7 @@ "source": [ "# Validate multiple models\n", "for x in 'nsmlx':\n", - " !yolo val model=yolov8{x}.pt data=coco.yaml" + " !yolo val model=yolo11{x}.pt data=coco.yaml" ], "metadata": { "id": "Wdc6t_bfzDDk" diff --git a/mkdocs.yml b/mkdocs.yml index ee6a25d21b5..3e735dfb466 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license # Configuration file for building the Ultralytics YOLO documentation site using MkDocs. 
# Provides settings to control site metadata, customize the appearance using the @@ -15,6 +15,7 @@ repo_name: ultralytics/ultralytics remote_name: https://github.com/ultralytics/docs docs_dir: "docs/en/" # where to find the markdown files site_dir: "site/" # where to publish to +use_directory_urls: true # don't display 'index.html' in slugs # Theme customization theme: @@ -22,7 +23,7 @@ theme: language: en custom_dir: docs/overrides/ logo: https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Reverse.svg - favicon: assets/favicon.ico + favicon: https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/logo/favicon-yolo.png icon: repo: fontawesome/brands/github # font: # disabled for faster page load times @@ -69,8 +70,9 @@ theme: - content.tabs.link # all code tabs change simultaneously # Customization -copyright: ยฉ 2024 Ultralytics Inc. All rights reserved. +copyright: ยฉ 2025 Ultralytics Inc. All rights reserved. extra: # version: + homepage: https://www.ultralytics.com/ # provider: mike # version drop-down menu robots: robots.txt analytics: @@ -90,14 +92,16 @@ extra: # version: - icon: fontawesome/brands/python link: https://pypi.org/project/ultralytics/ - icon: fontawesome/brands/discord - link: https://ultralytics.com/discord + link: https://discord.com/invite/ultralytics - icon: fontawesome/brands/reddit link: https://reddit.com/r/ultralytics extra_css: - stylesheets/style.css + extra_javascript: - javascript/extra.js + - javascript/giscus.js markdown_extensions: - admonition @@ -135,6 +139,10 @@ validation: unrecognized_links: warn # Primary navigation --------------------------------------------------------------------------------------------------- +not_in_nav: | + /compare + /macros + nav: - Home: - Home: index.md @@ -162,9 +170,7 @@ nav: - solutions/index.md - Guides: - guides/index.md - - Explorer: - - datasets/explorer/index.md - - NEW ๐Ÿš€ Live Inference: guides/streamlit-live-inference.md # for promotion of 
new pages + - YOLO11 ๐Ÿš€ NEW: models/yolo11.md # for promotion of new pages - Languages: - ๐Ÿ‡ฌ๐Ÿ‡ง  English: https://ultralytics.com/docs/ - ๐Ÿ‡จ๐Ÿ‡ณ  ็ฎ€ไฝ“ไธญๆ–‡: https://docs.ultralytics.com/zh/ @@ -251,7 +257,7 @@ nav: - YOLOv8: models/yolov8.md - YOLOv9: models/yolov9.md - YOLOv10: models/yolov10.md - - YOLO11: models/yolo11.md + - YOLO11 ๐Ÿš€ NEW: models/yolo11.md - SAM (Segment Anything Model): models/sam.md - SAM 2 (Segment Anything Model 2): models/sam-2.md - MobileSAM (Mobile Segment Anything Model): models/mobile-sam.md @@ -261,11 +267,6 @@ nav: - YOLO-World (Real-Time Open-Vocabulary Object Detection): models/yolo-world.md - Datasets: - datasets/index.md - - Explorer: - - datasets/explorer/index.md - - Explorer API: datasets/explorer/api.md - - Explorer Dashboard: datasets/explorer/dashboard.md - - VOC Exploration Example: datasets/explorer/explorer.ipynb - Detection: - datasets/detect/index.md - Argoverse: datasets/detect/argoverse.md @@ -279,10 +280,11 @@ nav: - VisDrone: datasets/detect/visdrone.md - VOC: datasets/detect/voc.md - xView: datasets/detect/xview.md - - Roboflow 100: datasets/detect/roboflow-100.md + - RF100: datasets/detect/roboflow-100.md - Brain-tumor: datasets/detect/brain-tumor.md - African-wildlife: datasets/detect/african-wildlife.md - Signature: datasets/detect/signature.md + - Medical-pills: datasets/detect/medical-pills.md - Segmentation: - datasets/segment/index.md - COCO: datasets/segment/coco.md @@ -296,6 +298,7 @@ nav: - COCO8-pose: datasets/pose/coco8-pose.md - Tiger-pose: datasets/pose/tiger-pose.md - Hand-keypoints: datasets/pose/hand-keypoints.md + - Dog-pose: datasets/pose/dog-pose.md - Classification: - datasets/classify/index.md - Caltech 101: datasets/classify/caltech101.md @@ -314,9 +317,8 @@ nav: - DOTA8: datasets/obb/dota8.md - Multi-Object Tracking: - datasets/track/index.md - - NEW ๐Ÿš€ Solutions: + - Solutions ๐Ÿš€ NEW: - solutions/index.md - - Analytics: guides/analytics.md - Object Counting: 
guides/object-counting.md - Object Cropping: guides/object-cropping.md - Object Blurring: guides/object-blurring.md @@ -330,7 +332,9 @@ nav: - Distance Calculation: guides/distance-calculation.md - Queue Management: guides/queue-management.md - Parking Management: guides/parking-management.md - - NEW ๐Ÿš€ Live Inference: guides/streamlit-live-inference.md + - Analytics: guides/analytics.md + - Live Inference: guides/streamlit-live-inference.md + - Track Objects in Zone ๐Ÿš€ NEW: guides/trackzone.md - Guides: - guides/index.md - YOLO Common Issues: guides/yolo-common-issues.md @@ -364,7 +368,7 @@ nav: - datasets/explorer/index.md - Explorer API: datasets/explorer/api.md - Explorer Dashboard Demo: datasets/explorer/dashboard.md - - VOC Exploration Example: datasets/explorer/explorer.ipynb + - VOC Exploration Example: datasets/explorer/explorer.md - YOLOv5: - yolov5/index.md - Quickstart: yolov5/quickstart_tutorial.md @@ -391,35 +395,40 @@ nav: - Clearml Logging: yolov5/tutorials/clearml_logging_integration.md - Integrations: - integrations/index.md - - TorchScript: integrations/torchscript.md + - Amazon SageMaker: integrations/amazon-sagemaker.md + - ClearML: integrations/clearml.md + - Comet ML: integrations/comet.md + - CoreML: integrations/coreml.md + - DVC: integrations/dvc.md + - Google Colab: integrations/google-colab.md + - Gradio: integrations/gradio.md + - IBM Watsonx: integrations/ibm-watsonx.md + - JupyterLab: integrations/jupyterlab.md + - Kaggle: integrations/kaggle.md + - MLflow: integrations/mlflow.md + - Neural Magic: integrations/neural-magic.md - ONNX: integrations/onnx.md - OpenVINO: integrations/openvino.md - - TensorRT: integrations/tensorrt.md - - CoreML: integrations/coreml.md - - TF SavedModel: integrations/tf-savedmodel.md - - TF GraphDef: integrations/tf-graphdef.md - - TFLite: integrations/tflite.md - - TFLite Edge TPU: integrations/edge-tpu.md - - TF.js: integrations/tfjs.md - PaddlePaddle: integrations/paddlepaddle.md + - MNN: 
integrations/mnn.md - NCNN: integrations/ncnn.md - - Comet ML: integrations/comet.md + - Paperspace Gradient: integrations/paperspace.md - Ray Tune: integrations/ray-tune.md - Roboflow: integrations/roboflow.md - - MLflow: integrations/mlflow.md - - ClearML: integrations/clearml.md - - DVC: integrations/dvc.md - - Weights & Biases: integrations/weights-biases.md - - Neural Magic: integrations/neural-magic.md - - Gradio: integrations/gradio.md + - TF GraphDef: integrations/tf-graphdef.md + - TF SavedModel: integrations/tf-savedmodel.md + - TF.js: integrations/tfjs.md + - TFLite: integrations/tflite.md + - TFLite Edge TPU: integrations/edge-tpu.md - TensorBoard: integrations/tensorboard.md - - Amazon SageMaker: integrations/amazon-sagemaker.md - - Paperspace Gradient: integrations/paperspace.md - - Google Colab: integrations/google-colab.md - - Kaggle: integrations/kaggle.md - - JupyterLab: integrations/jupyterlab.md - - IBM Watsonx: integrations/ibm-watsonx.md + - TensorRT: integrations/tensorrt.md + - TorchScript: integrations/torchscript.md - VS Code: integrations/vscode.md + - Weights & Biases: integrations/weights-biases.md + - Albumentations: integrations/albumentations.md + - SONY IMX500: integrations/sony-imx500.md + - Rockchip RKNN: integrations/rockchip-rknn.md + - Seeed Studio reCamera: integrations/seeedstudio-recamera.md - HUB: - hub/index.md - Web: @@ -475,11 +484,6 @@ nav: - build: reference/data/build.md - converter: reference/data/converter.md - dataset: reference/data/dataset.md - - explorer: - - explorer: reference/data/explorer/explorer.md - - gui: - - dash: reference/data/explorer/gui/dash.md - - utils: reference/data/explorer/utils.md - loaders: reference/data/loaders.md - split_dota: reference/data/split_dota.md - utils: reference/data/utils.md @@ -573,8 +577,12 @@ nav: - object_counter: reference/solutions/object_counter.md - parking_management: reference/solutions/parking_management.md - queue_management: 
reference/solutions/queue_management.md + - region_counter: reference/solutions/region_counter.md + - security_alarm: reference/solutions/security_alarm.md + - solutions: reference/solutions/solutions.md - speed_estimation: reference/solutions/speed_estimation.md - streamlit_inference: reference/solutions/streamlit_inference.md + - trackzone: reference/solutions/trackzone.md - trackers: - basetrack: reference/trackers/basetrack.md - bot_sort: reference/trackers/bot_sort.md @@ -621,8 +629,8 @@ nav: - Contributing Guide: help/contributing.md - Continuous Integration (CI) Guide: help/CI.md - Contributor License Agreement (CLA): help/CLA.md - - Minimum Reproducible Example (MRE) Guide: help/minimum_reproducible_example.md - - Code of Conduct: help/code_of_conduct.md + - Minimum Reproducible Example (MRE) Guide: help/minimum-reproducible-example.md + - Code of Conduct: help/code-of-conduct.md - Environmental, Health and Safety (EHS) Policy: help/environmental-health-safety.md - Security Policy: help/security.md - Privacy Policy: help/privacy.md @@ -630,8 +638,8 @@ nav: # Plugins including 301 redirects navigation --------------------------------------------------------------------------- plugins: - macros - - search: - lang: en + # - search: + # lang: en - mkdocstrings: enabled: true default_handler: python @@ -658,7 +666,7 @@ plugins: add_share_buttons: True add_css: False default_image: https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png - - mkdocs-jupyter + default_author: glenn.jocher@ultralytics.com - redirects: redirect_maps: hi/index.md: index.md @@ -697,6 +705,8 @@ plugins: tasks/keypoints.md: tasks/pose.md tasks/tracking.md: modes/track.md SECURITY.md: help/security.md + help/minimum_reproducible_example.md: help/minimum-reproducible-example.md + help/code_of_conduct.md: help/code-of-conduct.md tutorials/architecture-summary.md: yolov5/tutorials/architecture_description.md tutorials/clearml-logging.md: 
yolov5/tutorials/clearml_logging_integration.md tutorials/comet-logging.md: yolov5/tutorials/comet_logging_integration.md @@ -760,3 +770,6 @@ plugins: yolov5/environments/yolov5_amazon_web_services_quickstart_tutorial.md: yolov5/environments/aws_quickstart_tutorial.md yolov5/environments/yolov5_google_cloud_platform_quickstart_tutorial.md: yolov5/environments/google_cloud_quickstart_tutorial.md yolov5/environments/yolov5_docker_image_quickstart_tutorial.md: yolov5/environments/docker_image_quickstart_tutorial.md + reference/data/explorer/explorer.md: datasets/explorer/index.md + reference/data/explorer/gui/dash.md: datasets/explorer/index.md + reference/data/explorer/utils.md: datasets/explorer/index.md diff --git a/pyproject.toml b/pyproject.toml index a59b15ef04f..1d1cc60312d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license # Overview: # This pyproject.toml file manages the build, packaging, and distribution of the Ultralytics library. @@ -19,25 +19,24 @@ # For comprehensive documentation and usage instructions, visit: https://docs.ultralytics.com [build-system] -requires = ["setuptools>=57.0.0", "wheel"] +requires = ["setuptools>=70.0.0", "wheel"] build-backend = "setuptools.build_meta" # Project settings ----------------------------------------------------------------------------------------------------- [project] name = "ultralytics" dynamic = ["version"] -description = "Ultralytics YOLO for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification." +description = "Ultralytics YOLO ๐Ÿš€ for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification." 
readme = "README.md" requires-python = ">=3.8" license = { "text" = "AGPL-3.0" } keywords = ["machine-learning", "deep-learning", "computer-vision", "ML", "DL", "AI", "YOLO", "YOLOv3", "YOLOv5", "YOLOv8", "YOLOv9", "YOLOv10", "YOLO11", "HUB", "Ultralytics"] authors = [ - { name = "Glenn Jocher", email = "glenn.jocher@ultralytics.com"}, - { name = "Jing Qiu", email = "jing.qiu@ultralytics.com"}, - { name = "Ayush Chaurasia" } + { name = "Glenn Jocher", email = "glenn.jocher@ultralytics.com" }, + { name = "Jing Qiu", email = "jing.qiu@ultralytics.com" }, ] maintainers = [ - { name = "Ultralytics", email = "hello@ultralytics.com" } + { name = "Ultralytics", email = "hello@ultralytics.com" }, ] classifiers = [ "Development Status :: 4 - Beta", @@ -62,7 +61,7 @@ classifiers = [ # Required dependencies ------------------------------------------------------------------------------------------------ dependencies = [ - "numpy>=1.23.0,<2.0.0", # temporary patch for compat errors https://github.com/ultralytics/yolov5/actions/runs/9538130424/job/26286956354 + "numpy>=1.23.0,<=2.1.1", # OpenVINO and TFLite errors on '--slow' CI Tests https://github.com/ultralytics/ultralytics/pull/18943 "matplotlib>=3.3.0", "opencv-python>=4.6.0", "pillow>=7.1.2", @@ -70,7 +69,7 @@ dependencies = [ "requests>=2.23.0", "scipy>=1.4.1", "torch>=1.8.0", - "torch>=1.8.0,!=2.4.0; sys_platform == 'win32'", # Windows CPU errors w/ 2.4.0 https://github.com/ultralytics/ultralytics/issues/15049 + "torch>=1.8.0,!=2.4.0; sys_platform == 'win32'", # Windows CPU errors w/ 2.4.0 https://github.com/ultralytics/ultralytics/issues/15049 "torchvision>=0.9.0", "tqdm>=4.64.0", # progress bars "psutil", # system utilization @@ -88,29 +87,28 @@ dev = [ "pytest-cov", "coverage[toml]", "mkdocs>=1.6.0", + "beautifulsoup4<=4.12.3", # For docs https://github.com/ultralytics/ultralytics/pull/19067 "mkdocs-material>=9.5.9", "mkdocstrings[python]", - "mkdocs-jupyter", # notebooks "mkdocs-redirects", # 301 redirects - 
"mkdocs-ultralytics-plugin>=0.1.8", # for meta descriptions and images, dates and authors + "mkdocs-ultralytics-plugin>=0.1.16", # for meta descriptions and images, dates and authors "mkdocs-macros-plugin>=1.0.5" # duplicating content (i.e. export tables) in multiple places ] export = [ "onnx>=1.12.0", # ONNX export "coremltools>=7.0; platform_system != 'Windows' and python_version <= '3.11'", # CoreML supported on macOS and Linux + "scikit-learn>=1.3.2; platform_system != 'Windows' and python_version <= '3.11'", # CoreML k-means quantization "openvino>=2024.0.0", # OpenVINO export "tensorflow>=2.0.0", # TF bug https://github.com/ultralytics/ultralytics/issues/5161 "tensorflowjs>=3.9.0", # TF.js export, automatically installs tensorflow - "tensorstore>=0.1.63; platform_machine == 'aarch64' and python_version >= '3.9'", # for TF Raspberry Pi exports - "keras", # not installed automatically by tensorflow>=2.16 + "tensorstore>=0.1.63; platform_machine == 'aarch64' and python_version >= '3.9'", # for TF Raspberry Pi exports + "keras", # not installed automatically by tensorflow>=2.16 "flatbuffers>=23.5.26,<100; platform_machine == 'aarch64'", # update old 'flatbuffers' included inside tensorflow package - "numpy==1.23.5; platform_machine == 'aarch64'", # fix error: `np.bool` was a deprecated alias for the builtin `bool` when using TensorRT models on NVIDIA Jetson "h5py!=3.11.0; platform_machine == 'aarch64'", # fix h5py build issues due to missing aarch64 wheels in 3.11 release ] -explorer = [ - "lancedb", # vector search - "duckdb<=0.9.2", # SQL queries, duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181 - "streamlit", # visualizing with GUI +solutions = [ + "shapely>=2.0.0", # shapely for point and polygon data matching + "streamlit", # for live inference on web browser i.e `yolo streamlit-predict` ] logging = [ "comet", # https://docs.ultralytics.com/integrations/comet/ @@ -129,7 +127,7 @@ extra = [ "Source" = 
"https://github.com/ultralytics/ultralytics" "Documentation" = "https://docs.ultralytics.com" "Bug Reports" = "https://github.com/ultralytics/ultralytics/issues" -"Changelog" = "https://github.com/ultralytics/ultralytics/releases" +"Changelog" = "https://github.com/ultralytics/ultralytics/releases" [project.scripts] yolo = "ultralytics.cfg:entrypoint" diff --git a/tests/__init__.py b/tests/__init__.py index ea8afff5a80..9e86aa3c593 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import ASSETS, ROOT, WEIGHTS_DIR, checks @@ -17,7 +17,6 @@ "SOURCE", "SOURCES_LIST", "TMP", - "IS_TMP_WRITEABLE", "CUDA_IS_AVAILABLE", "CUDA_DEVICE_COUNT", ) diff --git a/tests/conftest.py b/tests/conftest.py index 7b0539b467f..8703d81fce7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import shutil from pathlib import Path @@ -74,10 +74,10 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): # Remove files models = [path for x in ["*.onnx", "*.torchscript"] for path in WEIGHTS_DIR.rglob(x)] - for file in ["bus.jpg", "yolo11n.onnx", "yolo11n.torchscript"] + models: + for file in ["decelera_portrait_min.mov", "bus.jpg", "yolo11n.onnx", "yolo11n.torchscript"] + models: Path(file).unlink(missing_ok=True) # Remove directories models = [path for x in ["*.mlpackage", "*_openvino_model"] for path in WEIGHTS_DIR.rglob(x)] - for directory in [TMP.parents[1] / ".pytest_cache", TMP] + models: + for directory in [WEIGHTS_DIR / "path with spaces", TMP.parents[1] / ".pytest_cache", TMP] + models: shutil.rmtree(directory, ignore_errors=True) diff --git a/tests/test_cli.py b/tests/test_cli.py index 3eadf3c24e3..aab6d8b4ac7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,4 @@ -# 
Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import subprocess @@ -59,7 +59,8 @@ def test_rtdetr(task="detect", model="yolov8n-rtdetr.yaml", data="coco8.yaml"): run(f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk fraction=0.25") run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt") if TORCH_1_9: - run(f"yolo predict {task} model='rtdetr-l.pt' source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt") + weights = WEIGHTS_DIR / "rtdetr-l.pt" + run(f"yolo predict {task} model={weights} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt") @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="MobileSAM with CLIP is not supported in Python 3.12") @@ -97,9 +98,12 @@ def test_mobilesam(): # Source source = ASSETS / "zidane.jpg" - # Predict a segment based on a point prompt + # Predict a segment based on a 1D point prompt and 1D labels. model.predict(source, points=[900, 370], labels=[1]) + # Predict a segment based on 3D points and 2D labels (multiple points per object). 
+ model.predict(source, points=[[[900, 370], [1000, 100]]], labels=[[1, 1]]) + # Predict a segment based on a box prompt model.predict(source, bboxes=[439, 437, 524, 709], save=True) diff --git a/tests/test_cuda.py b/tests/test_cuda.py index 0b3429d0565..d94f95bd06b 100644 --- a/tests/test_cuda.py +++ b/tests/test_cuda.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from itertools import product from pathlib import Path @@ -10,6 +10,7 @@ from ultralytics import YOLO from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS from ultralytics.utils import ASSETS, WEIGHTS_DIR +from ultralytics.utils.checks import check_amp def test_checks(): @@ -18,6 +19,13 @@ def test_checks(): assert torch.cuda.device_count() == CUDA_DEVICE_COUNT +@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available") +def test_amp(): + """Test AMP training checks.""" + model = YOLO("yolo11n.pt").model.cuda() + assert check_amp(model) + + @pytest.mark.slow @pytest.mark.skipif(True, reason="CUDA export tests disabled pending additional Ultralytics GPU server availability") @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available") @@ -108,7 +116,7 @@ def test_predict_sam(): from ultralytics.models.sam import Predictor as SAMPredictor # Load a model - model = SAM(WEIGHTS_DIR / "sam_b.pt") + model = SAM(WEIGHTS_DIR / "sam2.1_b.pt") # Display model information (optional) model.info() @@ -119,9 +127,21 @@ def test_predict_sam(): # Run inference with bboxes prompt model(SOURCE, bboxes=[439, 437, 524, 709], device=0) - # Run inference with points prompt + # Run inference with no labels + model(ASSETS / "zidane.jpg", points=[900, 370], device=0) + + # Run inference with 1D points and 1D labels model(ASSETS / "zidane.jpg", points=[900, 370], labels=[1], device=0) + # Run inference with 2D points and 1D labels + model(ASSETS / "zidane.jpg", points=[[900, 370]], labels=[1], device=0) + + # Run 
inference with multiple 2D points and 1D labels + model(ASSETS / "zidane.jpg", points=[[400, 370], [900, 370]], labels=[1, 1], device=0) + + # Run inference with 3D points and 2D labels (multiple points per object) + model(ASSETS / "zidane.jpg", points=[[[900, 370], [1000, 100]]], labels=[[1, 1]], device=0) + # Create SAMPredictor overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model=WEIGHTS_DIR / "mobile_sam.pt") predictor = SAMPredictor(overrides=overrides) diff --git a/tests/test_engine.py b/tests/test_engine.py index aa4b671eaa0..fe95a5ca5dd 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import sys from unittest import mock diff --git a/tests/test_explorer.py b/tests/test_explorer.py deleted file mode 100644 index 45b0a31e369..00000000000 --- a/tests/test_explorer.py +++ /dev/null @@ -1,66 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -import PIL -import pytest - -from ultralytics import Explorer -from ultralytics.utils import ASSETS -from ultralytics.utils.torch_utils import TORCH_1_13 - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_similarity(): - """Test the correctness and response length of similarity calculations and SQL queries in the Explorer.""" - exp = Explorer(data="coco8.yaml") - exp.create_embeddings_table() - similar = exp.get_similar(idx=1) - assert len(similar) == 4 - similar = exp.get_similar(img=ASSETS / "bus.jpg") - assert len(similar) == 4 - similar = exp.get_similar(idx=[1, 2], limit=2) - assert len(similar) == 2 - sim_idx = exp.similarity_index() - assert len(sim_idx) == 4 - sql = exp.sql_query("WHERE labels LIKE '%zebra%'") - assert len(sql) == 1 - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_det(): - """Test detection functionalities and 
verify embedding table includes bounding boxes.""" - exp = Explorer(data="coco8.yaml", model="yolo11n.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["bboxes"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - # This is a loose test, just checks errors not correctness - similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_seg(): - """Test segmentation functionalities and ensure the embedding table includes segmentation masks.""" - exp = Explorer(data="coco8-seg.yaml", model="yolo11n-seg.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["masks"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) - - -@pytest.mark.slow -@pytest.mark.skipif(not TORCH_1_13, reason="Explorer requires torch>=1.13") -def test_pose(): - """Test pose estimation functionality and verify the embedding table includes keypoints.""" - exp = Explorer(data="coco8-pose.yaml", model="yolo11n-pose.pt") - exp.create_embeddings_table(force=True) - assert len(exp.table.head()["keypoints"]) > 0 - similar = exp.get_similar(idx=[1, 2], limit=10) - assert len(similar) > 0 - similar = exp.plot_similar(idx=[1, 2], limit=10) - assert isinstance(similar, PIL.Image.Image) diff --git a/tests/test_exports.py b/tests/test_exports.py index e6e2ec15986..0faba6d4c88 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import shutil import uuid @@ -11,6 +11,7 @@ from ultralytics import YOLO from ultralytics.cfg import TASK2DATA, TASK2MODEL, TASKS from ultralytics.utils import ( + ARM64, IS_RASPBERRYPI, LINUX, MACOS, @@ 
-42,14 +43,16 @@ def test_export_openvino(): @pytest.mark.slow @pytest.mark.skipif(not TORCH_1_13, reason="OpenVINO requires torch>=1.13") @pytest.mark.parametrize( - "task, dynamic, int8, half, batch", - [ # generate all combinations but exclude those where both int8 and half are True - (task, dynamic, int8, half, batch) - for task, dynamic, int8, half, batch in product(TASKS, [True, False], [True, False], [True, False], [1, 2]) - if not (int8 and half) # exclude cases where both int8 and half are True + "task, dynamic, int8, half, batch, nms", + [ # generate all combinations except for exclusion cases + (task, dynamic, int8, half, batch, nms) + for task, dynamic, int8, half, batch, nms in product( + TASKS, [True, False], [True, False], [True, False], [1, 2], [True, False] + ) + if not ((int8 and half) or (task == "classify" and nms)) ], ) -def test_export_openvino_matrix(task, dynamic, int8, half, batch): +def test_export_openvino_matrix(task, dynamic, int8, half, batch, nms): """Test YOLO model exports to OpenVINO under various configuration matrix conditions.""" file = YOLO(TASK2MODEL[task]).export( format="openvino", @@ -59,6 +62,7 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch): half=half, batch=batch, data=TASK2DATA[task], + nms=nms, ) if WINDOWS: # Use unique filenames due to Windows file permissions bug possibly due to latent threaded use @@ -71,36 +75,39 @@ def test_export_openvino_matrix(task, dynamic, int8, half, batch): @pytest.mark.slow @pytest.mark.parametrize( - "task, dynamic, int8, half, batch, simplify", product(TASKS, [True, False], [False], [False], [1, 2], [True, False]) + "task, dynamic, int8, half, batch, simplify, nms", + [ # generate all combinations except for exclusion cases + (task, dynamic, int8, half, batch, simplify, nms) + for task, dynamic, int8, half, batch, simplify, nms in product( + TASKS, [True, False], [False], [False], [1, 2], [True, False], [True, False] + ) + if not ((int8 and half) or (task == 
"classify" and nms) or (task == "obb" and nms and not TORCH_1_13)) + ], ) -def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify): +def test_export_onnx_matrix(task, dynamic, int8, half, batch, simplify, nms): """Test YOLO exports to ONNX format with various configurations and parameters.""" file = YOLO(TASK2MODEL[task]).export( - format="onnx", - imgsz=32, - dynamic=dynamic, - int8=int8, - half=half, - batch=batch, - simplify=simplify, + format="onnx", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, simplify=simplify, nms=nms ) YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference Path(file).unlink() # cleanup @pytest.mark.slow -@pytest.mark.parametrize("task, dynamic, int8, half, batch", product(TASKS, [False], [False], [False], [1, 2])) -def test_export_torchscript_matrix(task, dynamic, int8, half, batch): +@pytest.mark.parametrize( + "task, dynamic, int8, half, batch, nms", + [ # generate all combinations except for exclusion cases + (task, dynamic, int8, half, batch, nms) + for task, dynamic, int8, half, batch, nms in product(TASKS, [False], [False], [False], [1, 2], [True, False]) + if not (task == "classify" and nms) + ], +) +def test_export_torchscript_matrix(task, dynamic, int8, half, batch, nms): """Tests YOLO model exports to TorchScript format under varied configurations.""" file = YOLO(TASK2MODEL[task]).export( - format="torchscript", - imgsz=32, - dynamic=dynamic, - int8=int8, - half=half, - batch=batch, + format="torchscript", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms ) - YOLO(file)([SOURCE] * 3, imgsz=64 if dynamic else 32) # exported model inference at batch=3 + YOLO(file)([SOURCE] * batch, imgsz=64 if dynamic else 32) # exported model inference Path(file).unlink() # cleanup @@ -110,10 +117,10 @@ def test_export_torchscript_matrix(task, dynamic, int8, half, batch): @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="CoreML not supported in Python 3.12") 
@pytest.mark.parametrize( "task, dynamic, int8, half, batch", - [ # generate all combinations but exclude those where both int8 and half are True + [ # generate all combinations except for exclusion cases (task, dynamic, int8, half, batch) for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1]) - if not (int8 and half) # exclude cases where both int8 and half are True + if not (int8 and half) ], ) def test_export_coreml_matrix(task, dynamic, int8, half, batch): @@ -134,22 +141,19 @@ def test_export_coreml_matrix(task, dynamic, int8, half, batch): @pytest.mark.skipif(not checks.IS_PYTHON_MINIMUM_3_10, reason="TFLite export requires Python>=3.10") @pytest.mark.skipif(not LINUX, reason="Test disabled as TF suffers from install conflicts on Windows and macOS") @pytest.mark.parametrize( - "task, dynamic, int8, half, batch", - [ # generate all combinations but exclude those where both int8 and half are True - (task, dynamic, int8, half, batch) - for task, dynamic, int8, half, batch in product(TASKS, [False], [True, False], [True, False], [1]) - if not (int8 and half) # exclude cases where both int8 and half are True + "task, dynamic, int8, half, batch, nms", + [ # generate all combinations except for exclusion cases + (task, dynamic, int8, half, batch, nms) + for task, dynamic, int8, half, batch, nms in product( + TASKS, [False], [True, False], [True, False], [1], [True, False] + ) + if not ((int8 and half) or (task == "classify" and nms)) ], ) -def test_export_tflite_matrix(task, dynamic, int8, half, batch): +def test_export_tflite_matrix(task, dynamic, int8, half, batch, nms): """Test YOLO exports to TFLite format considering various export configurations.""" file = YOLO(TASK2MODEL[task]).export( - format="tflite", - imgsz=32, - dynamic=dynamic, - int8=int8, - half=half, - batch=batch, + format="tflite", imgsz=32, dynamic=dynamic, int8=int8, half=half, batch=batch, nms=nms ) YOLO(file)([SOURCE] * batch, imgsz=32) # exported 
model inference at batch=3 Path(file).unlink() # cleanup @@ -157,7 +161,7 @@ def test_export_tflite_matrix(task, dynamic, int8, half, batch): @pytest.mark.skipif(not TORCH_1_9, reason="CoreML>=7.2 not supported with PyTorch<=1.8") @pytest.mark.skipif(WINDOWS, reason="CoreML not supported on Windows") # RuntimeError: BlobWriter not loaded -@pytest.mark.skipif(IS_RASPBERRYPI, reason="CoreML not supported on Raspberry Pi") +@pytest.mark.skipif(LINUX and ARM64, reason="CoreML not supported on aarch64 Linux") @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="CoreML not supported in Python 3.12") def test_export_coreml(): """Test YOLO exports to CoreML format, optimized for macOS only.""" @@ -192,8 +196,25 @@ def test_export_paddle(): YOLO(MODEL).export(format="paddle", imgsz=32) +@pytest.mark.slow +@pytest.mark.skipif(IS_RASPBERRYPI, reason="MNN not supported on Raspberry Pi") +def test_export_mnn(): + """Test YOLO exports to MNN format (WARNING: MNN test must precede NCNN test or CI error on Windows).""" + file = YOLO(MODEL).export(format="mnn", imgsz=32) + YOLO(file)(SOURCE, imgsz=32) # exported model inference + + @pytest.mark.slow def test_export_ncnn(): """Test YOLO exports to NCNN format.""" file = YOLO(MODEL).export(format="ncnn", imgsz=32) YOLO(file)(SOURCE, imgsz=32) # exported model inference + + +@pytest.mark.skipif(True, reason="Test disabled as keras and tensorflow version conflicts with tflite export.") +@pytest.mark.skipif(not LINUX or MACOS, reason="Skipping test on Windows and Macos") +def test_export_imx(): + """Test YOLO exports to IMX format.""" + model = YOLO("yolov8n.pt") + file = model.export(format="imx", imgsz=32) + YOLO(file)(SOURCE, imgsz=32) diff --git a/tests/test_integrations.py b/tests/test_integrations.py index 4c8e066978a..8067a1787f1 100644 --- a/tests/test_integrations.py +++ b/tests/test_integrations.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license 
import contextlib import os diff --git a/tests/test_python.py b/tests/test_python.py index 117e6f802e1..644176fb482 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import contextlib import csv @@ -409,15 +409,6 @@ def test_utils_torchutils(): time_sync() -@pytest.mark.slow -@pytest.mark.skipif(not ONLINE, reason="environment is offline") -def test_utils_downloads(): - """Test file download utilities from ultralytics.utils.downloads.""" - from ultralytics.utils.downloads import get_google_drive_file_info - - get_google_drive_file_info("https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link") - - def test_utils_ops(): """Test utility operations functions for coordinate transformation and normalization.""" from ultralytics.utils.ops import ( @@ -585,11 +576,11 @@ def test_model_embeddings(): @pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="YOLOWorld with CLIP is not supported in Python 3.12") def test_yolo_world(): """Tests YOLO world models with CLIP support, including detection and training scenarios.""" - model = YOLO("yolov8s-world.pt") # no YOLO11n-world model yet + model = YOLO(WEIGHTS_DIR / "yolov8s-world.pt") # no YOLO11n-world model yet model.set_classes(["tree", "window"]) model(SOURCE, conf=0.01) - model = YOLO("yolov8s-worldv2.pt") # no YOLO11n-world model yet + model = YOLO(WEIGHTS_DIR / "yolov8s-worldv2.pt") # no YOLO11n-world model yet # Training from a pretrained model. Eval is included at the final stage of training. 
# Use dota8.yaml which has fewer categories to reduce the inference time of CLIP model model.train( diff --git a/tests/test_solutions.py b/tests/test_solutions.py index fabec621d36..056a056fbc1 100644 --- a/tests/test_solutions.py +++ b/tests/test_solutions.py @@ -1,58 +1,66 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import cv2 import pytest +from tests import TMP from ultralytics import YOLO, solutions +from ultralytics.utils import ASSETS_URL, WEIGHTS_DIR from ultralytics.utils.downloads import safe_download -MAJOR_SOLUTIONS_DEMO = "https://github.com/ultralytics/assets/releases/download/v0.0.0/solutions_ci_demo.mp4" -WORKOUTS_SOLUTION_DEMO = "https://github.com/ultralytics/assets/releases/download/v0.0.0/solution_ci_pose_demo.mp4" +DEMO_VIDEO = "solutions_ci_demo.mp4" +POSE_VIDEO = "solution_ci_pose_demo.mp4" @pytest.mark.slow def test_major_solutions(): - """Test the object counting, heatmap, speed estimation and queue management solution.""" - safe_download(url=MAJOR_SOLUTIONS_DEMO) - model = YOLO("yolo11n.pt") - names = model.names - cap = cv2.VideoCapture("solutions_ci_demo.mp4") + """Test the object counting, heatmap, speed estimation, trackzone and queue management solution.""" + safe_download(url=f"{ASSETS_URL}/{DEMO_VIDEO}", dir=TMP) + cap = cv2.VideoCapture(str(TMP / DEMO_VIDEO)) assert cap.isOpened(), "Error reading video file" - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - counter = solutions.ObjectCounter(reg_pts=region_points, names=names, view_img=False) - heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, names=names, view_img=False) - speed = solutions.SpeedEstimator(reg_pts=region_points, names=names, view_img=False) - queue = solutions.QueueManager(names=names, reg_pts=region_points, view_img=False) + region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] + counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", 
show=False) # Test object counter + heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False) # Test heatmaps + heatmap_count = solutions.Heatmap( + colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False, region=region_points + ) # Test heatmaps with object counting + speed = solutions.SpeedEstimator(region=region_points, model="yolo11n.pt", show=False) # Test speed estimation + queue = solutions.QueueManager(region=region_points, model="yolo11n.pt", show=False) # Test queue manager + line_analytics = solutions.Analytics(analytics_type="line", model="yolo11n.pt", show=False) # line analytics + pie_analytics = solutions.Analytics(analytics_type="pie", model="yolo11n.pt", show=False) # pie analytics + bar_analytics = solutions.Analytics(analytics_type="bar", model="yolo11n.pt", show=False) # bar analytics + area_analytics = solutions.Analytics(analytics_type="area", model="yolo11n.pt", show=False) # area analytics + trackzone = solutions.TrackZone(region=region_points, model="yolo11n.pt", show=False) # Test trackzone + frame_count = 0 # Required for analytics while cap.isOpened(): success, im0 = cap.read() if not success: break + frame_count += 1 original_im0 = im0.copy() - tracks = model.track(im0, persist=True, show=False) - _ = counter.start_counting(original_im0.copy(), tracks) - _ = heatmap.generate_heatmap(original_im0.copy(), tracks) - _ = speed.estimate_speed(original_im0.copy(), tracks) - _ = queue.process_queue(original_im0.copy(), tracks) + _ = counter.count(original_im0.copy()) + _ = heatmap.generate_heatmap(original_im0.copy()) + _ = heatmap_count.generate_heatmap(original_im0.copy()) + _ = speed.estimate_speed(original_im0.copy()) + _ = queue.process_queue(original_im0.copy()) + _ = line_analytics.process_data(original_im0.copy(), frame_count) + _ = pie_analytics.process_data(original_im0.copy(), frame_count) + _ = bar_analytics.process_data(original_im0.copy(), frame_count) + _ =
area_analytics.process_data(original_im0.copy(), frame_count) + _ = trackzone.trackzone(original_im0.copy()) cap.release() - cv2.destroyAllWindows() - -@pytest.mark.slow -def test_aigym(): - """Test the workouts monitoring solution.""" - safe_download(url=WORKOUTS_SOLUTION_DEMO) - model = YOLO("yolo11n-pose.pt") - cap = cv2.VideoCapture("solution_ci_pose_demo.mp4") + # Test workouts monitoring + safe_download(url=f"{ASSETS_URL}/{POSE_VIDEO}", dir=TMP) + cap = cv2.VideoCapture(str(TMP / POSE_VIDEO)) assert cap.isOpened(), "Error reading video file" - gym_object = solutions.AIGym(line_thickness=2, pose_type="squat", kpts_to_check=[5, 11, 13]) + gym = solutions.AIGym(kpts=[5, 11, 13], show=False) while cap.isOpened(): success, im0 = cap.read() if not success: break - results = model.track(im0, verbose=False) - _ = gym_object.start_counting(im0, results) + _ = gym.monitor(im0) cap.release() - cv2.destroyAllWindows() @pytest.mark.slow @@ -60,9 +68,9 @@ def test_instance_segmentation(): """Test the instance segmentation solution.""" from ultralytics.utils.plotting import Annotator, colors - model = YOLO("yolo11n-seg.pt") + model = YOLO(WEIGHTS_DIR / "yolo11n-seg.pt") names = model.names - cap = cv2.VideoCapture("solutions_ci_demo.mp4") + cap = cv2.VideoCapture(TMP / DEMO_VIDEO) assert cap.isOpened(), "Error reading video file" while cap.isOpened(): success, im0 = cap.read() @@ -83,4 +91,4 @@ def test_instance_segmentation(): @pytest.mark.slow def test_streamlit_predict(): """Test streamlit predict live inference solution.""" - solutions.inference() + solutions.Inference().inference() diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index daff29f8f4e..01631ca8f3c 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,13 +1,13 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -__version__ = "8.3.1" +__version__ = "8.3.71" import os -# Set ENV Variables (place before imports) 
-os.environ["OMP_NUM_THREADS"] = "1" # reduce CPU utilization during training +# Set ENV variables (place before imports) +if not os.environ.get("OMP_NUM_THREADS"): + os.environ["OMP_NUM_THREADS"] = "1" # default for reduced CPU utilization during training -from ultralytics.data.explorer.explorer import Explorer from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld from ultralytics.utils import ASSETS, SETTINGS from ultralytics.utils.checks import check_yolo as checks @@ -26,5 +26,4 @@ "checks", "download", "settings", - "Explorer", ) diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 74da337f91a..b4e9f3b060e 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import shutil import subprocess import sys @@ -8,11 +7,14 @@ from types import SimpleNamespace from typing import Dict, List, Union +import cv2 + from ultralytics.utils import ( ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, + DEFAULT_SOL_DICT, IS_VSCODE, LOGGER, RANK, @@ -31,9 +33,22 @@ yaml_print, ) +# Define valid solutions +SOLUTION_MAP = { + "count": ("ObjectCounter", "count"), + "heatmap": ("Heatmap", "generate_heatmap"), + "queue": ("QueueManager", "process_queue"), + "speed": ("SpeedEstimator", "estimate_speed"), + "workout": ("AIGym", "monitor"), + "analytics": ("Analytics", "process_data"), + "trackzone": ("TrackZone", "trackzone"), + "inference": ("Inference", "inference"), + "help": None, +} + # Define valid tasks and modes -MODES = {"train", "val", "predict", "export", "track", "benchmark"} -TASKS = {"detect", "segment", "classify", "pose", "obb", "regress"} +MODES = frozenset({"train", "val", "predict", "export", "track", "benchmark"}) +TASKS = frozenset({"detect", "segment", "classify", "pose", "obb", "regress"}) TASK2DATA = { "detect": "coco8.yaml", "segment": 
"coco8-seg.yaml", @@ -58,11 +73,41 @@ "obb": "metrics/mAP50-95(B)", "regress": "metrics/MAE", } -MODELS = {TASK2MODEL[task] for task in TASKS} +MODELS = frozenset({TASK2MODEL[task] for task in TASKS}) ARGV = sys.argv or ["", ""] # sometimes sys.argv = [] +SOLUTIONS_HELP_MSG = f""" + Arguments received: {str(["yolo"] + ARGV[1:])}. Ultralytics 'yolo solutions' usage overview: + + yolo solutions SOLUTION ARGS + + Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())[:-1]} + ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults + at https://docs.ultralytics.com/usage/cfg + + 1. Call object counting solution + yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] + + 2. Call heatmaps solution + yolo solutions heatmap colormap=cv2.COLORMAP_PARULA model=yolo11n.pt + + 3. Call queue management solution + yolo solutions queue region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] model=yolo11n.pt + + 4. Call workouts monitoring solution for push-ups + yolo solutions workout model=yolo11n-pose.pt kpts=[6, 8, 10] + + 5. Generate analytical graphs + yolo solutions analytics analytics_type="pie" + + 6. Track objects within specific zones + yolo solutions trackzone source="path/to/video/file.mp4" region=[(150, 150), (1130, 150), (1130, 570), (150, 570)] + + 7. Streamlit real-time webcam inference GUI + yolo streamlit-predict + """ CLI_HELP_MSG = f""" - Arguments received: {str(['yolo'] + ARGV[1:])}. Ultralytics 'yolo' commands use the following syntax: + Arguments received: {str(["yolo"] + ARGV[1:])}. Ultralytics 'yolo' commands use the following syntax: yolo TASK MODE ARGS @@ -83,115 +128,120 @@ 4. Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required) yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128 - 5. 
Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API - yolo explorer data=data.yaml model=yolo11n.pt - - 6. Streamlit real-time webcam inference GUI - yolo streamlit-predict - - 7. Run special commands: + 5. Ultralytics solutions usage + yolo solutions count or in {list(SOLUTION_MAP.keys())[1:-1]} source="path/to/video/file.mp4" + + 6. Run special commands: yolo help yolo checks yolo version yolo settings yolo copy-cfg yolo cfg + yolo solutions help Docs: https://docs.ultralytics.com + Solutions: https://docs.ultralytics.com/solutions/ Community: https://community.ultralytics.com GitHub: https://github.com/ultralytics/ultralytics """ # Define keys for arg type checks -CFG_FLOAT_KEYS = { # integer or float arguments, i.e. x=2 and x=2.0 - "warmup_epochs", - "box", - "cls", - "dfl", - "degrees", - "shear", - "time", - "workspace", - "batch", -} -CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0 - "dropout", - "lr0", - "lrf", - "momentum", - "weight_decay", - "warmup_momentum", - "warmup_bias_lr", - "label_smoothing", - "hsv_h", - "hsv_s", - "hsv_v", - "translate", - "scale", - "perspective", - "flipud", - "fliplr", - "bgr", - "mosaic", - "mixup", - "copy_paste", - "conf", - "iou", - "fraction", -} -CFG_INT_KEYS = { # integer-only arguments - "epochs", - "patience", - "workers", - "seed", - "close_mosaic", - "mask_ratio", - "max_det", - "vid_stride", - "line_width", - "nbs", - "save_period", - "max_ncalib_imgs", -} -CFG_BOOL_KEYS = { # boolean-only arguments - "save", - "exist_ok", - "verbose", - "deterministic", - "single_cls", - "rect", - "cos_lr", - "overlap_mask", - "val", - "save_json", - "save_hybrid", - "half", - "dnn", - "plots", - "show", - "save_txt", - "save_conf", - "save_crop", - "save_frames", - "show_labels", - "show_conf", - "visualize", - "augment", - "agnostic_nms", - "retina_masks", - "show_boxes", - "keras", - "optimize", - "int8", - "dynamic", - "simplify", - "nms", - 
"profile", - "multi_scale", - "separate_outputs", - "export_hw_optimized", - "uint8_io_dtype", -} +CFG_FLOAT_KEYS = frozenset( + { # integer or float arguments, i.e. x=2 and x=2.0 + "warmup_epochs", + "box", + "cls", + "dfl", + "degrees", + "shear", + "time", + "workspace", + "batch", + } +) +CFG_FRACTION_KEYS = frozenset( + { # fractional float arguments with 0.0<=values<=1.0 + "dropout", + "lr0", + "lrf", + "momentum", + "weight_decay", + "warmup_momentum", + "warmup_bias_lr", + "hsv_h", + "hsv_s", + "hsv_v", + "translate", + "scale", + "perspective", + "flipud", + "fliplr", + "bgr", + "mosaic", + "mixup", + "copy_paste", + "conf", + "iou", + "fraction", + } +) +CFG_INT_KEYS = frozenset( + { # integer-only arguments + "epochs", + "patience", + "workers", + "seed", + "close_mosaic", + "mask_ratio", + "max_det", + "vid_stride", + "line_width", + "nbs", + "save_period", + } +) +CFG_BOOL_KEYS = frozenset( + { # boolean-only arguments + "save", + "exist_ok", + "verbose", + "deterministic", + "single_cls", + "rect", + "cos_lr", + "overlap_mask", + "val", + "save_json", + "save_hybrid", + "half", + "dnn", + "plots", + "show", + "save_txt", + "save_conf", + "save_crop", + "save_frames", + "show_labels", + "show_conf", + "visualize", + "augment", + "agnostic_nms", + "retina_masks", + "show_boxes", + "keras", + "optimize", + "int8", + "dynamic", + "simplify", + "nms", + "profile", + "multi_scale", + "separate_outputs", + "export_hw_optimized", + "uint8_io_dtype", + } +) def cfg2dict(cfg): @@ -244,7 +294,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove Examples: >>> from ultralytics.cfg import get_cfg >>> config = get_cfg() # Load default configuration - >>> config = get_cfg("path/to/config.yaml", overrides={"epochs": 50, "batch_size": 16}) + >>> config_with_overrides = get_cfg("path/to/config.yaml", overrides={"epochs": 50, "batch_size": 16}) Notes: - If both `cfg` and `overrides` are provided, the values in `overrides` will take 
precedence. @@ -267,7 +317,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove if k in cfg and isinstance(cfg[k], (int, float)): cfg[k] = str(cfg[k]) if cfg.get("name") == "model": # assign model to 'name' arg - cfg["name"] = cfg.get("model", "").split(".")[0] + cfg["name"] = str(cfg.get("model", "")).split(".")[0] LOGGER.warning(f"WARNING โš ๏ธ 'name=model' automatically updated to 'name={cfg['name']}'.") # Type and Value checks @@ -323,11 +373,11 @@ def check_cfg(cfg, hard=True): ) cfg[k] = v = float(v) if not (0.0 <= v <= 1.0): - raise ValueError(f"'{k}={v}' is an invalid value. " f"Valid '{k}' values are between 0.0 and 1.0.") + raise ValueError(f"'{k}={v}' is an invalid value. Valid '{k}' values are between 0.0 and 1.0.") elif k in CFG_INT_KEYS and not isinstance(v, int): if hard: raise TypeError( - f"'{k}={v}' is of invalid type {type(v).__name__}. " f"'{k}' must be an int (i.e. '{k}=8')" + f"'{k}={v}' is of invalid type {type(v).__name__}. '{k}' must be an int (i.e. '{k}=8')" ) cfg[k] = int(v) elif k in CFG_BOOL_KEYS and not isinstance(v, bool): @@ -402,6 +452,9 @@ def _handle_deprecation(custom): if key == "line_thickness": deprecation_warn(key, "line_width") custom["line_width"] = custom.pop("line_thickness") + if key == "label_smoothing": + deprecation_warn(key) + custom.pop("label_smoothing") return custom @@ -433,9 +486,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None): - Prints detailed error messages for each mismatched key to help users correct their configurations. 
""" custom = _handle_deprecation(custom) - base_keys, custom_keys = (set(x.keys()) for x in (base, custom)) - mismatched = [k for k in custom_keys if k not in base_keys] - if mismatched: + base_keys, custom_keys = (frozenset(x.keys()) for x in (base, custom)) + if mismatched := [k for k in custom_keys if k not in base_keys]: from difflib import get_close_matches string = "" @@ -449,34 +501,60 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None): def merge_equals_args(args: List[str]) -> List[str]: """ - Merges arguments around isolated '=' in a list of strings, handling three cases: - 1. ['arg', '=', 'val'] becomes ['arg=val'], - 2. ['arg=', 'val'] becomes ['arg=val'], - 3. ['arg', '=val'] becomes ['arg=val']. + Merges arguments around isolated '=' in a list of strings and joins fragments with brackets. + + This function handles the following cases: + 1. ['arg', '=', 'val'] becomes ['arg=val'] + 2. ['arg=', 'val'] becomes ['arg=val'] + 3. ['arg', '=val'] becomes ['arg=val'] + 4. Joins fragments with brackets, e.g., ['imgsz=[3,', '640,', '640]'] becomes ['imgsz=[3,640,640]'] Args: - args (List[str]): A list of strings where each element represents an argument. + args (List[str]): A list of strings where each element represents an argument or fragment. Returns: - (List[str]): A list of strings where the arguments around isolated '=' are merged. + List[str]: A list of strings where the arguments around isolated '=' are merged and fragments with brackets are joined. 
Examples: - >>> args = ["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3"] - >>> merge_equals_args(args) - ['arg1=value', 'arg2=value2', 'arg3=value3'] + >>> args = ["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3", "imgsz=[3,", "640,", "640]"] + >>> merge_and_join_args(args) + ['arg1=value', 'arg2=value2', 'arg3=value3', 'imgsz=[3,640,640]'] """ new_args = [] - for i, arg in enumerate(args): + current = "" + depth = 0 + + i = 0 + while i < len(args): + arg = args[i] + + # Handle equals sign merging if arg == "=" and 0 < i < len(args) - 1: # merge ['arg', '=', 'val'] new_args[-1] += f"={args[i + 1]}" - del args[i + 1] + i += 2 + continue elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]: # merge ['arg=', 'val'] new_args.append(f"{arg}{args[i + 1]}") - del args[i + 1] + i += 2 + continue elif arg.startswith("=") and i > 0: # merge ['arg', '=val'] new_args[-1] += arg - else: - new_args.append(arg) + i += 1 + continue + + # Handle bracket joining + depth += arg.count("[") - arg.count("]") + current += arg + if depth == 0: + new_args.append(current) + current = "" + + i += 1 + + # Append any remaining current string + if current: + new_args.append(current) + return new_args @@ -493,7 +571,7 @@ def handle_yolo_hub(args: List[str]) -> None: Examples: ```bash - yolo hub login YOUR_API_KEY + yolo login YOUR_API_KEY ``` Notes: @@ -553,53 +631,122 @@ def handle_yolo_settings(args: List[str]) -> None: LOGGER.warning(f"WARNING โš ๏ธ settings error: '{e}'. Please see {url} for help.") -def handle_explorer(args: List[str]): +def handle_yolo_solutions(args: List[str]) -> None: """ - Launches a graphical user interface that provides tools for interacting with and analyzing datasets using the - Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the Explorer - dashboard is loading. + Processes YOLO solutions arguments and runs the specified computer vision solutions pipeline. 
Args: - args (List[str]): A list of optional command line arguments. + args (List[str]): Command-line arguments for configuring and running the Ultralytics YOLO + solutions: https://docs.ultralytics.com/solutions/, It can include solution name, source, + and other configuration parameters. + + Returns: + None: The function processes video frames and saves the output but doesn't return any value. Examples: - ```bash - yolo explorer data=data.yaml model=yolo11n.pt - ``` + Run people counting solution with default settings: + >>> handle_yolo_solutions(["count"]) + + Run analytics with custom configuration: + >>> handle_yolo_solutions(["analytics", "conf=0.25", "source=path/to/video/file.mp4"]) + + Run inference with custom configuration, requires Streamlit version 1.29.0 or higher. + >>> handle_yolo_solutions(["inference", "model=yolo11n.pt"]) Notes: - - Requires 'streamlit' package version 1.29.0 or higher. - - The function does not take any arguments or return any values. - - It is typically called from the command line interface using the 'yolo explorer' command. + - Default configurations are merged from DEFAULT_SOL_DICT and DEFAULT_CFG_DICT + - Arguments can be provided in the format 'key=value' or as boolean flags + - Available solutions are defined in SOLUTION_MAP with their respective classes and methods + - If an invalid solution is provided, defaults to 'count' solution + - Output videos are saved in 'runs/solution/{solution_name}' directory + - For 'analytics' solution, frame numbers are tracked for generating analytical graphs + - Video processing can be interrupted by pressing 'q' + - Processes video frames sequentially and saves output in .avi format + - If no source is specified, downloads and uses a default sample video\ + - The inference solution will be launched using the 'streamlit run' command. + - The Streamlit app file is located in the Ultralytics package directory. 
""" - checks.check_requirements("streamlit>=1.29.0") - LOGGER.info("๐Ÿ’ก Loading Explorer dashboard...") - cmd = ["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"] - new = dict(parse_key_value_pair(a) for a in args) - check_dict_alignment(base={k: DEFAULT_CFG_DICT[k] for k in ["model", "data"]}, custom=new) - for k, v in new.items(): - cmd += [k, v] - subprocess.run(cmd) + full_args_dict = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT} # arguments dictionary + overrides = {} + + # check dictionary alignment + for arg in merge_equals_args(args): + arg = arg.lstrip("-").rstrip(",") + if "=" in arg: + try: + k, v = parse_key_value_pair(arg) + overrides[k] = v + except (NameError, SyntaxError, ValueError, AssertionError) as e: + check_dict_alignment(full_args_dict, {arg: ""}, e) + elif arg in full_args_dict and isinstance(full_args_dict.get(arg), bool): + overrides[arg] = True + check_dict_alignment(full_args_dict, overrides) # dict alignment + + # Get solution name + if args and args[0] in SOLUTION_MAP: + if args[0] != "help": + s_n = args.pop(0) # Extract the solution name directly + else: + LOGGER.info(SOLUTIONS_HELP_MSG) + else: + LOGGER.warning( + f"โš ๏ธ No valid solution provided. Using default 'count'. Available: {', '.join(SOLUTION_MAP.keys())}" + ) + s_n = "count" # Default solution if none provided + if args and args[0] == "help": # Add check for return if user call `yolo solutions help` + return -def handle_streamlit_inference(): - """ - Open the Ultralytics Live Inference Streamlit app for real-time object detection. 
+ if s_n == "inference": + checks.check_requirements("streamlit>=1.29.0") + LOGGER.info("๐Ÿ’ก Loading Ultralytics live inference app...") + subprocess.run( + [ # Run subprocess with Streamlit custom argument + "streamlit", + "run", + str(ROOT / "solutions/streamlit_inference.py"), + "--server.headless", + "true", + overrides.pop("model", "yolo11n.pt"), + ] + ) + else: + cls, method = SOLUTION_MAP[s_n] # solution class name, method name and default source - This function initializes and runs a Streamlit application designed for performing live object detection using - Ultralytics models. It checks for the required Streamlit package and launches the app. + from ultralytics import solutions # import ultralytics solutions - Examples: - >>> handle_streamlit_inference() + solution = getattr(solutions, cls)(IS_CLI=True, **overrides) # get solution class i.e ObjectCounter + process = getattr( + solution, method + ) # get specific function of class for processing i.e, count from ObjectCounter - Notes: - - Requires Streamlit version 1.29.0 or higher. - - The app is launched using the 'streamlit run' command. - - The Streamlit app file is located in the Ultralytics package directory. 
- """ - checks.check_requirements("streamlit>=1.29.0") - LOGGER.info("๐Ÿ’ก Loading Ultralytics Live Inference app...") - subprocess.run(["streamlit", "run", ROOT / "solutions/streamlit_inference.py", "--server.headless", "true"]) + cap = cv2.VideoCapture(solution.CFG["source"]) # read the video file + + # extract width, height and fps of the video file, create save directory and initialize video writer + import os # for directory creation + from pathlib import Path + + from ultralytics.utils.files import increment_path # for output directory path update + + w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + if s_n == "analytics": # analytical graphs follow fixed shape for output i.e w=1920, h=1080 + w, h = 1920, 1080 + save_dir = increment_path(Path("runs") / "solutions" / "exp", exist_ok=False) + save_dir.mkdir(parents=True, exist_ok=True) # create the output directory + vw = cv2.VideoWriter(os.path.join(save_dir, "solution.avi"), cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + + try: # Process video frames + f_n = 0 # frame number, required for analytical graphs + while cap.isOpened(): + success, frame = cap.read() + if not success: + break + frame = process(frame, f_n := f_n + 1) if s_n == "analytics" else process(frame) + vw.write(frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + finally: + cap.release() def parse_key_value_pair(pair: str = "key=value"): @@ -610,9 +757,8 @@ def parse_key_value_pair(pair: str = "key=value"): pair (str): A string containing a key-value pair in the format "key=value". Returns: - (tuple): A tuple containing two elements: - - key (str): The parsed key. - - value (str): The parsed value. + key (str): The parsed key. + value (str): The parsed value. Raises: AssertionError: If the value is missing or empty. 
@@ -676,9 +822,10 @@ def smart_value(v): elif v_lower == "false": return False else: - with contextlib.suppress(Exception): + try: return eval(v) - return v + except Exception: + return v def entrypoint(debug=""): @@ -721,8 +868,7 @@ def entrypoint(debug=""): "login": lambda: handle_yolo_hub(args), "logout": lambda: handle_yolo_hub(args), "copy-cfg": copy_default_cfg, - "explorer": lambda: handle_explorer(args[1:]), - "streamlit-predict": lambda: handle_streamlit_inference(), + "solutions": lambda: handle_yolo_solutions(args[1:]), } full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special} @@ -782,7 +928,13 @@ def entrypoint(debug=""): task = overrides.pop("task", None) if task: if task not in TASKS: - raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}") + if task == "track": + LOGGER.warning( + "WARNING โš ๏ธ invalid 'task=track', setting 'task=detect' and 'mode=track'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}." + ) + task, mode = "detect", "track" + else: + raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}") if "model" not in overrides: overrides["model"] = TASK2MODEL[task] @@ -801,7 +953,7 @@ def entrypoint(debug=""): from ultralytics import FastSAM model = FastSAM(model) - elif "sam_" in stem or "sam2_" in stem: + elif "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem: from ultralytics import SAM model = SAM(model) @@ -823,7 +975,9 @@ def entrypoint(debug=""): # Mode if mode in {"predict", "track"} and "source" not in overrides: - overrides["source"] = DEFAULT_CFG.source or ASSETS + overrides["source"] = ( + "https://ultralytics.com/images/boats.jpg" if task == "obb" else DEFAULT_CFG.source or ASSETS + ) LOGGER.warning(f"WARNING โš ๏ธ 'source' argument is missing. 
Using default 'source={overrides['source']}'.") elif mode in {"train", "val"}: if "data" not in overrides and "resume" not in overrides: diff --git a/ultralytics/cfg/datasets/Argoverse.yaml b/ultralytics/cfg/datasets/Argoverse.yaml index 43755f76870..5e05023d779 100644 --- a/ultralytics/cfg/datasets/Argoverse.yaml +++ b/ultralytics/cfg/datasets/Argoverse.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI # Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/ # Example usage: yolo train data=Argoverse.yaml diff --git a/ultralytics/cfg/datasets/DOTAv1.5.yaml b/ultralytics/cfg/datasets/DOTAv1.5.yaml index b59ff8816c1..26c73808d7b 100644 --- a/ultralytics/cfg/datasets/DOTAv1.5.yaml +++ b/ultralytics/cfg/datasets/DOTAv1.5.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml diff --git a/ultralytics/cfg/datasets/DOTAv1.yaml b/ultralytics/cfg/datasets/DOTAv1.yaml index d1c950b9957..5e71d2188d5 100644 --- a/ultralytics/cfg/datasets/DOTAv1.yaml +++ b/ultralytics/cfg/datasets/DOTAv1.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/ # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml diff --git a/ultralytics/cfg/datasets/GlobalWheat2020.yaml 
b/ultralytics/cfg/datasets/GlobalWheat2020.yaml index 95749a11b46..9dff73d7cd2 100644 --- a/ultralytics/cfg/datasets/GlobalWheat2020.yaml +++ b/ultralytics/cfg/datasets/GlobalWheat2020.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan # Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/ # Example usage: yolo train data=GlobalWheat2020.yaml diff --git a/ultralytics/cfg/datasets/ImageNet.yaml b/ultralytics/cfg/datasets/ImageNet.yaml index 0dc344abbaf..92e398a8fa8 100644 --- a/ultralytics/cfg/datasets/ImageNet.yaml +++ b/ultralytics/cfg/datasets/ImageNet.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels # Documentation: https://docs.ultralytics.com/datasets/classify/imagenet/ diff --git a/ultralytics/cfg/datasets/Objects365.yaml b/ultralytics/cfg/datasets/Objects365.yaml index 4994fd5f296..89921364a52 100644 --- a/ultralytics/cfg/datasets/Objects365.yaml +++ b/ultralytics/cfg/datasets/Objects365.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Objects365 dataset https://www.objects365.org/ by Megvii # Documentation: https://docs.ultralytics.com/datasets/detect/objects365/ # Example usage: yolo train data=Objects365.yaml diff --git a/ultralytics/cfg/datasets/SKU-110K.yaml b/ultralytics/cfg/datasets/SKU-110K.yaml index fff1baa4831..a2c94ced1bc 100644 --- a/ultralytics/cfg/datasets/SKU-110K.yaml +++ b/ultralytics/cfg/datasets/SKU-110K.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - 
https://ultralytics.com/license + # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail # Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/ # Example usage: yolo train data=SKU-110K.yaml diff --git a/ultralytics/cfg/datasets/VOC.yaml b/ultralytics/cfg/datasets/VOC.yaml index 7311d8917e9..2eb06ffdeb4 100644 --- a/ultralytics/cfg/datasets/VOC.yaml +++ b/ultralytics/cfg/datasets/VOC.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford # Documentation: # Documentation: https://docs.ultralytics.com/datasets/detect/voc/ # Example usage: yolo train data=VOC.yaml diff --git a/ultralytics/cfg/datasets/VisDrone.yaml b/ultralytics/cfg/datasets/VisDrone.yaml index 9c28d918769..9fc7b45e435 100644 --- a/ultralytics/cfg/datasets/VisDrone.yaml +++ b/ultralytics/cfg/datasets/VisDrone.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University # Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/ # Example usage: yolo train data=VisDrone.yaml diff --git a/ultralytics/cfg/datasets/african-wildlife.yaml b/ultralytics/cfg/datasets/african-wildlife.yaml index eaccb1a85a3..b825f8f068b 100644 --- a/ultralytics/cfg/datasets/african-wildlife.yaml +++ b/ultralytics/cfg/datasets/african-wildlife.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # African-wildlife dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/ # Example usage: yolo train data=african-wildlife.yaml diff --git a/ultralytics/cfg/datasets/brain-tumor.yaml 
b/ultralytics/cfg/datasets/brain-tumor.yaml index 115532a32ed..7a448e84afc 100644 --- a/ultralytics/cfg/datasets/brain-tumor.yaml +++ b/ultralytics/cfg/datasets/brain-tumor.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Brain-tumor dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/ # Example usage: yolo train data=brain-tumor.yaml diff --git a/ultralytics/cfg/datasets/carparts-seg.yaml b/ultralytics/cfg/datasets/carparts-seg.yaml index d15da6e5b2e..9f15f9b0662 100644 --- a/ultralytics/cfg/datasets/carparts-seg.yaml +++ b/ultralytics/cfg/datasets/carparts-seg.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Carparts-seg dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/ # Example usage: yolo train data=carparts-seg.yaml diff --git a/ultralytics/cfg/datasets/coco-pose.yaml b/ultralytics/cfg/datasets/coco-pose.yaml index 7d71c83de47..353dcd721b0 100644 --- a/ultralytics/cfg/datasets/coco-pose.yaml +++ b/ultralytics/cfg/datasets/coco-pose.yaml @@ -1,5 +1,6 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# COCO 2017 dataset https://cocodataset.org by Microsoft +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# COCO 2017 Keypoints dataset https://cocodataset.org by Microsoft # Documentation: https://docs.ultralytics.com/datasets/pose/coco/ # Example usage: yolo train data=coco-pose.yaml # parent @@ -9,9 +10,9 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
path: ../datasets/coco-pose # dataset root dir -train: train2017.txt # train images (relative to 'path') 118287 images -val: val2017.txt # val images (relative to 'path') 5000 images -test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 +train: train2017.txt # train images (relative to 'path') 56599 images +val: val2017.txt # val images (relative to 'path') 2346 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://codalab.lisn.upsaclay.fr/competitions/7403 # Keypoints kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) diff --git a/ultralytics/cfg/datasets/coco.yaml b/ultralytics/cfg/datasets/coco.yaml index 3bb9aacc3f4..cb6dff744bd 100644 --- a/ultralytics/cfg/datasets/coco.yaml +++ b/ultralytics/cfg/datasets/coco.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # COCO 2017 dataset https://cocodataset.org by Microsoft # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ # Example usage: yolo train data=coco.yaml diff --git a/ultralytics/cfg/datasets/coco128-seg.yaml b/ultralytics/cfg/datasets/coco128-seg.yaml index dcd961c6e5f..b023c676300 100644 --- a/ultralytics/cfg/datasets/coco128-seg.yaml +++ b/ultralytics/cfg/datasets/coco128-seg.yaml @@ -1,5 +1,6 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# COCO128-seg dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/segment/coco/ # Example usage: yolo train data=coco128.yaml # parent diff --git a/ultralytics/cfg/datasets/coco128.yaml b/ultralytics/cfg/datasets/coco128.yaml index 
1b515592f2e..12ff0511bcd 100644 --- a/ultralytics/cfg/datasets/coco128.yaml +++ b/ultralytics/cfg/datasets/coco128.yaml @@ -1,5 +1,6 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# COCO128 dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/detect/coco/ # Example usage: yolo train data=coco128.yaml # parent diff --git a/ultralytics/cfg/datasets/coco8-pose.yaml b/ultralytics/cfg/datasets/coco8-pose.yaml index 68678fa76d3..3e8af1e3448 100644 --- a/ultralytics/cfg/datasets/coco8-pose.yaml +++ b/ultralytics/cfg/datasets/coco8-pose.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/ # Example usage: yolo train data=coco8-pose.yaml diff --git a/ultralytics/cfg/datasets/coco8-seg.yaml b/ultralytics/cfg/datasets/coco8-seg.yaml index 42fc02b08d7..1ea6b31004c 100644 --- a/ultralytics/cfg/datasets/coco8-seg.yaml +++ b/ultralytics/cfg/datasets/coco8-seg.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/ # Example usage: yolo train data=coco8-seg.yaml diff --git a/ultralytics/cfg/datasets/coco8.yaml b/ultralytics/cfg/datasets/coco8.yaml index 50a1133cdc3..8200738b46d 100644 --- a/ultralytics/cfg/datasets/coco8.yaml +++ b/ultralytics/cfg/datasets/coco8.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# 
Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/detect/coco8/ # Example usage: yolo train data=coco8.yaml diff --git a/ultralytics/cfg/datasets/crack-seg.yaml b/ultralytics/cfg/datasets/crack-seg.yaml index f6fe9aa2297..11bdd5f575f 100644 --- a/ultralytics/cfg/datasets/crack-seg.yaml +++ b/ultralytics/cfg/datasets/crack-seg.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Crack-seg dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/ # Example usage: yolo train data=crack-seg.yaml diff --git a/ultralytics/cfg/datasets/dog-pose.yaml b/ultralytics/cfg/datasets/dog-pose.yaml new file mode 100644 index 00000000000..447e542ce6c --- /dev/null +++ b/ultralytics/cfg/datasets/dog-pose.yaml @@ -0,0 +1,24 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Dogs dataset http://vision.stanford.edu/aditya86/ImageNetDogs/ by Stanford +# Documentation: https://docs.ultralytics.com/datasets/pose/dog-pose/ +# Example usage: yolo train data=dog-pose.yaml +# parent +# โ”œโ”€โ”€ ultralytics +# โ””โ”€โ”€ datasets +# โ””โ”€โ”€ dog-pose โ† downloads here (337 MB) + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+path: ../datasets/dog-pose # dataset root dir +train: train # train images (relative to 'path') 6773 images +val: val # val images (relative to 'path') 1703 images + +# Keypoints +kpt_shape: [24, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) + +# Classes +names: + 0: dog + +# Download script/URL (optional) +download: https://github.com/ultralytics/assets/releases/download/v0.0.0/dog-pose.zip diff --git a/ultralytics/cfg/datasets/dota8.yaml b/ultralytics/cfg/datasets/dota8.yaml index a4dbe61ca47..486d9e2effb 100644 --- a/ultralytics/cfg/datasets/dota8.yaml +++ b/ultralytics/cfg/datasets/dota8.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/obb/dota8/ # Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml diff --git a/ultralytics/cfg/datasets/hand-keypoints.yaml b/ultralytics/cfg/datasets/hand-keypoints.yaml index 475a7c01379..6d2f765c789 100644 --- a/ultralytics/cfg/datasets/hand-keypoints.yaml +++ b/ultralytics/cfg/datasets/hand-keypoints.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Hand Keypoints dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/pose/hand-keypoints/ # Example usage: yolo train data=hand-keypoints.yaml diff --git a/ultralytics/cfg/datasets/lvis.yaml b/ultralytics/cfg/datasets/lvis.yaml index 9a79bde621b..22030ac9079 100644 --- a/ultralytics/cfg/datasets/lvis.yaml +++ b/ultralytics/cfg/datasets/lvis.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # LVIS dataset http://www.lvisdataset.org by Facebook AI Research. 
# Documentation: https://docs.ultralytics.com/datasets/detect/lvis/ # Example usage: yolo train data=lvis.yaml @@ -11,7 +12,7 @@ path: ../datasets/lvis # dataset root dir train: train.txt # train images (relative to 'path') 100170 images val: val.txt # val images (relative to 'path') 19809 images -minival: minival.txt # minval images (relative to 'path') 5000 images +minival: minival.txt # minival images (relative to 'path') 5000 images names: 0: aerosol can/spray can diff --git a/ultralytics/cfg/datasets/medical-pills.yaml b/ultralytics/cfg/datasets/medical-pills.yaml new file mode 100644 index 00000000000..25507c8b9be --- /dev/null +++ b/ultralytics/cfg/datasets/medical-pills.yaml @@ -0,0 +1,22 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Medical-pills dataset by Ultralytics +# Documentation: https://docs.ultralytics.com/datasets/detect/medical-pills/ +# Example usage: yolo train data=medical-pills.yaml +# parent +# โ”œโ”€โ”€ ultralytics +# โ””โ”€โ”€ datasets +# โ””โ”€โ”€ medical-pills โ† downloads here (8.19 MB) + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+path: ../datasets/medical-pills # dataset root dir +train: train/images # train images (relative to 'path') 92 images +val: valid/images # val images (relative to 'path') 23 images +test: # test images (relative to 'path') + +# Classes +names: + 0: pill + +# Download script/URL (optional) +download: https://github.com/ultralytics/assets/releases/download/v0.0.0/medical-pills.zip diff --git a/ultralytics/cfg/datasets/open-images-v7.yaml b/ultralytics/cfg/datasets/open-images-v7.yaml index d9cad9f1d76..6bd4e0bdcf5 100644 --- a/ultralytics/cfg/datasets/open-images-v7.yaml +++ b/ultralytics/cfg/datasets/open-images-v7.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google # Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/ # Example usage: yolo train data=open-images-v7.yaml diff --git a/ultralytics/cfg/datasets/package-seg.yaml b/ultralytics/cfg/datasets/package-seg.yaml index 6c2a6b60bab..433ca04c7fe 100644 --- a/ultralytics/cfg/datasets/package-seg.yaml +++ b/ultralytics/cfg/datasets/package-seg.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Package-seg dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/ # Example usage: yolo train data=package-seg.yaml @@ -9,8 +10,8 @@ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
path: ../datasets/package-seg # dataset root dir -train: images/train # train images (relative to 'path') 1920 images -val: images/val # val images (relative to 'path') 89 images +train: train/images # train images (relative to 'path') 1920 images +val: valid/images # val images (relative to 'path') 89 images test: test/images # test images (relative to 'path') 188 images # Classes diff --git a/ultralytics/cfg/datasets/signature.yaml b/ultralytics/cfg/datasets/signature.yaml index d838fd7872d..5c9d5c338e9 100644 --- a/ultralytics/cfg/datasets/signature.yaml +++ b/ultralytics/cfg/datasets/signature.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Signature dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/detect/signature/ # Example usage: yolo train data=signature.yaml diff --git a/ultralytics/cfg/datasets/tiger-pose.yaml b/ultralytics/cfg/datasets/tiger-pose.yaml index dbcda757780..2b3f7b71761 100644 --- a/ultralytics/cfg/datasets/tiger-pose.yaml +++ b/ultralytics/cfg/datasets/tiger-pose.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # Tiger Pose dataset by Ultralytics # Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/ # Example usage: yolo train data=tiger-pose.yaml diff --git a/ultralytics/cfg/datasets/xView.yaml b/ultralytics/cfg/datasets/xView.yaml index d2e957ad5e7..ccef985974b 100644 --- a/ultralytics/cfg/datasets/xView.yaml +++ b/ultralytics/cfg/datasets/xView.yaml @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA) # -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! 
-------- # Documentation: https://docs.ultralytics.com/datasets/detect/xview/ diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index ac21e1114c4..0c7d69794cc 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -1,7 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# Default training settings and hyperparameters for medium-augmentation COCO training +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -task: detect # (str) YOLO task, i.e. detect, segment, classify, pose +# Global configuration YAML with settings and hyperparameters for YOLO training, validation, prediction and export +# For documentation see https://docs.ultralytics.com/usage/cfg/ + +task: detect # (str) YOLO task, i.e. detect, segment, classify, pose, obb mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark # Train settings ------------------------------------------------------------------------------------------------------- @@ -36,7 +38,7 @@ profile: False # (bool) profile ONNX and TensorRT speeds during training for log freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training multi_scale: False # (bool) Whether to use multiscale during training # Segmentation -overlap_mask: True # (bool) masks should overlap during training (segment train only) +overlap_mask: True # (bool) merge object masks into a single image mask during training (segment train only) mask_ratio: 4 # (int) mask downsample ratio (segment train only) # Classification dropout: 0.0 # (float) use dropout regularization (classify train only) @@ -84,7 +86,7 @@ int8: False # (bool) CoreML/TF INT8 quantization dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes simplify: True # (bool) ONNX: simplify model using `onnxslim` opset: # (int, optional) ONNX: opset version -workspace: 4 # (int) TensorRT: workspace size (GB) +workspace: None # (float, optional) TensorRT: workspace size 
(GiB), `None` will let TensorRT auto-allocate memory nms: False # (bool) CoreML: add NMS separate_outputs: False # export model with 6 outputs - no concatenation export_hw_optimized: False # optimize c2f block for faster inference on some hardware @@ -105,7 +107,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels) dfl: 1.5 # (float) dfl loss gain pose: 12.0 # (float) pose loss gain kobj: 1.0 # (float) keypoint obj loss gain -label_smoothing: 0.0 # (float) label smoothing (fraction) nbs: 64 # (int) nominal batch size hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction) hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction) diff --git a/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml new file mode 100644 index 00000000000..e2fbcfac106 --- /dev/null +++ b/ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml @@ -0,0 +1,17 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11-cls image classification model with ResNet18 backbone +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/classify + +# Parameters +nc: 10 # number of classes + +# ResNet18 backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, TorchVision, [512, resnet18, DEFAULT, True, 2]] # truncate two layers from the end + +# YOLO11n head +head: + - [-1, 1, Classify, [nc]] # Classify diff --git a/ultralytics/cfg/models/11/yolo11-cls.yaml b/ultralytics/cfg/models/11/yolo11-cls.yaml index ea21e7922f8..7a6457c6d6d 100644 --- a/ultralytics/cfg/models/11/yolo11-cls.yaml +++ b/ultralytics/cfg/models/11/yolo11-cls.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLO11-cls image classification model. 
For Usage examples see https://docs.ultralytics.com/tasks/classify +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11-cls image classification model +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/classify # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/11/yolo11-obb.yaml b/ultralytics/cfg/models/11/yolo11-obb.yaml index 5540ed753d5..8625c7cfdac 100644 --- a/ultralytics/cfg/models/11/yolo11-obb.yaml +++ b/ultralytics/cfg/models/11/yolo11-obb.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLO11 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/obb +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11-obb Oriented Bounding Boxes (OBB) model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/obb # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/11/yolo11-pose.yaml b/ultralytics/cfg/models/11/yolo11-pose.yaml index a744a33b6be..7470edac2fa 100644 --- a/ultralytics/cfg/models/11/yolo11-pose.yaml +++ b/ultralytics/cfg/models/11/yolo11-pose.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLO11-pose keypoints/pose estimation model. 
For Usage examples see https://docs.ultralytics.com/tasks/pose +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11-pose keypoints/pose estimation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/pose # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/11/yolo11-seg.yaml b/ultralytics/cfg/models/11/yolo11-seg.yaml index 0f02d96c063..a569f4af84d 100644 --- a/ultralytics/cfg/models/11/yolo11-seg.yaml +++ b/ultralytics/cfg/models/11/yolo11-seg.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLO11-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11-seg instance segmentation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/segment # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/11/yolo11.yaml b/ultralytics/cfg/models/11/yolo11.yaml index 8d06a129912..409465a1bb7 100644 --- a/ultralytics/cfg/models/11/yolo11.yaml +++ b/ultralytics/cfg/models/11/yolo11.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLO11 object detection model with P3-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLO11 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo11 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/README.md b/ultralytics/cfg/models/README.md index bcaf8deda41..68a9238384e 100644 --- a/ultralytics/cfg/models/README.md +++ b/ultralytics/cfg/models/README.md @@ -11,8 +11,8 @@ To get started, simply browse through the models in this directory and find one Model `*.yaml` files may be used directly in the [Command Line Interface (CLI)](https://docs.ultralytics.com/usage/cli/) with a `yolo` command: ```bash -# Train a YOLOv8n model using the coco8 dataset for 100 epochs -yolo task=detect mode=train model=yolov8n.yaml data=coco8.yaml epochs=100 +# Train a YOLO11n model using the coco8 dataset for 100 epochs +yolo task=detect mode=train model=yolo11n.yaml data=coco8.yaml epochs=100 ``` They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above: @@ -20,7 +20,7 @@ They may also be used directly in a Python environment, and accept the same [arg ```python from ultralytics import YOLO -# Initialize a YOLOv8n model from a YAML configuration file +# Initialize a YOLO11n model from a YAML configuration file model = YOLO("model.yaml") # If a pre-trained model is available, use it instead diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml index c6eb0b3eaf1..d8d6b4f410b 100644 --- a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +++ b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# RT-DETR-l object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/rtdetr +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics RT-DETR-l hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/rtdetr +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml index a68bb5ddae1..b13e94512bd 100644 --- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# RT-DETR-ResNet101 object detection model with P3-P5 outputs. +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics RT-DETR-ResNet101 hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/rtdetr +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml index 7145910417a..8172ad4ed4c 100644 --- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# RT-DETR-ResNet50 object detection model with P3-P5 outputs. 
+# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics RT-DETR-ResNet50 hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/rtdetr +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml index 0e819b0a06d..f9c4a19c8ab 100644 --- a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +++ b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics RT-DETR-x hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/rtdetr +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10b.yaml b/ultralytics/cfg/models/v10/yolov10b.yaml index 57e0462959b..750379128cc 100644 --- a/ultralytics/cfg/models/v10/yolov10b.yaml +++ b/ultralytics/cfg/models/v10/yolov10b.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10b object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10l.yaml b/ultralytics/cfg/models/v10/yolov10l.yaml index add507aed3b..1dedd752e23 100644 --- a/ultralytics/cfg/models/v10/yolov10l.yaml +++ b/ultralytics/cfg/models/v10/yolov10l.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10l object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10m.yaml b/ultralytics/cfg/models/v10/yolov10m.yaml index 2712dfab008..6ba4020b330 100644 --- a/ultralytics/cfg/models/v10/yolov10m.yaml +++ b/ultralytics/cfg/models/v10/yolov10m.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10m object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10n.yaml b/ultralytics/cfg/models/v10/yolov10n.yaml index 094f1384a00..a9aa7018950 100644 --- a/ultralytics/cfg/models/v10/yolov10n.yaml +++ b/ultralytics/cfg/models/v10/yolov10n.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10n object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10s.yaml b/ultralytics/cfg/models/v10/yolov10s.yaml index b80f02bb857..dbb678b277d 100644 --- a/ultralytics/cfg/models/v10/yolov10s.yaml +++ b/ultralytics/cfg/models/v10/yolov10s.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10s object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v10/yolov10x.yaml b/ultralytics/cfg/models/v10/yolov10x.yaml index 2e5d98a97ce..57482133863 100644 --- a/ultralytics/cfg/models/v10/yolov10x.yaml +++ b/ultralytics/cfg/models/v10/yolov10x.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv10 object detection model. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv10x object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov10 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v3/yolov3-spp.yaml b/ultralytics/cfg/models/v3/yolov3-spp.yaml index 6724f4e9f12..6aef25ab748 100644 --- a/ultralytics/cfg/models/v3/yolov3-spp.yaml +++ b/ultralytics/cfg/models/v3/yolov3-spp.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv3-SPP object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv3-SPP object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov3 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v3/yolov3-tiny.yaml b/ultralytics/cfg/models/v3/yolov3-tiny.yaml index f3fe257806b..91a0bb03f7d 100644 --- a/ultralytics/cfg/models/v3/yolov3-tiny.yaml +++ b/ultralytics/cfg/models/v3/yolov3-tiny.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv3-tiny object detection model with P4/16 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov3 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v3/yolov3.yaml b/ultralytics/cfg/models/v3/yolov3.yaml index 716866a97cc..95c99de52be 100644 --- a/ultralytics/cfg/models/v3/yolov3.yaml +++ b/ultralytics/cfg/models/v3/yolov3.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv3 object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov3 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv3 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov3 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v5/yolov5-p6.yaml b/ultralytics/cfg/models/v5/yolov5-p6.yaml index 2fd3ac71baa..376d1aba90c 100644 --- a/ultralytics/cfg/models/v5/yolov5-p6.yaml +++ b/ultralytics/cfg/models/v5/yolov5-p6.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv5 object detection model with P3/8 - P6/64 outputs +# Model docs: https://docs.ultralytics.com/models/yolov5 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v5/yolov5-relu6.yaml b/ultralytics/cfg/models/v5/yolov5-relu6.yaml index e8d811d431f..175ff7ef723 100644 --- a/ultralytics/cfg/models/v5/yolov5-relu6.yaml +++ b/ultralytics/cfg/models/v5/yolov5-relu6.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call l: [1.00, 1.00, 1024] x: [1.33, 1.25, 1024] -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv5 v6.0 backbone backbone: diff --git a/ultralytics/cfg/models/v5/yolov5.yaml b/ultralytics/cfg/models/v5/yolov5.yaml index 8fdc79ebf73..76a4749ae4f 100644 --- a/ultralytics/cfg/models/v5/yolov5.yaml +++ b/ultralytics/cfg/models/v5/yolov5.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv5 object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/models/yolov5 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv5 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov5 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v6/yolov6.yaml b/ultralytics/cfg/models/v6/yolov6.yaml index f39dfb49274..4a45224e570 100644 --- a/ultralytics/cfg/models/v6/yolov6.yaml +++ b/ultralytics/cfg/models/v6/yolov6.yaml @@ -1,9 +1,12 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Meituan YOLOv6 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov6 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes -activation: nn.ReLU() # (optional) model default activation function +activation: torch.nn.ReLU() # (optional) model default activation function scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n' # [depth, width, max_channels] n: [0.33, 0.25, 1024] diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml index fcf111497df..c45d0f30da3 100644 --- a/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml +++ b/ultralytics/cfg/models/v8/relu6-yolov8-cls.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 
'model=yolov8n-cls.yaml' will c l: [1.00, 1.00, 1024] x: [1.00, 1.25, 1024] -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml index 1aaca1caec7..28f7d01753b 100644 --- a/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml +++ b/ultralytics/cfg/models/v8/relu6-yolov8-regress.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-regress.yaml' wi l: [1.00, 1.00, 1024] x: [1.00, 1.25, 1024] -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml b/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml index 892fa644549..8cc2cb52730 100644 --- a/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml +++ b/ultralytics/cfg/models/v8/relu6-yolov8-regress6.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-regress6.yaml' w l: [1.00, 1.00, 1024] x: [1.00, 1.25, 1024] -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/relu6-yolov8.yaml b/ultralytics/cfg/models/v8/relu6-yolov8.yaml index 397525a2e4e..aa0e67153dc 100644 --- a/ultralytics/cfg/models/v8/relu6-yolov8.yaml +++ b/ultralytics/cfg/models/v8/relu6-yolov8.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml b/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml index 6867f88848f..44cc00ebf22 100644 --- a/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-cls image classification model with ResNet101 backbone +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/classify # Parameters nc: 1000 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml b/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml index 8ffd111f02f..1d05e0753fc 100644 --- a/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-cls image classification model. 
For Usage examples see https://docs.ultralytics.com/tasks/classify +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-cls image classification model with ResNet50 backbone +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/classify # Parameters nc: 1000 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-cls.yaml b/ultralytics/cfg/models/v8/yolov8-cls.yaml index 180fc65a59f..e346e5e1b76 100644 --- a/ultralytics/cfg/models/v8/yolov8-cls.yaml +++ b/ultralytics/cfg/models/v8/yolov8-cls.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-cls image classification model with YOLO backbone +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/classify # Parameters nc: 1000 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml b/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml index aee209349c9..a98f23837bf 100644 --- a/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +++ b/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml @@ -1,5 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P2-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P2/4 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect +# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2 # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml b/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml index b35f4cdb6a9..956c2f0ad66 100644 --- a/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml @@ -1,5 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P3/8 - P6/64 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect +# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2 # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-ghost.yaml b/ultralytics/cfg/models/v8/yolov8-ghost.yaml index adc180232b4..5888fb39bd0 100644 --- a/ultralytics/cfg/models/v8/yolov8-ghost.yaml +++ b/ultralytics/cfg/models/v8/yolov8-ghost.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P3-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2 # Parameters diff --git a/ultralytics/cfg/models/v8/yolov8-obb.yaml b/ultralytics/cfg/models/v8/yolov8-obb.yaml index 7a7f60caef1..909324c5bec 100644 --- a/ultralytics/cfg/models/v8/yolov8-obb.yaml +++ b/ultralytics/cfg/models/v8/yolov8-obb.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-obb Oriented Bounding Boxes (OBB) model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/obb # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-p2.yaml b/ultralytics/cfg/models/v8/yolov8-p2.yaml index 5392774bb55..676bc8348c4 100644 --- a/ultralytics/cfg/models/v8/yolov8-p2.yaml +++ b/ultralytics/cfg/models/v8/yolov8-p2.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P2-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P2/4 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-p6.yaml b/ultralytics/cfg/models/v8/yolov8-p6.yaml index 6a76612a2ae..3fde34981f8 100644 --- a/ultralytics/cfg/models/v8/yolov8-p6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-p6.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P3/8 - P6/64 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml index 60007ace158..447a21aab07 100644 --- a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-pose-p6 keypoints/pose estimation model. 
For Usage examples see https://docs.ultralytics.com/tasks/pose +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-pose keypoints/pose estimation model with P3/8 - P6/64 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/pose # Parameters nc: 1 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml b/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml index 90a41181b1d..74617dcc2ac 100644 --- a/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-pose-relu6.yaml @@ -12,7 +12,7 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will l: [1.00, 1.00, 512] x: [1.00, 1.25, 512] -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/yolov8-pose.yaml b/ultralytics/cfg/models/v8/yolov8-pose.yaml index 60388ef595a..c22bc435b57 100644 --- a/ultralytics/cfg/models/v8/yolov8-pose.yaml +++ b/ultralytics/cfg/models/v8/yolov8-pose.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-pose keypoints/pose estimation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/pose # Parameters nc: 1 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-relu6.yaml b/ultralytics/cfg/models/v8/yolov8-relu6.yaml index e11c4a05e48..76c85284862 100644 --- a/ultralytics/cfg/models/v8/yolov8-relu6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-relu6.yaml @@ -11,7 +11,7 @@ scales: # model compound scaling constants, i.e. 
'model=yolov8n.yaml' will call l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs -activation: nn.ReLU6() +activation: torch.nn.ReLU6() # YOLOv8.0n backbone backbone: diff --git a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml index 27b790b1074..50ec129ac18 100644 --- a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +++ b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-RTDETR hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/rtdetr +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml index 78c0444c8b6..4c7ba9bf4dd 100644 --- a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +++ b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-seg-p6 instance segmentation model. 
For Usage examples see https://docs.ultralytics.com/tasks/segment +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-seg instance segmentation model with P3/8 - P6/64 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/segment # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-seg.yaml b/ultralytics/cfg/models/v8/yolov8-seg.yaml index 700b7951de5..52b1c7e9aed 100644 --- a/ultralytics/cfg/models/v8/yolov8-seg.yaml +++ b/ultralytics/cfg/models/v8/yolov8-seg.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-seg instance segmentation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/segment # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-world.yaml b/ultralytics/cfg/models/v8/yolov8-world.yaml index c21a7f00205..3c92e824ce6 100644 --- a/ultralytics/cfg/models/v8/yolov8-world.yaml +++ b/ultralytics/cfg/models/v8/yolov8-world.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-World object detection model with P3-P5 outputs. 
For details see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-World hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo-world +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8-worldv2.yaml b/ultralytics/cfg/models/v8/yolov8-worldv2.yaml index 322b97d4b98..c6aaa277331 100644 --- a/ultralytics/cfg/models/v8/yolov8-worldv2.yaml +++ b/ultralytics/cfg/models/v8/yolov8-worldv2.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8-Worldv2 hybrid object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolo-world +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v8/yolov8.yaml b/ultralytics/cfg/models/v8/yolov8.yaml index b328e98a14a..c7b9938ec34 100644 --- a/ultralytics/cfg/models/v8/yolov8.yaml +++ b/ultralytics/cfg/models/v8/yolov8.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv8 object detection model with P3-P5 outputs. 
For Usage examples see https://docs.ultralytics.com/tasks/detect +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Ultralytics YOLOv8 object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov8 +# Task docs: https://docs.ultralytics.com/tasks/detect # Parameters nc: 80 # number of classes diff --git a/ultralytics/cfg/models/v9/yolov9c-seg.yaml b/ultralytics/cfg/models/v9/yolov9c-seg.yaml index 48b0ad7289d..14122cb8393 100644 --- a/ultralytics/cfg/models/v9/yolov9c-seg.yaml +++ b/ultralytics/cfg/models/v9/yolov9c-seg.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9c-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9c-seg instance segmentation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/segment # 654 layers, 27897120 parameters, 159.4 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9c.yaml b/ultralytics/cfg/models/v9/yolov9c.yaml index 4ea8997d420..4fc1fcd13fd 100644 --- a/ultralytics/cfg/models/v9/yolov9c.yaml +++ b/ultralytics/cfg/models/v9/yolov9c.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9c object detection model. 
For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9c object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/detect # 618 layers, 25590912 parameters, 104.0 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9e-seg.yaml b/ultralytics/cfg/models/v9/yolov9e-seg.yaml index b149b4e2e20..4361daac293 100644 --- a/ultralytics/cfg/models/v9/yolov9e-seg.yaml +++ b/ultralytics/cfg/models/v9/yolov9e-seg.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9e-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9e-seg instance segmentation model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/segment # 1261 layers, 60512800 parameters, 248.4 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9e.yaml b/ultralytics/cfg/models/v9/yolov9e.yaml index c90843e093e..bba5597d0cf 100644 --- a/ultralytics/cfg/models/v9/yolov9e.yaml +++ b/ultralytics/cfg/models/v9/yolov9e.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9e object detection model. 
For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9e object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/detect # 1225 layers, 58206592 parameters, 193.0 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9m.yaml b/ultralytics/cfg/models/v9/yolov9m.yaml index fd2c49ecbd4..89bed65bebb 100644 --- a/ultralytics/cfg/models/v9/yolov9m.yaml +++ b/ultralytics/cfg/models/v9/yolov9m.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9m object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9m object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/detect # 603 layers, 20216160 parameters, 77.9 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9s.yaml b/ultralytics/cfg/models/v9/yolov9s.yaml index d86eb0d938d..28891f4cebc 100644 --- a/ultralytics/cfg/models/v9/yolov9s.yaml +++ b/ultralytics/cfg/models/v9/yolov9s.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9s object detection model. 
For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9s object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/detect # 917 layers, 7318368 parameters, 27.6 GFLOPs # Parameters diff --git a/ultralytics/cfg/models/v9/yolov9t.yaml b/ultralytics/cfg/models/v9/yolov9t.yaml index 0ce5f95e74f..21a5bad86b9 100644 --- a/ultralytics/cfg/models/v9/yolov9t.yaml +++ b/ultralytics/cfg/models/v9/yolov9t.yaml @@ -1,5 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# YOLOv9t object detection model. For Usage examples see https://docs.ultralytics.com/models/yolov9 +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# YOLOv9t object detection model with P3/8 - P5/32 outputs +# Model docs: https://docs.ultralytics.com/models/yolov9 +# Task docs: https://docs.ultralytics.com/tasks/detect # 917 layers, 2128720 parameters, 8.5 GFLOPs # Parameters diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml new file mode 100644 index 00000000000..a4afb49b324 --- /dev/null +++ b/ultralytics/cfg/solutions/default.yaml @@ -0,0 +1,24 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Global configuration YAML with settings and arguments for Ultralytics Solutions +# For documentation see https://docs.ultralytics.com/solutions/ + +# Object counting settings -------------------------------------------------------------------------------------------- +region: # list[tuple[int, int]] object counting, queue or speed estimation region points. 
+show_in: True # (bool) flag to display objects moving *into* the defined region +show_out: True # (bool) flag to display objects moving *out of* the defined region + +# Heatmaps settings ---------------------------------------------------------------------------------------------------- +colormap: # (int | str) colormap for heatmap, only OpenCV-supported colormaps can be used. + +# Workouts monitoring settings ----------------------------------------------------------------------------------------- +up_angle: 145.0 # (float) Workouts up_angle for counts, 145.0 is default value. +down_angle: 90 # (float) Workouts down_angle for counts, 90 is default value. +kpts: [6, 8, 10] # (list[int]) keypoints for workouts monitoring, i.e. for push-ups kpts have values of [6, 8, 10]. + +# Analytics settings --------------------------------------------------------------------------------------------------- +analytics_type: "line" # (str) analytics type i.e. "line", "pie", "bar" or "area" charts. +json_file: # (str) parking system regions file path.
+ +# Security alarm system settings --------------------------------------------------------------------------------------- +records: 5 # (int) Total detections count to send an email about security diff --git a/ultralytics/cfg/trackers/botsort.yaml b/ultralytics/cfg/trackers/botsort.yaml index 01cebb64789..aedcee4860f 100644 --- a/ultralytics/cfg/trackers/botsort.yaml +++ b/ultralytics/cfg/trackers/botsort.yaml @@ -1,10 +1,13 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Default Ultralytics settings for BoT-SORT tracker when using mode="track" +# For documentation and examples see https://docs.ultralytics.com/modes/track/ +# For BoT-SORT source code see https://github.com/NirAharon/BoT-SORT tracker_type: botsort # tracker type, ['botsort', 'bytetrack'] -track_high_thresh: 0.5 # threshold for the first association +track_high_thresh: 0.25 # threshold for the first association track_low_thresh: 0.1 # threshold for the second association -new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks +new_track_thresh: 0.25 # threshold for init new track if the detection does not match any tracks track_buffer: 30 # buffer to calculate the time when to remove tracks match_thresh: 0.8 # threshold for matching tracks fuse_score: True # Whether to fuse confidence scores with the iou distances before matching diff --git a/ultralytics/cfg/trackers/bytetrack.yaml b/ultralytics/cfg/trackers/bytetrack.yaml index 49ab3f697bb..62071a3022d 100644 --- a/ultralytics/cfg/trackers/bytetrack.yaml +++ b/ultralytics/cfg/trackers/bytetrack.yaml @@ -1,10 +1,13 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +# Default Ultralytics 
settings for ByteTrack tracker when using mode="track" +# For documentation and examples see https://docs.ultralytics.com/modes/track/ +# For ByteTrack source code see https://github.com/ifzhang/ByteTrack tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack'] -track_high_thresh: 0.5 # threshold for the first association +track_high_thresh: 0.25 # threshold for the first association track_low_thresh: 0.1 # threshold for the second association -new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks +new_track_thresh: 0.25 # threshold for init new track if the detection does not match any tracks track_buffer: 30 # buffer to calculate the time when to remove tracks match_thresh: 0.8 # threshold for matching tracks fuse_score: True # Whether to fuse confidence scores with the iou distances before matching diff --git a/ultralytics/data/__init__.py b/ultralytics/data/__init__.py index daab4875254..7e88dc9982d 100644 --- a/ultralytics/data/__init__.py +++ b/ultralytics/data/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .base import BaseDataset from .build import build_dataloader, build_grounding, build_yolo_dataset, load_inference_source diff --git a/ultralytics/data/annotator.py b/ultralytics/data/annotator.py index 5cb0058dcb1..982e5de5ab1 100644 --- a/ultralytics/data/annotator.py +++ b/ultralytics/data/annotator.py @@ -1,11 +1,22 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from pathlib import Path from ultralytics import SAM, YOLO -def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None): +def auto_annotate( + data, + det_model="yolo11x.pt", + sam_model="sam_b.pt", + device="", + conf=0.25, + iou=0.45, + imgsz=640, + max_det=300, + classes=None, + output_dir=None, +): """ Automatically annotates images using 
a YOLO object detection model and a SAM segmentation model. @@ -17,11 +28,16 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", det_model (str): Path or name of the pre-trained YOLO detection model. sam_model (str): Path or name of the pre-trained SAM segmentation model. device (str): Device to run the models on (e.g., 'cpu', 'cuda', '0'). + conf (float): Confidence threshold for detection model; default is 0.25. + iou (float): IoU threshold for filtering overlapping boxes in detection results; default is 0.45. + imgsz (int): Input image resize dimension; default is 640. + max_det (int): Limits detections per image to control outputs in dense scenes. + classes (list): Filters predictions to specified class IDs, returning only relevant detections. output_dir (str | None): Directory to save the annotated results. If None, a default directory is created. Examples: >>> from ultralytics.data.annotator import auto_annotate - >>> auto_annotate(data="ultralytics/assets", det_model="yolov8n.pt", sam_model="mobile_sam.pt") + >>> auto_annotate(data="ultralytics/assets", det_model="yolo11n.pt", sam_model="mobile_sam.pt") Notes: - The function creates a new directory for output if not specified. 
@@ -36,7 +52,9 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir = data.parent / f"{data.stem}_auto_annotate_labels" Path(output_dir).mkdir(exist_ok=True, parents=True) - det_results = det_model(data, stream=True, device=device) + det_results = det_model( + data, stream=True, device=device, conf=conf, iou=iou, imgsz=imgsz, max_det=max_det, classes=classes + ) for result in det_results: class_ids = result.boxes.cls.int().tolist() # noqa diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index 49bdc92235b..1ab14a647eb 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import math import random @@ -271,9 +271,9 @@ def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None: """ assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}" if isinstance(index, list): - assert isinstance( - value, list - ), f"The indices should be the same type as values, but got {type(index)} and {type(value)}" + assert isinstance(value, list), ( + f"The indices should be the same type as values, but got {type(index)} and {type(value)}" + ) if isinstance(index, int): index, value = [index], [value] for i, v in zip(index, value): @@ -441,7 +441,8 @@ def get_indexes(self): """ raise NotImplementedError - def _update_label_text(self, labels): + @staticmethod + def _update_label_text(labels): """ Updates label text and class IDs for mixed labels in image augmentation. 
@@ -641,7 +642,7 @@ def _mosaic3(self, labels): c = s - w, s + h0 - h, s, s + h0 padw, padh = c[:2] - x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates img3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img3[ymin:ymax, xmin:xmax] # hp, wp = h, w # height, width previous for next iteration @@ -770,7 +771,7 @@ def _mosaic9(self, labels): c = s - w, s + h0 - hp - h, s, s + h0 - hp padw, padh = c[:2] - x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates # Image img9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img9[ymin:ymax, xmin:xmax] @@ -1259,7 +1260,8 @@ def __call__(self, labels): labels["resized_shape"] = img.shape[:2] return labels - def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): + @staticmethod + def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): """ Compute candidate boxes for further processing based on size and aspect ratio criteria. @@ -1281,7 +1283,7 @@ def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e- eps (float): Small epsilon value to prevent division by zero. Returns: - (numpy.ndarray): Boolean array of shape (n,) indicating which boxes are candidates. + (numpy.ndarray): Boolean array of shape (n) indicating which boxes are candidates. True values correspond to boxes that meet all criteria. 
Examples: @@ -1318,7 +1320,7 @@ class RandomHSV: >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5) >>> image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) >>> labels = {"img": image} - >>> augmented_labels = augmenter(labels) + >>> augmenter(labels) >>> augmented_image = augmented_labels["img"] """ @@ -1335,7 +1337,7 @@ def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None: Examples: >>> hsv_aug = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5) - >>> augmented_image = hsv_aug(image) + >>> hsv_aug(image) """ self.hgain = hgain self.sgain = sgain @@ -1417,7 +1419,7 @@ def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None: Examples: >>> flip = RandomFlip(p=0.5, direction="horizontal") - >>> flip = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4]) + >>> flip_with_idx = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4]) """ assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}" assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}." @@ -1591,14 +1593,15 @@ def __call__(self, labels=None, image=None): labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation if len(labels): - labels = self._update_labels(labels, ratio, dw, dh) + labels = self._update_labels(labels, ratio, left, top) labels["img"] = img labels["resized_shape"] = new_shape return labels else: return img - def _update_labels(self, labels, ratio, padw, padh): + @staticmethod + def _update_labels(labels, ratio, padw, padh): """ Updates labels after applying letterboxing to an image. 
@@ -1847,7 +1850,7 @@ def __init__(self, p=1.0): A.CLAHE(p=0.01), A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), - A.ImageCompression(quality_lower=75, p=0.0), + A.ImageCompression(quality_range=(75, 100), p=0.0), ] # Compose transforms @@ -1857,6 +1860,9 @@ def __init__(self, p=1.0): if self.contains_spatial else A.Compose(T) ) + if hasattr(self.transform, "set_random_seed"): + # Required for deterministic transforms in albumentations>=1.4.21 + self.transform.set_random_seed(torch.initial_seed()) LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p)) except ImportError: # package not installed, skip pass @@ -2019,7 +2025,7 @@ def __call__(self, labels): Returns: (Dict): A dictionary with formatted data, including: - 'img': Formatted image tensor. - - 'cls': Class labels tensor. + - 'cls': Class label's tensor. - 'bboxes': Bounding boxes tensor in the specified format. - 'masks': Instance masks tensor (if return_mask is True). - 'keypoints': Keypoints tensor (if return_keypoint is True). @@ -2111,10 +2117,9 @@ def _format_segments(self, instances, cls, w, h): h (int): Height of the image. Returns: - (tuple): Tuple containing: - masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True. - instances (Instances): Updated instances object with sorted segments if mask_overlap is True. - cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True. + masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True. + instances (Instances): Updated instances object with sorted segments if mask_overlap is True. + cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True. Notes: - If self.mask_overlap is True, masks are overlapped and sorted by area. @@ -2280,7 +2285,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False): Args: dataset (Dataset): The dataset object containing image data and annotations. 
imgsz (int): The target image size for resizing. - hyp (Dict): A dictionary of hyperparameters controlling various aspects of the transformations. + hyp (Namespace): A dictionary of hyperparameters controlling various aspects of the transformations. stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing. Returns: @@ -2288,8 +2293,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False): Examples: >>> from ultralytics.data.dataset import YOLODataset + >>> from ultralytics.utils import IterableSimpleNamespace >>> dataset = YOLODataset(img_path="path/to/images", imgsz=640) - >>> hyp = {"mosaic": 1.0, "copy_paste": 0.5, "degrees": 10.0, "translate": 0.2, "scale": 0.9} + >>> hyp = IterableSimpleNamespace(mosaic=1.0, copy_paste=0.5, degrees=10.0, translate=0.2, scale=0.9) >>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp) >>> augmented_data = transforms(dataset[0]) """ diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py index f18c2d54dce..446b4ecf88b 100644 --- a/ultralytics/data/base.py +++ b/ultralytics/data/base.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import glob import math @@ -90,13 +90,15 @@ def __init__( self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files] self.cache = cache.lower() if isinstance(cache, str) else "ram" if cache is True else None - if (self.cache == "ram" and self.check_cache_ram()) or self.cache == "disk": - if self.cache == "ram" and hyp.deterministic: + if self.cache == "ram" and self.check_cache_ram(): + if hyp.deterministic: LOGGER.warning( "WARNING โš ๏ธ cache='ram' may produce non-deterministic training results. " "Consider cache='disk' as a deterministic alternative if your disk space allows." 
) self.cache_images() + elif self.cache == "disk" and self.check_cache_disk(): + self.cache_images() # Transforms self.transforms = self.build_transforms(hyp=hyp) @@ -206,25 +208,55 @@ def cache_images_to_disk(self, i): if not f.exists(): np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False) + def check_cache_disk(self, safety_margin=0.5): + """Check image caching requirements vs available disk space.""" + import shutil + + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + n = min(self.ni, 30) # extrapolate from 30 random images + for _ in range(n): + im_file = random.choice(self.im_files) + im = cv2.imread(im_file) + if im is None: + continue + b += im.nbytes + if not os.access(Path(im_file).parent, os.W_OK): + self.cache = None + LOGGER.info(f"{self.prefix}Skipping caching images to disk, directory not writeable โš ๏ธ") + return False + disk_required = b * self.ni / n * (1 + safety_margin) # bytes required to cache dataset to disk + total, used, free = shutil.disk_usage(Path(self.im_files[0]).parent) + if disk_required > free: + self.cache = None + LOGGER.info( + f"{self.prefix}{disk_required / gb:.1f}GB disk space required, " + f"with {int(safety_margin * 100)}% safety margin but only " + f"{free / gb:.1f}/{total / gb:.1f}GB free, not caching images to disk โš ๏ธ" + ) + return False + return True + def check_cache_ram(self, safety_margin=0.5): """Check image caching requirements vs available memory.""" b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes n = min(self.ni, 30) # extrapolate from 30 random images for _ in range(n): im = cv2.imread(random.choice(self.im_files)) # sample image + if im is None: + continue ratio = self.imgsz / max(im.shape[0], im.shape[1]) # max(h, w) # ratio b += im.nbytes * ratio**2 mem_required = b * self.ni / n * (1 + safety_margin) # GB required to cache dataset into RAM mem = psutil.virtual_memory() - success = mem_required < mem.available # to cache or not to cache, that is the 
question - if not success: + if mem_required > mem.available: self.cache = None LOGGER.info( f"{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images " f"with {int(safety_margin * 100)}% safety margin but only " f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, not caching images โš ๏ธ" ) - return success + return False + return True def set_rectangle(self): """Sets the shape of bounding boxes for YOLO detections as rectangles.""" diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py index df3425037cd..468238308b7 100644 --- a/ultralytics/data/build.py +++ b/ultralytics/data/build.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os import random @@ -47,6 +47,18 @@ def __iter__(self): for _ in range(len(self)): yield next(self.iterator) + def __del__(self): + """Ensure that workers are terminated.""" + try: + if not hasattr(self.iterator, "_workers"): + return + for w in self.iterator._workers: # force terminate + if w.is_alive(): + w.terminate() + self.iterator._shutdown_workers() # cleanup + except Exception: + pass + def reset(self): """ Reset iterator. 
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 03dbf0ade1e..05a316b4858 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -1,13 +1,18 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import json +import random +import shutil from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path import cv2 import numpy as np +from PIL import Image -from ultralytics.utils import LOGGER, TQDM +from ultralytics.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM +from ultralytics.utils.downloads import download from ultralytics.utils.files import increment_path @@ -236,8 +241,10 @@ def convert_coco( ```python from ultralytics.data.converter import convert_coco - convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=True) - convert_coco("../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True) + convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False) + convert_coco( + "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True + ) ``` Output: @@ -261,11 +268,11 @@ def convert_coco( # since LVIS val set contains images from COCO 2017 train in addition to the COCO 2017 val split. 
(fn / "train2017").mkdir(parents=True, exist_ok=True) (fn / "val2017").mkdir(parents=True, exist_ok=True) - with open(json_file) as f: + with open(json_file, encoding="utf-8") as f: data = json.load(f) # Create image dict - images = {f'{x["id"]:d}': x for x in data["images"]} + images = {f"{x['id']:d}": x for x in data["images"]} # Create image-annotations dict imgToAnns = defaultdict(list) for ann in data["annotations"]: @@ -372,7 +379,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes): """ pixel_to_class_mapping = {i + 1: i for i in range(classes)} for mask_path in Path(masks_dir).iterdir(): - if mask_path.suffix == ".png": + if mask_path.suffix in {".png", ".jpg"}: mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) # Read the mask image in grayscale img_height, img_width = mask.shape # Get image dimensions LOGGER.info(f"Processing {mask_path} imgsz = {img_height} x {img_width}") @@ -572,7 +579,7 @@ def merge_multi_segment(segments): return s -def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): +def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None): """ Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in YOLO format. Generates segmentation data using SAM auto-annotator as needed. @@ -582,21 +589,20 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): save_dir (str | Path): Path to save the generated labels, labels will be saved into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None. sam_model (str): Segmentation model to use for intermediate segmentation data; optional. + device (int | str): The specific device to run SAM models. Default: None. Notes: The input directory structure assumed for dataset: - im_dir โ”œโ”€ 001.jpg - โ”œโ”€ .. + โ”œโ”€ ... โ””โ”€ NNN.jpg - labels โ”œโ”€ 001.txt - โ”œโ”€ .. + โ”œโ”€ ... 
โ””โ”€ NNN.txt """ - from tqdm import tqdm - from ultralytics import SAM from ultralytics.data import YOLODataset from ultralytics.utils import LOGGER @@ -610,7 +616,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): LOGGER.info("Detection labels detected, generating segment labels by SAM model!") sam_model = SAM(sam_model) - for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"): + for label in TQDM(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"): h, w = label["shape"] boxes = label["bboxes"] if len(boxes) == 0: # skip empty labels @@ -618,7 +624,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): boxes[:, [0, 2]] *= w boxes[:, [1, 3]] *= h im = cv2.imread(label["im_file"]) - sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False) + sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False, device=device) label["segments"] = sam_results[0].masks.xyn save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment" @@ -629,9 +635,68 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): txt_file = save_dir / lb_name cls = label["cls"] for i, s in enumerate(label["segments"]): + if len(s) == 0: + continue line = (int(cls[i]), *s.reshape(-1)) texts.append(("%g " * len(line)).rstrip() % line) - if texts: - with open(txt_file, "a") as f: - f.writelines(text + "\n" for text in texts) + with open(txt_file, "a") as f: + f.writelines(text + "\n" for text in texts) LOGGER.info(f"Generated segment labels saved in {save_dir}") + + +def create_synthetic_coco_dataset(): + """ + Creates a synthetic COCO dataset with random images based on filenames from label lists. + + This function downloads COCO labels, reads image filenames from label list files, + creates synthetic images for train2017 and val2017 subsets, and organizes + them in the COCO dataset structure. 
It uses multithreading to generate images efficiently. + + Examples: + >>> from ultralytics.data.converter import create_synthetic_coco_dataset + >>> create_synthetic_coco_dataset() + + Notes: + - Requires internet connection to download label files. + - Generates random RGB images of varying sizes (480x480 to 640x640 pixels). + - Existing test2017 directory is removed as it's not needed. + - Reads image filenames from train2017.txt and val2017.txt files. + """ + + def create_synthetic_image(image_file): + """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes.""" + if not image_file.exists(): + size = (random.randint(480, 640), random.randint(480, 640)) + Image.new( + "RGB", + size=size, + color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)), + ).save(image_file) + + # Download labels + dir = DATASETS_DIR / "coco" + url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/" + label_zip = "coco2017labels-segments.zip" + download([url + label_zip], dir=dir.parent) + + # Create synthetic images + shutil.rmtree(dir / "labels" / "test2017", ignore_errors=True) # Remove test2017 directory as not needed + with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: + for subset in ["train2017", "val2017"]: + subset_dir = dir / "images" / subset + subset_dir.mkdir(parents=True, exist_ok=True) + + # Read image filenames from label list file + label_list_file = dir / f"{subset}.txt" + if label_list_file.exists(): + with open(label_list_file) as f: + image_files = [dir / line.strip() for line in f] + + # Submit all tasks + futures = [executor.submit(create_synthetic_image, image_file) for image_file in image_files] + for _ in TQDM(as_completed(futures), total=len(futures), desc=f"Generating images for {subset}"): + pass # The actual work is done in the background + else: + print(f"Warning: Labels file {label_list_file} does not exist. 
Skipping image creation for {subset}.") + + print("Synthetic COCO dataset created successfully.") diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index 01cffd4a20f..0db566c01ed 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import json from collections import defaultdict from itertools import repeat @@ -70,7 +69,7 @@ def cache_labels(self, path=Path("./labels.cache")): Cache dataset labels, check images and read shapes. Args: - path (Path): Path where to save the cache file. Default is Path('./labels.cache'). + path (Path): Path where to save the cache file. Default is Path("./labels.cache"). Returns: (dict): labels. @@ -220,8 +219,10 @@ def update_labels_info(self, label): # NOTE: do NOT resample oriented boxes segment_resamples = 100 if self.use_obb else 1000 if len(segments) > 0: - # list[np.array(1000, 2)] * num_samples - # (N, 1000, 2) + # make sure segments interpolate correctly if original length is greater than segment_resamples + max_len = max(len(s) for s in segments) + segment_resamples = (max_len + 1) if segment_resamples < max_len else segment_resamples + # list[np.array(segment_resamples, 2)] * num_samples segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0) else: segments = np.zeros((0, segment_resamples, 2), dtype=np.float32) @@ -299,7 +300,7 @@ def get_labels(self): LOGGER.info("Loading annotation file...") with open(self.json_file) as f: annotations = json.load(f) - images = {f'{x["id"]:d}': x for x in annotations["images"]} + images = {f"{x['id']:d}": x for x in annotations["images"]} img_to_anns = defaultdict(list) for ann in annotations["annotations"]: img_to_anns[ann["image_id"]].append(ann) @@ -323,7 +324,8 @@ def get_labels(self): if box[2] <= 0 or box[3] <= 0: continue - cat_name = " ".join([img["caption"][t[0] : t[1]] 
for t in ann["tokens_positive"]]) + caption = img["caption"] + cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]]) if cat_name not in cat2id: cat2id[cat_name] = len(cat2id) texts.append([cat_name]) @@ -484,7 +486,7 @@ def verify_images(self): desc = f"{self.prefix}Scanning {self.root}..." path = Path(self.root).with_suffix(".cache") # *.cache file path - with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError): + try: cache = load_dataset_cache_file(path) # attempt to load a *.cache file assert cache["version"] == DATASET_CACHE_VERSION # matches current version assert cache["hash"] == get_hash([x[0] for x in self.samples]) # identical hash @@ -496,27 +498,29 @@ def verify_images(self): LOGGER.info("\n".join(cache["msgs"])) # display warnings return samples - # Run scan if *.cache retrieval failed - nf, nc, msgs, samples, x = 0, 0, [], [], {} - with ThreadPool(NUM_THREADS) as pool: - results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix))) - pbar = TQDM(results, desc=desc, total=len(self.samples)) - for sample, nf_f, nc_f, msg in pbar: - if nf_f: - samples.append(sample) - if msg: - msgs.append(msg) - nf += nf_f - nc += nc_f - pbar.desc = f"{desc} {nf} images, {nc} corrupt" - pbar.close() - if msgs: - LOGGER.info("\n".join(msgs)) - x["hash"] = get_hash([x[0] for x in self.samples]) - x["results"] = nf, nc, len(samples), samples - x["msgs"] = msgs # warnings - save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) - return samples + except (FileNotFoundError, AssertionError, AttributeError): + # Run scan if *.cache retrieval failed + nf, nc, msgs, samples, x = 0, 0, [], [], {} + with ThreadPool(NUM_THREADS) as pool: + results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix))) + pbar = TQDM(results, desc=desc, total=len(self.samples)) + for sample, nf_f, nc_f, msg in pbar: + if nf_f: + samples.append(sample) + if msg: + msgs.append(msg) + nf += nf_f + 
nc += nc_f + pbar.desc = f"{desc} {nf} images, {nc} corrupt" + pbar.close() + if msgs: + LOGGER.info("\n".join(msgs)) + x["hash"] = get_hash([x[0] for x in self.samples]) + x["results"] = nf, nc, len(samples), samples + x["msgs"] = msgs # warnings + save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) + return samples + # Regression dataloaders ------------------------------------------------------------------------------------------- class RegressionDataset: @@ -648,4 +652,4 @@ def verify_images(self): x["results"] = nf, nc, len(samples), samples x["msgs"] = msgs # warnings save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION) - return samples \ No newline at end of file + return samples diff --git a/ultralytics/data/explorer/__init__.py b/ultralytics/data/explorer/__init__.py deleted file mode 100644 index ce594dc1fd5..00000000000 --- a/ultralytics/data/explorer/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -from .utils import plot_query_result - -__all__ = ["plot_query_result"] diff --git a/ultralytics/data/explorer/explorer.py b/ultralytics/data/explorer/explorer.py deleted file mode 100644 index 0407c1a288a..00000000000 --- a/ultralytics/data/explorer/explorer.py +++ /dev/null @@ -1,460 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -from io import BytesIO -from pathlib import Path -from typing import Any, List, Tuple, Union - -import cv2 -import numpy as np -import torch -from matplotlib import pyplot as plt -from PIL import Image -from tqdm import tqdm - -from ultralytics.data.augment import Format -from ultralytics.data.dataset import YOLODataset -from ultralytics.data.utils import check_det_dataset -from ultralytics.models.yolo.model import YOLO -from ultralytics.utils import LOGGER, USER_CONFIG_DIR, IterableSimpleNamespace, checks - -from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch - - -class 
ExplorerDataset(YOLODataset): - """Extends YOLODataset for advanced data exploration and manipulation in model training workflows.""" - - def __init__(self, *args, data: dict = None, **kwargs) -> None: - """Initializes the ExplorerDataset with the provided data arguments, extending the YOLODataset class.""" - super().__init__(*args, data=data, **kwargs) - - def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]: - """Loads 1 image from dataset index 'i' without any resize ops.""" - im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] - if im is None: # not cached in RAM - if fn.exists(): # load npy - im = np.load(fn) - else: # read image - im = cv2.imread(f) # BGR - if im is None: - raise FileNotFoundError(f"Image Not Found {f}") - h0, w0 = im.shape[:2] # orig hw - return im, (h0, w0), im.shape[:2] - - return self.ims[i], self.im_hw0[i], self.im_hw[i] - - def build_transforms(self, hyp: IterableSimpleNamespace = None): - """Creates transforms for dataset images without resizing.""" - return Format( - bbox_format="xyxy", - normalize=False, - return_mask=self.use_segments, - return_keypoint=self.use_keypoints, - batch_idx=True, - mask_ratio=hyp.mask_ratio, - mask_overlap=hyp.overlap_mask, - ) - - -class Explorer: - """Utility class for image embedding, table creation, and similarity querying using LanceDB and YOLO models.""" - - def __init__( - self, - data: Union[str, Path] = "coco128.yaml", - model: str = "yolov8n.pt", - uri: str = USER_CONFIG_DIR / "explorer", - ) -> None: - """Initializes the Explorer class with dataset path, model, and URI for database connection.""" - # Note duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181 - checks.check_requirements(["lancedb>=0.4.3", "duckdb<=0.9.2"]) - import lancedb - - self.connection = lancedb.connect(uri) - self.table_name = f"{Path(data).name.lower()}_{model.lower()}" - self.sim_idx_base_name = ( - 
f"{self.table_name}_sim_idx".lower() - ) # Use this name and append thres and top_k to reuse the table - self.model = YOLO(model) - self.data = data # None - self.choice_set = None - - self.table = None - self.progress = 0 - - def create_embeddings_table(self, force: bool = False, split: str = "train") -> None: - """ - Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it - already exists. Pass force=True to overwrite the existing table. - - Args: - force (bool): Whether to overwrite the existing table or not. Defaults to False. - split (str): Split of the dataset to use. Defaults to 'train'. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - ``` - """ - if self.table is not None and not force: - LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.") - return - if self.table_name in self.connection.table_names() and not force: - LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.") - self.table = self.connection.open_table(self.table_name) - self.progress = 1 - return - if self.data is None: - raise ValueError("Data must be provided to create embeddings table") - - data_info = check_det_dataset(self.data) - if split not in data_info: - raise ValueError( - f"Split {split} is not found in the dataset. 
Available keys in the dataset are {list(data_info.keys())}" - ) - - choice_set = data_info[split] - choice_set = choice_set if isinstance(choice_set, list) else [choice_set] - self.choice_set = choice_set - dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task) - - # Create the table schema - batch = dataset[0] - vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0] - table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite") - table.add( - self._yield_batches( - dataset, - data_info, - self.model, - exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"], - ) - ) - - self.table = table - - def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]): - """Generates batches of data for embedding, excluding specified keys.""" - for i in tqdm(range(len(dataset))): - self.progress = float(i + 1) / len(dataset) - batch = dataset[i] - for k in exclude_keys: - batch.pop(k, None) - batch = sanitize_batch(batch, data_info) - batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist() - yield [batch] - - def query( - self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25 - ) -> Any: # pyarrow.Table - """ - Query the table for similar images. Accepts a single image or a list of images. - - Args: - imgs (str or list): Path to the image or a list of paths to the images. - limit (int): Number of results to return. - - Returns: - (pyarrow.Table): An arrow table containing the results. Supports converting to: - - pandas dataframe: `result.to_pandas()` - - dict of lists: `result.to_pydict()` - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.query(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - if self.table is None: - raise ValueError("Table is not created. 
Please create the table first.") - if isinstance(imgs, str): - imgs = [imgs] - assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}" - embeds = self.model.embed(imgs) - # Get avg if multiple images are passed (len > 1) - embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy() - return self.table.search(embeds).limit(limit).to_arrow() - - def sql_query( - self, query: str, return_type: str = "pandas" - ) -> Union[Any, None]: # pandas.DataFrame or pyarrow.Table - """ - Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown. - - Args: - query (str): SQL query to run. - return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'. - - Returns: - (pyarrow.Table): An arrow table containing the results. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'" - result = exp.sql_query(query) - ``` - """ - assert return_type in { - "pandas", - "arrow", - }, f"Return type should be either `pandas` or `arrow`, but got {return_type}" - import duckdb - - if self.table is None: - raise ValueError("Table is not created. Please create the table first.") - - # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this. - table = self.table.to_arrow() # noqa NOTE: Don't comment this. This line is used by DuckDB - if not query.startswith("SELECT") and not query.startswith("WHERE"): - raise ValueError( - f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE " - f"clause. 
found {query}" - ) - if query.startswith("WHERE"): - query = f"SELECT * FROM 'table' {query}" - LOGGER.info(f"Running query: {query}") - - rs = duckdb.sql(query) - if return_type == "arrow": - return rs.arrow() - elif return_type == "pandas": - return rs.df() - - def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image: - """ - Plot the results of a SQL-Like query on the table. - - Args: - query (str): SQL query to run. - labels (bool): Whether to plot the labels or not. - - Returns: - (PIL.Image): Image containing the plot. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'" - result = exp.plot_sql_query(query) - ``` - """ - result = self.sql_query(query, return_type="arrow") - if len(result) == 0: - LOGGER.info("No results found.") - return None - img = plot_query_result(result, plot_labels=labels) - return Image.fromarray(img) - - def get_similar( - self, - img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, - idx: Union[int, List[int]] = None, - limit: int = 25, - return_type: str = "pandas", - ) -> Any: # pandas.DataFrame or pyarrow.Table - """ - Query the table for similar images. Accepts a single image or a list of images. - - Args: - img (str or list): Path to the image or a list of paths to the images. - idx (int or list): Index of the image in the table or a list of indexes. - limit (int): Number of results to return. Defaults to 25. - return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'. - - Returns: - (pandas.DataFrame): A dataframe containing the results. 
- - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - assert return_type in {"pandas", "arrow"}, f"Return type should be `pandas` or `arrow`, but got {return_type}" - img = self._check_imgs_or_idxs(img, idx) - similar = self.query(img, limit=limit) - - if return_type == "arrow": - return similar - elif return_type == "pandas": - return similar.to_pandas() - - def plot_similar( - self, - img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, - idx: Union[int, List[int]] = None, - limit: int = 25, - labels: bool = True, - ) -> Image.Image: - """ - Plot the similar images. Accepts images or indexes. - - Args: - img (str or list): Path to the image or a list of paths to the images. - idx (int or list): Index of the image in the table or a list of indexes. - labels (bool): Whether to plot the labels or not. - limit (int): Number of results to return. Defaults to 25. - - Returns: - (PIL.Image): Image containing the plot. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - similar = exp.plot_similar(img="https://ultralytics.com/images/zidane.jpg") - ``` - """ - similar = self.get_similar(img, idx, limit, return_type="arrow") - if len(similar) == 0: - LOGGER.info("No results found.") - return None - img = plot_query_result(similar, plot_labels=labels) - return Image.fromarray(img) - - def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Any: # pd.DataFrame - """ - Calculate the similarity index of all the images in the table. Here, the index will contain the data points that - are max_dist or closer to the image in the embedding space at a given index. - - Args: - max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2. - top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit. - vector search. 
Defaults: None. - force (bool): Whether to overwrite the existing similarity index or not. Defaults to True. - - Returns: - (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, - and columns include indices of similar images and their respective distances. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - sim_idx = exp.similarity_index() - ``` - """ - if self.table is None: - raise ValueError("Table is not created. Please create the table first.") - sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower() - if sim_idx_table_name in self.connection.table_names() and not force: - LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.") - return self.connection.open_table(sim_idx_table_name).to_pandas() - - if top_k and not (1.0 >= top_k >= 0.0): - raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}") - if max_dist < 0.0: - raise ValueError(f"max_dist must be greater than 0. 
Got {max_dist}") - - top_k = int(top_k * len(self.table)) if top_k else len(self.table) - top_k = max(top_k, 1) - features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict() - im_files = features["im_file"] - embeddings = features["vector"] - - sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite") - - def _yield_sim_idx(): - """Generates a dataframe with similarity indices and distances for images.""" - for i in tqdm(range(len(embeddings))): - sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}") - yield [ - { - "idx": i, - "im_file": im_files[i], - "count": len(sim_idx), - "sim_im_files": sim_idx["im_file"].tolist(), - } - ] - - sim_table.add(_yield_sim_idx()) - self.sim_index = sim_table - return sim_table.to_pandas() - - def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image: - """ - Plot the similarity index of all the images in the table. Here, the index will contain the data points that are - max_dist or closer to the image in the embedding space at a given index. - - Args: - max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2. - top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when - running vector search. Defaults to 0.01. - force (bool): Whether to overwrite the existing similarity index or not. Defaults to True. - - Returns: - (PIL.Image): Image containing the plot. 
- - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - - similarity_idx_plot = exp.plot_similarity_index() - similarity_idx_plot.show() # view image preview - similarity_idx_plot.save("path/to/save/similarity_index_plot.png") # save contents to file - ``` - """ - sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force) - sim_count = sim_idx["count"].tolist() - sim_count = np.array(sim_count) - - indices = np.arange(len(sim_count)) - - # Create the bar plot - plt.bar(indices, sim_count) - - # Customize the plot (optional) - plt.xlabel("data idx") - plt.ylabel("Count") - plt.title("Similarity Count") - buffer = BytesIO() - plt.savefig(buffer, format="png") - buffer.seek(0) - - # Use Pillow to open the image from the buffer - return Image.fromarray(np.array(Image.open(buffer))) - - def _check_imgs_or_idxs( - self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]] - ) -> List[np.ndarray]: - """Determines whether to fetch images or indexes based on provided arguments and returns image paths.""" - if img is None and idx is None: - raise ValueError("Either img or idx must be provided.") - if img is not None and idx is not None: - raise ValueError("Only one of img or idx must be provided.") - if idx is not None: - idx = idx if isinstance(idx, list) else [idx] - img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"] - - return img if isinstance(img, list) else [img] - - def ask_ai(self, query): - """ - Ask AI a question. - - Args: - query (str): Question to ask. - - Returns: - (pandas.DataFrame): A dataframe containing filtered results to the SQL query. - - Example: - ```python - exp = Explorer() - exp.create_embeddings_table() - answer = exp.ask_ai("Show images with 1 person and 2 dogs") - ``` - """ - result = prompt_sql_query(query) - try: - return self.sql_query(result) - except Exception as e: - LOGGER.error("AI generated query is not valid. 
Please try again with a different prompt") - LOGGER.error(e) - return None diff --git a/ultralytics/data/explorer/gui/__init__.py b/ultralytics/data/explorer/gui/__init__.py deleted file mode 100644 index 9e68dc12245..00000000000 --- a/ultralytics/data/explorer/gui/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license diff --git a/ultralytics/data/explorer/gui/dash.py b/ultralytics/data/explorer/gui/dash.py deleted file mode 100644 index 81f1f62a8a4..00000000000 --- a/ultralytics/data/explorer/gui/dash.py +++ /dev/null @@ -1,282 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -import sys -import time -from threading import Thread - -from ultralytics import Explorer -from ultralytics.utils import ROOT, SETTINGS -from ultralytics.utils.checks import check_requirements - -check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3")) - -import streamlit as st -from streamlit_select import image_select - - -def _get_explorer(): - """Initializes and returns an instance of the Explorer class.""" - exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model")) - thread = Thread( - target=exp.create_embeddings_table, - kwargs={"force": st.session_state.get("force_recreate_embeddings"), "split": st.session_state.get("split")}, - ) - thread.start() - progress_bar = st.progress(0, text="Creating embeddings table...") - while exp.progress < 1: - time.sleep(0.1) - progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%") - thread.join() - st.session_state["explorer"] = exp - progress_bar.empty() - - -def init_explorer_form(data=None, model=None): - """Initializes an Explorer instance and creates embeddings table with progress tracking.""" - if data is None: - datasets = ROOT / "cfg" / "datasets" - ds = [d.name for d in datasets.glob("*.yaml")] - else: - ds = [data] - - if model is None: - models = [ - "yolov8n.pt", - "yolov8s.pt", - "yolov8m.pt", - "yolov8l.pt", - "yolov8x.pt", - 
"yolov8n-seg.pt", - "yolov8s-seg.pt", - "yolov8m-seg.pt", - "yolov8l-seg.pt", - "yolov8x-seg.pt", - "yolov8n-pose.pt", - "yolov8s-pose.pt", - "yolov8m-pose.pt", - "yolov8l-pose.pt", - "yolov8x-pose.pt", - ] - else: - models = [model] - - splits = ["train", "val", "test"] - - with st.form(key="explorer_init_form"): - col1, col2, col3 = st.columns(3) - with col1: - st.selectbox("Select dataset", ds, key="dataset") - with col2: - st.selectbox("Select model", models, key="model") - with col3: - st.selectbox("Select split", splits, key="split") - st.checkbox("Force recreate embeddings", key="force_recreate_embeddings") - - st.form_submit_button("Explore", on_click=_get_explorer) - - -def query_form(): - """Sets up a form in Streamlit to initialize Explorer with dataset and model selection.""" - with st.form("query_form"): - col1, col2 = st.columns([0.8, 0.2]) - with col1: - st.text_input( - "Query", - "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'", - label_visibility="collapsed", - key="query", - ) - with col2: - st.form_submit_button("Query", on_click=run_sql_query) - - -def ai_query_form(): - """Sets up a Streamlit form for user input to initialize Explorer with dataset and model selection.""" - with st.form("ai_query_form"): - col1, col2 = st.columns([0.8, 0.2]) - with col1: - st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query") - with col2: - st.form_submit_button("Ask AI", on_click=run_ai_query) - - -def find_similar_imgs(imgs): - """Initializes a Streamlit form for AI-based image querying with custom input.""" - exp = st.session_state["explorer"] - similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow") - paths = similar.to_pydict()["im_file"] - st.session_state["imgs"] = paths - st.session_state["res"] = similar - - -def similarity_form(selected_imgs): - """Initializes a form for AI-based image querying with custom input in Streamlit.""" - st.write("Similarity 
Search") - with st.form("similarity_form"): - subcol1, subcol2 = st.columns([1, 1]) - with subcol1: - st.number_input( - "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit" - ) - - with subcol2: - disabled = not len(selected_imgs) - st.write("Selected: ", len(selected_imgs)) - st.form_submit_button( - "Search", - disabled=disabled, - on_click=find_similar_imgs, - args=(selected_imgs,), - ) - if disabled: - st.error("Select at least one image to search.") - - -# def persist_reset_form(): -# with st.form("persist_reset"): -# col1, col2 = st.columns([1, 1]) -# with col1: -# st.form_submit_button("Reset", on_click=reset) -# -# with col2: -# st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) - - -def run_sql_query(): - """Executes an SQL query and returns the results.""" - st.session_state["error"] = None - query = st.session_state.get("query") - if query.rstrip().lstrip(): - exp = st.session_state["explorer"] - res = exp.sql_query(query, return_type="arrow") - st.session_state["imgs"] = res.to_pydict()["im_file"] - st.session_state["res"] = res - - -def run_ai_query(): - """Execute SQL query and update session state with query results.""" - if not SETTINGS["openai_api_key"]: - st.session_state["error"] = ( - 'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."' - ) - return - import pandas # scope for faster 'import ultralytics' - - st.session_state["error"] = None - query = st.session_state.get("ai_query") - if query.rstrip().lstrip(): - exp = st.session_state["explorer"] - res = exp.ask_ai(query) - if not isinstance(res, pandas.DataFrame) or res.empty: - st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it." 
- return - st.session_state["imgs"] = res["im_file"].to_list() - st.session_state["res"] = res - - -def reset_explorer(): - """Resets the explorer to its initial state by clearing session variables.""" - st.session_state["explorer"] = None - st.session_state["imgs"] = None - st.session_state["error"] = None - - -def utralytics_explorer_docs_callback(): - """Resets the explorer to its initial state by clearing session variables.""" - with st.container(border=True): - st.image( - "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg", - width=100, - ) - st.markdown( - "

This demo is built using Ultralytics Explorer API. Visit API docs to try examples & learn more

", - unsafe_allow_html=True, - help=None, - ) - st.link_button("Ultrlaytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/") - - -def layout(data=None, model=None): - """Resets explorer session variables and provides documentation with a link to API docs.""" - st.set_page_config(layout="wide", initial_sidebar_state="collapsed") - st.markdown("

Ultralytics Explorer Demo

", unsafe_allow_html=True) - - if st.session_state.get("explorer") is None: - init_explorer_form(data, model) - return - - st.button(":arrow_backward: Select Dataset", on_click=reset_explorer) - exp = st.session_state.get("explorer") - col1, col2 = st.columns([0.75, 0.25], gap="small") - imgs = [] - if st.session_state.get("error"): - st.error(st.session_state["error"]) - elif st.session_state.get("imgs"): - imgs = st.session_state.get("imgs") - else: - imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"] - st.session_state["res"] = exp.table.to_arrow() - total_imgs, selected_imgs = len(imgs), [] - with col1: - subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5) - with subcol1: - st.write("Max Images Displayed:") - with subcol2: - num = st.number_input( - "Max Images Displayed", - min_value=0, - max_value=total_imgs, - value=min(500, total_imgs), - key="num_imgs_displayed", - label_visibility="collapsed", - ) - with subcol3: - st.write("Start Index:") - with subcol4: - start_idx = st.number_input( - "Start Index", - min_value=0, - max_value=total_imgs, - value=0, - key="start_index", - label_visibility="collapsed", - ) - with subcol5: - reset = st.button("Reset", use_container_width=False, key="reset") - if reset: - st.session_state["imgs"] = None - st.experimental_rerun() - - query_form() - ai_query_form() - if total_imgs: - labels, boxes, masks, kpts, classes = None, None, None, None, None - task = exp.model.task - if st.session_state.get("display_labels"): - labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num] - boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num] - masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num] - kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num] - classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num] - imgs_displayed = imgs[start_idx 
: start_idx + num] - selected_imgs = image_select( - f"Total samples: {total_imgs}", - images=imgs_displayed, - use_container_width=False, - # indices=[i for i in range(num)] if select_all else None, - labels=labels, - classes=classes, - bboxes=boxes, - masks=masks if task == "segment" else None, - kpts=kpts if task == "pose" else None, - ) - - with col2: - similarity_form(selected_imgs) - st.checkbox("Labels", value=False, key="display_labels") - utralytics_explorer_docs_callback() - - -if __name__ == "__main__": - kwargs = dict(zip(sys.argv[1::2], sys.argv[2::2])) - layout(**kwargs) diff --git a/ultralytics/data/explorer/utils.py b/ultralytics/data/explorer/utils.py deleted file mode 100644 index 76f25572759..00000000000 --- a/ultralytics/data/explorer/utils.py +++ /dev/null @@ -1,167 +0,0 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -import getpass -from typing import List - -import cv2 -import numpy as np - -from ultralytics.data.augment import LetterBox -from ultralytics.utils import LOGGER as logger -from ultralytics.utils import SETTINGS -from ultralytics.utils.checks import check_requirements -from ultralytics.utils.ops import xyxy2xywh -from ultralytics.utils.plotting import plot_images - - -def get_table_schema(vector_size): - """Extracts and returns the schema of a database table.""" - from lancedb.pydantic import LanceModel, Vector - - class Schema(LanceModel): - im_file: str - labels: List[str] - cls: List[int] - bboxes: List[List[float]] - masks: List[List[List[int]]] - keypoints: List[List[List[float]]] - vector: Vector(vector_size) - - return Schema - - -def get_sim_index_schema(): - """Returns a LanceModel schema for a database table with specified vector size.""" - from lancedb.pydantic import LanceModel - - class Schema(LanceModel): - idx: int - im_file: str - count: int - sim_im_files: List[str] - - return Schema - - -def sanitize_batch(batch, dataset_info): - """Sanitizes input batch for inference, ensuring correct format and dimensions.""" 
- batch["cls"] = batch["cls"].flatten().int().tolist() - box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1]) - batch["bboxes"] = [box for box, _ in box_cls_pair] - batch["cls"] = [cls for _, cls in box_cls_pair] - batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]] - batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]] - batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]] - return batch - - -def plot_query_result(similar_set, plot_labels=True): - """ - Plot images from the similar set. - - Args: - similar_set (list): Pyarrow or pandas object containing the similar data points - plot_labels (bool): Whether to plot labels or not - """ - import pandas # scope for faster 'import ultralytics' - - similar_set = ( - similar_set.to_dict(orient="list") if isinstance(similar_set, pandas.DataFrame) else similar_set.to_pydict() - ) - empty_masks = [[[]]] - empty_boxes = [[]] - images = similar_set.get("im_file", []) - bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else [] - masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else [] - kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else [] - cls = similar_set.get("cls", []) - - plot_size = 640 - imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], [] - for i, imf in enumerate(images): - im = cv2.imread(imf) - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - h, w = im.shape[:2] - r = min(plot_size / h, plot_size / w) - imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1)) - if plot_labels: - if len(bboxes) > i and len(bboxes[i]) > 0: - box = np.array(bboxes[i], dtype=np.float32) - box[:, [0, 2]] *= r - box[:, [1, 3]] *= r - plot_boxes.append(box) - if len(masks) > i and len(masks[i]) > 0: - mask = np.array(masks[i], dtype=np.uint8)[0] - plot_masks.append(LetterBox(plot_size, 
center=False)(image=mask)) - if len(kpts) > i and kpts[i] is not None: - kpt = np.array(kpts[i], dtype=np.float32) - kpt[:, :, :2] *= r - plot_kpts.append(kpt) - batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i) - imgs = np.stack(imgs, axis=0) - masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8) - kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32) - boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32) - batch_idx = np.concatenate(batch_idx, axis=0) - cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0) - - return plot_images( - imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False - ) - - -def prompt_sql_query(query): - """Plots images with optional labels from a similar data set.""" - check_requirements("openai>=1.6.1") - from openai import OpenAI - - if not SETTINGS["openai_api_key"]: - logger.warning("OpenAI API key not found in settings. Please enter your API key below.") - openai_api_key = getpass.getpass("OpenAI API key: ") - SETTINGS.update({"openai_api_key": openai_api_key}) - openai = OpenAI(api_key=SETTINGS["openai_api_key"]) - - messages = [ - { - "role": "system", - "content": """ - You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on - the following schema and a user request. 
You only need to output the format with fixed selection - statement that selects everything from "'table'", like `SELECT * from 'table'` - - Schema: - im_file: string not null - labels: list not null - child 0, item: string - cls: list not null - child 0, item: int64 - bboxes: list> not null - child 0, item: list - child 0, item: double - masks: list>> not null - child 0, item: list> - child 0, item: list - child 0, item: int64 - keypoints: list>> not null - child 0, item: list> - child 0, item: list - child 0, item: double - vector: fixed_size_list[256] not null - child 0, item: float - - Some details about the schema: - - the "labels" column contains the string values like 'person' and 'dog' for the respective objects - in each image - - the "cls" column contains the integer values on these classes that map them the labels - - Example of a correct query: - request - Get all data points that contain 2 or more people and at least one dog - correct query- - SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1; - """, - }, - {"role": "user", "content": f"{query}"}, - ] - - response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages) - return response.choices[0].message.content diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index e91f2082c5e..3a04bb0383d 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import glob import math @@ -18,11 +18,29 @@ from ultralytics.data.utils import FORMATS_HELP_MSG, IMG_FORMATS, VID_FORMATS from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, ops from ultralytics.utils.checks import check_requirements +from ultralytics.utils.patches import imread @dataclass class SourceTypes: - """Class to represent various types of input 
sources for predictions.""" + """ + Class to represent various types of input sources for predictions. + + This class uses dataclass to define boolean flags for different types of input sources that can be used for + making predictions with YOLO models. + + Attributes: + stream (bool): Flag indicating if the input source is a video stream. + screenshot (bool): Flag indicating if the input source is a screenshot. + from_img (bool): Flag indicating if the input source is an image file. + + Examples: + >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False) + >>> print(source_types.stream) + True + >>> print(source_types.from_img) + False + """ stream: bool = False screenshot: bool = False @@ -32,38 +50,47 @@ class SourceTypes: class LoadStreams: """ - Stream Loader for various types of video streams, Supports RTSP, RTMP, HTTP, and TCP streams. + Stream Loader for various types of video streams. + + Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video + streams simultaneously, making it suitable for real-time video analysis tasks. Attributes: - sources (str): The source input paths or URLs for the video streams. - vid_stride (int): Video frame-rate stride, defaults to 1. - buffer (bool): Whether to buffer input streams, defaults to False. + sources (List[str]): The source input paths or URLs for the video streams. + vid_stride (int): Video frame-rate stride. + buffer (bool): Whether to buffer input streams. running (bool): Flag to indicate if the streaming thread is running. mode (str): Set to 'stream' indicating real-time capture. - imgs (list): List of image frames for each stream. - fps (list): List of FPS for each stream. - frames (list): List of total frames for each stream. - threads (list): List of threads for each stream. - shape (list): List of shapes for each stream. - caps (list): List of cv2.VideoCapture objects for each stream. 
+ imgs (List[List[np.ndarray]]): List of image frames for each stream. + fps (List[float]): List of FPS for each stream. + frames (List[int]): List of total frames for each stream. + threads (List[Thread]): List of threads for each stream. + shape (List[Tuple[int, int, int]]): List of shapes for each stream. + caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream. bs (int): Batch size for processing. Methods: - __init__: Initialize the stream loader. update: Read stream frames in daemon thread. close: Close stream loader and release resources. __iter__: Returns an iterator object for the class. __next__: Returns source paths, transformed, and original images for processing. __len__: Return the length of the sources object. - Example: - ```bash - yolo predict source='rtsp://example.com/media.mp4' - ``` + Examples: + >>> stream_loader = LoadStreams("rtsp://example.com/stream1.mp4") + >>> for sources, imgs, _ in stream_loader: + ... # Process the images + ... pass + >>> stream_loader.close() + + Notes: + - The class uses threading to efficiently load frames from multiple streams simultaneously. + - It automatically handles YouTube links, converting them to the best available stream URL. + - The class implements a buffer system to manage frame storage and retrieval. 
""" def __init__(self, sources="file.streams", vid_stride=1, buffer=False): - """Initialize instance variables and check for consistent input stream shapes.""" + """Initialize stream loader for multiple video sources, supporting various stream types.""" torch.backends.cudnn.benchmark = True # faster for fixed-size inference self.buffer = buffer # buffer input streams self.running = True # running flag for Thread @@ -114,7 +141,7 @@ def __init__(self, sources="file.streams", vid_stride=1, buffer=False): LOGGER.info("") # newline def update(self, i, cap, stream): - """Read stream `i` frames in daemon thread.""" + """Read stream frames in daemon thread and update image buffer.""" n, f = 0, self.frames[i] # frame number, frame array while self.running and cap.isOpened() and n < (f - 1): if len(self.imgs[i]) < 30: # keep a <=30-image buffer @@ -134,7 +161,7 @@ def update(self, i, cap, stream): time.sleep(0.01) # wait until the buffer is empty def close(self): - """Close stream loader and release resources.""" + """Terminates stream loader, stops threads, and releases video capture resources.""" self.running = False # stop flag for Thread for thread in self.threads: if thread.is_alive(): @@ -152,7 +179,7 @@ def __iter__(self): return self def __next__(self): - """Returns source paths, transformed and original images for processing.""" + """Returns the next batch of frames from multiple video streams for processing.""" self.count += 1 images = [] @@ -179,16 +206,16 @@ def __next__(self): return self.sources, images, [""] * self.bs def __len__(self): - """Return the length of the sources object.""" + """Return the number of video streams in the LoadStreams object.""" return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years class LoadScreenshots: """ - YOLOv8 screenshot dataloader. + Ultralytics screenshot dataloader for capturing and processing screen images. - This class manages the loading of screenshot images for processing with YOLOv8. 
- Suitable for use with `yolo predict source=screen`. + This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with + `yolo predict source=screen`. Attributes: source (str): The source input indicating which screen to capture. @@ -201,15 +228,21 @@ class LoadScreenshots: frame (int): Counter for captured frames. sct (mss.mss): Screen capture object from `mss` library. bs (int): Batch size, set to 1. - monitor (dict): Monitor configuration details. + fps (int): Frames per second, set to 30. + monitor (Dict[str, int]): Monitor configuration details. Methods: __iter__: Returns an iterator object. __next__: Captures the next screenshot and returns it. + + Examples: + >>> loader = LoadScreenshots("0 100 100 640 480") # screen 0, top-left (100,100), 640x480 + >>> for source, im, im0s, vid_cap, s in loader: + ... print(f"Captured frame: {im.shape}") """ def __init__(self, source): - """Source = [screen_number left top width height] (pixels).""" + """Initialize screenshot capture with specified screen and region parameters.""" check_requirements("mss") import mss # noqa @@ -236,11 +269,11 @@ def __init__(self, source): self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} def __iter__(self): - """Returns an iterator of the object.""" + """Yields the next screenshot image from the specified screen or region for processing.""" return self def __next__(self): - """Screen capture with 'mss' to get raw pixels from the screen as np array.""" + """Captures and returns the next screenshot as a numpy array using the mss library.""" im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " @@ -250,29 +283,45 @@ def __next__(self): class LoadImagesAndVideos: """ - YOLOv8 image/video dataloader. + A class for loading and processing images and videos for YOLO object detection. 
- This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from - various formats, including single image files, video files, and lists of image and video paths. + This class manages the loading and pre-processing of image and video data from various sources, including + single image files, video files, and lists of image and video paths. Attributes: - files (list): List of image and video file paths. + files (List[str]): List of image and video file paths. nf (int): Total number of files (images and videos). - video_flag (list): Flags indicating whether a file is a video (True) or an image (False). + video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False). mode (str): Current mode, 'image' or 'video'. - vid_stride (int): Stride for video frame-rate, defaults to 1. - bs (int): Batch size, set to 1 for this class. + vid_stride (int): Stride for video frame-rate. + bs (int): Batch size. cap (cv2.VideoCapture): Video capture object for OpenCV. frame (int): Frame counter for video. frames (int): Total number of frames in the video. - count (int): Counter for iteration, initialized at 0 during `__iter__()`. + count (int): Counter for iteration, initialized at 0 during __iter__(). + ni (int): Number of images. Methods: - _new_video(path): Create a new cv2.VideoCapture object for a given video path. + __init__: Initialize the LoadImagesAndVideos object. + __iter__: Returns an iterator object for VideoStream or ImageFolder. + __next__: Returns the next batch of images or video frames along with their paths and metadata. + _new_video: Creates a new video capture object for the given path. + __len__: Returns the number of batches in the object. + + Examples: + >>> loader = LoadImagesAndVideos("path/to/data", batch=32, vid_stride=1) + >>> for paths, imgs, info in loader: + ... # Process batch of images or video frames + ... pass + + Notes: + - Supports various image formats including HEIC. 
+ - Handles both local files and directories. + - Can read from a text file containing paths to images and videos. """ def __init__(self, path, batch=1, vid_stride=1): - """Initialize the Dataloader and raise FileNotFoundError if file not found.""" + """Initialize dataloader for images and videos, supporting various input formats.""" parent = None if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line parent = Path(path).parent @@ -305,7 +354,7 @@ def __init__(self, path, batch=1, vid_stride=1): self.nf = ni + nv # number of files self.ni = ni # number of images self.video_flag = [False] * ni + [True] * nv - self.mode = "image" + self.mode = "video" if ni == 0 else "image" # default to video if no images self.vid_stride = vid_stride # video frame-rate stride self.bs = batch if any(videos): @@ -316,12 +365,12 @@ def __init__(self, path, batch=1, vid_stride=1): raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}") def __iter__(self): - """Returns an iterator object for VideoStream or ImageFolder.""" + """Iterates through image/video files, yielding source paths, images, and metadata.""" self.count = 0 return self def __next__(self): - """Returns the next batch of images or video frames along with their paths and metadata.""" + """Returns the next batch of images or video frames with their paths and metadata.""" paths, imgs, info = [], [], [] while len(imgs) < self.bs: if self.count >= self.nf: # end of file list @@ -336,6 +385,7 @@ def __next__(self): if not self.cap or not self.cap.isOpened(): self._new_video(path) + success = False for _ in range(self.vid_stride): success = self.cap.grab() if not success: @@ -359,8 +409,19 @@ def __next__(self): if self.count < self.nf: self._new_video(self.files[self.count]) else: + # Handle image files (including HEIC) self.mode = "image" - im0 = cv2.imread(path) # BGR + if path.split(".")[-1].lower() == "heic": + # Load HEIC image using Pillow with 
pillow-heif + check_requirements("pillow-heif") + + from pillow_heif import register_heif_opener + + register_heif_opener() # Register HEIF opener with Pillow + with Image.open(path) as img: + im0 = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # convert image to BGR nparray + else: + im0 = imread(path) # BGR if im0 is None: LOGGER.warning(f"WARNING โš ๏ธ Image Read Error {path}") else: @@ -374,7 +435,7 @@ def __next__(self): return paths, imgs, info def _new_video(self, path): - """Creates a new video capture object for the given path.""" + """Creates a new video capture object for the given path and initializes video-related attributes.""" self.frame = 0 self.cap = cv2.VideoCapture(path) self.fps = int(self.cap.get(cv2.CAP_PROP_FPS)) @@ -383,40 +444,50 @@ def _new_video(self, path): self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) def __len__(self): - """Returns the number of batches in the object.""" - return math.ceil(self.nf / self.bs) # number of files + """Returns the number of files (images and videos) in the dataset.""" + return math.ceil(self.nf / self.bs) # number of batches class LoadPilAndNumpy: """ Load images from PIL and Numpy arrays for batch processing. - This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats. - It performs basic validation and format conversion to ensure that the images are in the required format for - downstream processing. + This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic + validation and format conversion to ensure that the images are in the required format for downstream processing. Attributes: - paths (list): List of image paths or autogenerated filenames. - im0 (list): List of images stored as Numpy arrays. - mode (str): Type of data being processed, defaults to 'image'. + paths (List[str]): List of image paths or autogenerated filenames. 
+ im0 (List[np.ndarray]): List of images stored as Numpy arrays. + mode (str): Type of data being processed, set to 'image'. bs (int): Batch size, equivalent to the length of `im0`. Methods: - _single_check(im): Validate and format a single image to a Numpy array. + _single_check: Validate and format a single image to a Numpy array. + + Examples: + >>> from PIL import Image + >>> import numpy as np + >>> pil_img = Image.new("RGB", (100, 100)) + >>> np_img = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + >>> loader = LoadPilAndNumpy([pil_img, np_img]) + >>> paths, images, _ = next(iter(loader)) + >>> print(f"Loaded {len(images)} images") + Loaded 2 images """ def __init__(self, im0): - """Initialize PIL and Numpy Dataloader.""" + """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format.""" if not isinstance(im0, list): im0 = [im0] - self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)] + # use `image{i}.jpg` when Image.filename returns an empty path. 
+ self.paths = [getattr(im, "filename", "") or f"image{i}.jpg" for i, im in enumerate(im0)] self.im0 = [self._single_check(im) for im in im0] self.mode = "image" self.bs = len(self.im0) @staticmethod def _single_check(im): - """Validate and format an image to numpy array.""" + """Validate and format an image to numpy array, ensuring RGB order and contiguous memory.""" assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}" if isinstance(im, Image.Image): if im.mode != "RGB": @@ -426,41 +497,48 @@ def _single_check(im): return im def __len__(self): - """Returns the length of the 'im0' attribute.""" + """Returns the length of the 'im0' attribute, representing the number of loaded images.""" return len(self.im0) def __next__(self): - """Returns batch paths, images, processed images, None, ''.""" + """Returns the next batch of images, paths, and metadata for processing.""" if self.count == 1: # loop only once as it's batch inference raise StopIteration self.count += 1 return self.paths, self.im0, [""] * self.bs def __iter__(self): - """Enables iteration for class LoadPilAndNumpy.""" + """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing.""" self.count = 0 return self class LoadTensor: """ - Load images from torch.Tensor data. + A class for loading and processing tensor data for object detection tasks. - This class manages the loading and pre-processing of image data from PyTorch tensors for further processing. + This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for + further processing in object detection pipelines. Attributes: - im0 (torch.Tensor): The input tensor containing the image(s). + im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W). bs (int): Batch size, inferred from the shape of `im0`. - mode (str): Current mode, set to 'image'. - paths (list): List of image paths or filenames. 
- count (int): Counter for iteration, initialized at 0 during `__iter__()`. + mode (str): Current processing mode, set to 'image'. + paths (List[str]): List of image paths or auto-generated filenames. Methods: - _single_check(im, stride): Validate and possibly modify the input tensor. + _single_check: Validates and formats an input tensor. + + Examples: + >>> import torch + >>> tensor = torch.rand(1, 3, 640, 640) + >>> loader = LoadTensor(tensor) + >>> paths, images, info = next(iter(loader)) + >>> print(f"Processed {len(images)} images") """ def __init__(self, im0) -> None: - """Initialize Tensor Dataloader.""" + """Initialize LoadTensor object for processing torch.Tensor image data.""" self.im0 = self._single_check(im0) self.bs = self.im0.shape[0] self.mode = "image" @@ -468,7 +546,7 @@ def __init__(self, im0) -> None: @staticmethod def _single_check(im, stride=32): - """Validate and format an image to torch.Tensor.""" + """Validates and formats a single image tensor, ensuring correct shape and normalization.""" s = ( f"WARNING โš ๏ธ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) " f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible." 
@@ -490,24 +568,24 @@ def _single_check(im, stride=32): return im def __iter__(self): - """Returns an iterator object.""" + """Yields an iterator object for iterating through tensor image data.""" self.count = 0 return self def __next__(self): - """Return next item in the iterator.""" + """Yields the next batch of tensor images and metadata for processing.""" if self.count == 1: raise StopIteration self.count += 1 return self.paths, self.im0, [""] * self.bs def __len__(self): - """Returns the batch size.""" + """Returns the batch size of the tensor input.""" return self.bs def autocast_list(source): - """Merges a list of source of different types into a list of numpy arrays or PIL images.""" + """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction.""" files = [] for im in source: if isinstance(im, (str, Path)): # filename or uri @@ -527,21 +605,24 @@ def get_best_youtube_url(url, method="pytube"): """ Retrieves the URL of the best quality MP4 video stream from a given YouTube video. - This function uses the specified method to extract the video info from YouTube. It supports the following methods: - - "pytube": Uses the pytube library to fetch the video streams. - - "pafy": Uses the pafy library to fetch the video streams. - - "yt-dlp": Uses the yt-dlp library to fetch the video streams. - - The function then finds the highest quality MP4 format that has a video codec but no audio codec, and returns the - URL of this video stream. - Args: url (str): The URL of the YouTube video. - method (str): The method to use for extracting video info. Default is "pytube". Other options are "pafy" and - "yt-dlp". + method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp". + Defaults to "pytube". Returns: - (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found. 
+ (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found. + + Examples: + >>> url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + >>> best_url = get_best_youtube_url(url) + >>> print(best_url) + https://rr4---sn-q4flrnek.googlevideo.com/videoplayback?expire=... + + Notes: + - Requires additional libraries based on the chosen method: pytubefix, pafy, or yt-dlp. + - The function prioritizes streams with at least 1080p resolution when available. + - For the "yt-dlp" method, it looks for formats with video codec, no audio, and *.mp4 extension. """ if method == "pytube": # Switched from pytube to pytubefix to resolve https://github.com/pytube/pytube/issues/1954 diff --git a/ultralytics/data/scripts/download_weights.sh b/ultralytics/data/scripts/download_weights.sh index 87db31fe1e6..f8a739f6d61 100755 --- a/ultralytics/data/scripts/download_weights.sh +++ b/ultralytics/data/scripts/download_weights.sh @@ -11,8 +11,8 @@ python - <>> label_map = {0: "cat", 1: "dog", 2: "bird"} # It should include all annotated classes details + >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map) + """ + import matplotlib.pyplot as plt + + from ultralytics.utils.plotting import colors + + img = np.array(Image.open(image_path)) + img_height, img_width = img.shape[:2] + annotations = [] + with open(txt_path) as file: + for line in file: + class_id, x_center, y_center, width, height = map(float, line.split()) + x = (x_center - width / 2) * img_width + y = (y_center - height / 2) * img_height + w = width * img_width + h = height * img_height + annotations.append((x, y, w, h, int(class_id))) + fig, ax = plt.subplots(1) # Plot the image and annotations + for x, y, w, h, label in annotations: + color = tuple(c / 255 for c in colors(label, True)) # Get and normalize the RGB color + rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor="none") # Create a rectangle + ax.add_patch(rect) + 
luminance = 0.2126 * color[0] + 0.7152 * color[1] + 0.0722 * color[2] # Formula for luminance + ax.text(x, y - 5, label_map[label], color="white" if luminance < 0.5 else "black", backgroundcolor=color) + ax.imshow(img) + plt.show() + + def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1): """ Convert a list of polygons to a binary mask of the specified image size. @@ -216,7 +264,7 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): ms = [] for si in range(len(segments)): mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1) - ms.append(mask) + ms.append(mask.astype(masks.dtype)) areas.append(mask.sum()) areas = np.asarray(areas) index = np.argsort(-areas) @@ -401,7 +449,7 @@ def check_cls_dataset(dataset, split=""): # Print to console for k, v in {"train": train_set, "val": val_set, "test": test_set}.items(): - prefix = f'{colorstr(f"{k}:")} {v}...' + prefix = f"{colorstr(f'{k}:')} {v}..." if v is None: LOGGER.info(prefix) else: @@ -535,12 +583,12 @@ def __init__(self, path="coco8.yaml", task="detect", autodownload=False): path = Path(path).resolve() LOGGER.info(f"Starting HUB dataset checks for {path}....") - self.task = task # detect, segment, pose, classify + self.task = task # detect, segment, pose, classify, obb if self.task == "classify": unzip_dir = unzip_file(path) data = check_cls_dataset(unzip_dir) data["path"] = unzip_dir - else: # detect, segment, pose + else: # detect, segment, pose, obb _, data_dir, yaml_path = self._unzip(Path(path)) try: # Load YAML with checks @@ -552,7 +600,7 @@ def __init__(self, path="coco8.yaml", task="detect", autodownload=False): except Exception as e: raise Exception("error/HUB/dataset_stats/init") from e - self.hub_dir = Path(f'{data["path"]}-hub') + self.hub_dir = Path(f"{data['path']}-hub") self.im_dir = self.hub_dir / "images" self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())} # statistics dictionary self.data = data @@ 
-564,7 +612,7 @@ def _unzip(path): return False, None, path unzip_dir = unzip_file(path, path=path.parent) assert unzip_dir.is_dir(), ( - f"Error unzipping {path}, {unzip_dir} not found. " f"path/to/abc.zip MUST unzip to path/to/abc/" + f"Error unzipping {path}, {unzip_dir} not found. path/to/abc.zip MUST unzip to path/to/abc/" ) return True, str(unzip_dir), find_dataset_yaml(unzip_dir) # zipped, data_dir, yaml_path @@ -602,7 +650,7 @@ def _round(labels): # Get dataset statistics if self.task == "classify": - from torchvision.datasets import ImageFolder + from torchvision.datasets import ImageFolder # scope for faster 'import ultralytics' dataset = ImageFolder(self.data[split]) diff --git a/ultralytics/engine/__init__.py b/ultralytics/engine/__init__.py index 9e68dc12245..77a19dcf0f8 100644 --- a/ultralytics/engine/__init__.py +++ b/ultralytics/engine/__init__.py @@ -1 +1 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 313cb7d076b..75c5d3d1337 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -1,52 +1,57 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ -Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit. +Export a YOLO PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit. 
Format | `format=argument` | Model --- | --- | --- -PyTorch | - | yolov8n.pt -TorchScript | `torchscript` | yolov8n.torchscript -ONNX | `onnx` | yolov8n.onnx -OpenVINO | `openvino` | yolov8n_openvino_model/ -TensorRT | `engine` | yolov8n.engine -CoreML | `coreml` | yolov8n.mlpackage -TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ -TensorFlow GraphDef | `pb` | yolov8n.pb -TensorFlow Lite | `tflite` | yolov8n.tflite -TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite -TensorFlow.js | `tfjs` | yolov8n_web_model/ -PaddlePaddle | `paddle` | yolov8n_paddle_model/ -NCNN | `ncnn` | yolov8n_ncnn_model/ +PyTorch | - | yolo11n.pt +TorchScript | `torchscript` | yolo11n.torchscript +ONNX | `onnx` | yolo11n.onnx +OpenVINO | `openvino` | yolo11n_openvino_model/ +TensorRT | `engine` | yolo11n.engine +CoreML | `coreml` | yolo11n.mlpackage +TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/ +TensorFlow GraphDef | `pb` | yolo11n.pb +TensorFlow Lite | `tflite` | yolo11n.tflite +TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite +TensorFlow.js | `tfjs` | yolo11n_web_model/ +PaddlePaddle | `paddle` | yolo11n_paddle_model/ +MNN | `mnn` | yolo11n.mnn +NCNN | `ncnn` | yolo11n_ncnn_model/ +IMX | `imx` | yolo11n_imx_model/ +RKNN | `rknn` | yolo11n_rknn_model/ Requirements: $ pip install "ultralytics[export]" Python: from ultralytics import YOLO - model = YOLO('yolov8n.pt') + model = YOLO('yolo11n.pt') results = model.export(format='onnx') CLI: - $ yolo mode=export model=yolov8n.pt format=onnx + $ yolo mode=export model=yolo11n.pt format=onnx Inference: - $ yolo predict model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - 
yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN + $ yolo predict model=yolo11n.pt # PyTorch + yolo11n.torchscript # TorchScript + yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolo11n_openvino_model # OpenVINO + yolo11n.engine # TensorRT + yolo11n.mlpackage # CoreML (macOS-only) + yolo11n_saved_model # TensorFlow SavedModel + yolo11n.pb # TensorFlow GraphDef + yolo11n.tflite # TensorFlow Lite + yolo11n_edgetpu.tflite # TensorFlow Edge TPU + yolo11n_paddle_model # PaddlePaddle + yolo11n.mnn # MNN + yolo11n_ncnn_model # NCNN + yolo11n_imx_model # IMX TensorFlow.js: $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example $ npm install - $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model + $ ln -s ../../yolo11n_web_model public/yolo11n_web_model $ npm start """ @@ -72,15 +77,17 @@ from ultralytics.nn.autobackend import check_class_names, default_class_names from ultralytics.nn.modules import C2f, Detect, Pose, Segment, RTDETRDecoder from ultralytics.nn.modules import Regress6 -from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel +from ultralytics.nn.tasks import ClassificationModel, DetectionModel, SegmentationModel, WorldModel from ultralytics.utils import ( ARM64, DEFAULT_CFG, + IS_COLAB, IS_JETSON, LINUX, LOGGER, MACOS, PYTHON_VERSION, + RKNN_CHIPS, ROOT, WINDOWS, __version__, @@ -89,31 +96,64 @@ get_default_args, yaml_save, ) -from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requirements, check_version +from ultralytics.utils.checks import ( + check_imgsz, + check_is_path_safe, + check_requirements, + check_version, + is_sudo_available, +) from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download from ultralytics.utils.files import file_size, spaces_in_path -from ultralytics.utils.ops import Profile -from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, 
smart_inference_mode +from ultralytics.utils.ops import Profile, nms_rotated, xywh2xyxy +from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device def export_formats(): """Ultralytics YOLO export formats.""" x = [ - ["PyTorch", "-", ".pt", True, True], - ["TorchScript", "torchscript", ".torchscript", True, True], - ["ONNX", "onnx", ".onnx", True, True], - ["OpenVINO", "openvino", "_openvino_model", True, False], - ["TensorRT", "engine", ".engine", False, True], - ["CoreML", "coreml", ".mlpackage", True, False], - ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True], - ["TensorFlow GraphDef", "pb", ".pb", True, True], - ["TensorFlow Lite", "tflite", ".tflite", True, False], - ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False], - ["TensorFlow.js", "tfjs", "_web_model", True, False], - ["PaddlePaddle", "paddle", "_paddle_model", True, True], - ["NCNN", "ncnn", "_ncnn_model", True, True], + ["PyTorch", "-", ".pt", True, True, []], + ["TorchScript", "torchscript", ".torchscript", True, True, ["batch", "optimize", "nms"]], + ["ONNX", "onnx", ".onnx", True, True, ["batch", "dynamic", "half", "opset", "simplify", "nms"]], + ["OpenVINO", "openvino", "_openvino_model", True, False, ["batch", "dynamic", "half", "int8", "nms"]], + ["TensorRT", "engine", ".engine", False, True, ["batch", "dynamic", "half", "int8", "simplify", "nms"]], + ["CoreML", "coreml", ".mlpackage", True, False, ["batch", "half", "int8", "nms"]], + ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True, ["batch", "int8", "keras", "nms"]], + ["TensorFlow GraphDef", "pb", ".pb", True, True, ["batch"]], + ["TensorFlow Lite", "tflite", ".tflite", True, False, ["batch", "half", "int8", "nms"]], + ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False, []], + ["TensorFlow.js", "tfjs", "_web_model", True, False, ["batch", "half", "int8", "nms"]], + ["PaddlePaddle", "paddle", "_paddle_model", True, True, ["batch"]], + ["MNN", 
"mnn", ".mnn", True, True, ["batch", "half", "int8"]], + ["NCNN", "ncnn", "_ncnn_model", True, True, ["batch", "half"]], + ["IMX", "imx", "_imx_model", True, True, ["int8"]], + ["RKNN", "rknn", "_rknn_model", False, False, ["batch", "name"]], ] - return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x))) + return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU", "Arguments"], zip(*x))) + + +def validate_args(format, passed_args, valid_args): + """ + Validates arguments based on format. + + Args: + format (str): The export format. + passed_args (Namespace): The arguments used during export. + valid_args (dict): List of valid arguments for the format. + + Raises: + AssertionError: If an argument that's not supported by the export format is used, or if format doesn't have the supported arguments listed. + """ + # Only check valid usage of these args + export_args = ["half", "int8", "dynamic", "keras", "nms", "batch"] + + assert valid_args is not None, f"ERROR โŒ๏ธ valid arguments for '{format}' not listed." + custom = {"batch": 1, "data": None, "device": None} # exporter defaults + default_args = get_cfg(DEFAULT_CFG, custom) + for arg in export_args: + not_default = getattr(passed_args, arg, None) != getattr(default_args, arg, None) + if not_default: + assert arg in valid_args, f"ERROR โŒ๏ธ argument '{arg}' is not supported for format='{format}'" def gd_outputs(gd): @@ -126,7 +166,7 @@ def gd_outputs(gd): def try_export(inner_func): - """YOLOv8 export decorator, i.e. @try_export.""" + """YOLO export decorator, i.e. 
@try_export.""" inner_args = get_default_args(inner_func) def outer_func(*args, **kwargs): @@ -169,7 +209,6 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): self.callbacks = _callbacks or callbacks.get_default_callbacks() callbacks.add_integration_callbacks(self) - @smart_inference_mode() def __call__(self, model=None) -> str: """Returns list of exported files/dirs after running callbacks.""" self.run_callbacks("on_export_start") @@ -179,20 +218,43 @@ def __call__(self, model=None) -> str: fmt = "engine" if fmt in {"mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"}: # 'coreml' aliases fmt = "coreml" - fmts = tuple(export_formats()["Argument"][1:]) # available export formats + fmts_dict = export_formats() + fmts = tuple(fmts_dict["Argument"][1:]) # available export formats + if fmt not in fmts: + import difflib + + # Get the closest match if format is invalid + matches = difflib.get_close_matches(fmt, fmts, n=1, cutoff=0.6) # 60% similarity required to match + if not matches: + raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}") + LOGGER.warning(f"WARNING โš ๏ธ Invalid export format='{fmt}', updating to format='{matches[0]}'") + fmt = matches[0] flags = [x == fmt for x in fmts] if sum(flags) != 1: raise ValueError(f"Invalid export format='{fmt}'. 
Valid formats are {fmts}") - jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags # export booleans + (jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, mnn, ncnn, imx, rknn) = ( + flags # export booleans + ) + is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs)) # Device + dla = None if fmt == "engine" and self.args.device is None: LOGGER.warning("WARNING โš ๏ธ TensorRT requires GPU export, automatically assigning device=0") self.args.device = "0" + if fmt == "engine" and "dla" in str(self.args.device): # convert int/list to str first + dla = self.args.device.split(":")[-1] + self.args.device = "0" # update device to "0" + assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}." self.device = select_device("cpu" if self.args.device is None else self.args.device) - # Checks + # Argument compatibility checks + fmt_keys = fmts_dict["Arguments"][flags.index(True) + 1] + validate_args(fmt, self.args, fmt_keys) + if imx and not self.args.int8: + LOGGER.warning("WARNING โš ๏ธ IMX only supports int8 export, setting int8=True.") + self.args.int8 = True if not hasattr(model, "names"): model.names = default_class_names() model.names = check_class_names(model.names) @@ -209,6 +271,24 @@ def __call__(self, model=None) -> str: if self.args.optimize: assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False" assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'" + if rknn: + if not self.args.name: + LOGGER.warning( + "WARNING โš ๏ธ Rockchip RKNN export requires a missing 'name' arg for processor type. Using default name='rk3588'." + ) + self.args.name = "rk3588" + self.args.name = self.args.name.lower() + assert self.args.name in RKNN_CHIPS, ( + f"Invalid processor name '{self.args.name}' for Rockchip RKNN export. Valid names are {RKNN_CHIPS}." 
+ ) + if self.args.int8 and tflite: + assert not getattr(model, "end2end", False), "TFLite INT8 export not supported for end2end models." + if self.args.nms: + assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models." + if getattr(model, "end2end", False): + LOGGER.warning("WARNING โš ๏ธ 'nms=True' is not available for end2end models. Forcing 'nms=False'.") + self.args.nms = False + self.args.conf = self.args.conf or 0.25 # set conf default value for nms export if edgetpu: if not LINUX: raise SystemError("Edge TPU export only supported on Linux. See https://coral.ai/docs/edgetpu/compiler") @@ -230,12 +310,14 @@ def __call__(self, model=None) -> str: "(torchscript, onnx, openvino, engine, coreml) formats. " "See https://docs.ultralytics.com/models/yolo-world for details." ) + model.clip_model = None # openvino int8 export error: https://github.com/ultralytics/ultralytics/pull/18445 if self.args.int8 and not self.args.data: self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")] # assign default data LOGGER.warning( "WARNING โš ๏ธ INT8 export requires a missing 'data' arg for calibration. " f"Using default 'data={self.args.data}'." 
) + # Input im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) file = Path( @@ -251,7 +333,14 @@ def __call__(self, model=None) -> str: model.eval() model.float() model = model.fuse() + + if imx: + from ultralytics.utils.torch_utils import FXModel + + model = FXModel(model) for m in model.modules(): + if isinstance(m, Classify): + m.export = True if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB m.dynamic = self.args.dynamic m.export = True @@ -270,9 +359,19 @@ def __call__(self, model=None) -> str: elif isinstance(m, Regress6): m.export = True + if isinstance(m, Detect) and imx: + from ultralytics.utils.tal import make_anchors + + m.anchors, m.strides = ( + x.transpose(0, 1) + for x in make_anchors( + torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5 + ) + ) + y = None - for _ in range(2): - y = model(im) # dry runs + for _ in range(2): # dry runs + y = NMSModel(model, self.args)(im) if self.args.nms and not coreml else model(im) if self.args.half and onnx and self.device.type != "cpu": im, model = im.half(), model.half() # to FP16 @@ -292,7 +391,7 @@ def __call__(self, model=None) -> str: ) self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO") data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else "" - description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}' + description = f"Ultralytics {self.pretty_name} model {f'trained on {data}' if data else ''}" self.metadata = { "description": description, "author": "Ultralytics", @@ -305,6 +404,7 @@ def __call__(self, model=None) -> str: "batch": self.args.batch, "imgsz": self.imgsz, "names": model.names, + "args": {k: v for k, v in self.args if k in fmt_keys}, } # model metadata if model.task == "pose": self.metadata["kpt_shape"] = model.model[-1].kpt_shape @@ -314,7 +414,7 @@ def __call__(self, model=None) -> 
str: LOGGER.info( f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and " - f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)' + f"output shape(s) {self.output_shape} ({file_size(file):.1f} MB)" ) # Exports @@ -322,7 +422,7 @@ def __call__(self, model=None) -> str: if jit or ncnn: # TorchScript f[0], _ = self.export_torchscript() if engine: # TensorRT required before ONNX - f[1], _ = self.export_engine() + f[1], _ = self.export_engine(dla=dla) if onnx: # ONNX f[2], _ = self.export_onnx() if xml: # OpenVINO @@ -342,8 +442,14 @@ def __call__(self, model=None) -> str: f[9], _ = self.export_tfjs() if paddle: # PaddlePaddle f[10], _ = self.export_paddle() + if mnn: # MNN + f[11], _ = self.export_mnn() if ncnn: # NCNN - f[11], _ = self.export_ncnn() + f[12], _ = self.export_ncnn() + if imx: + f[13], _ = self.export_imx() + if rknn: + f[14], _ = self.export_rknn() # Finish f = [str(x) for x in f if x] # filter out '' and None @@ -360,11 +466,11 @@ def __call__(self, model=None) -> str: predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else "" q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization LOGGER.info( - f'\nExport complete ({time.time() - t:.1f}s)' + f"\nExport complete ({time.time() - t:.1f}s)" f"\nResults saved to {colorstr('bold', file.parent.resolve())}" - f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}' - f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}' - f'\nVisualize: https://netron.app' + f"\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}" + f"\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}" + f"\nVisualize: https://netron.app" ) self.run_callbacks("on_export_end") @@ -391,17 +497,21 @@ def get_int8_calibration_dataloader(self, prefix=""): batch_size=batch, ) n = len(dataset) - if n < 300: + if n < 
self.args.batch: + raise ValueError( + f"The calibration dataset ({n} images) must have at least as many images as the batch size ('batch={self.args.batch}')." + ) + elif n < 300: LOGGER.warning(f"{prefix} WARNING โš ๏ธ >300 images recommended for INT8 calibration, found {n} images.") return build_dataloader(dataset, batch=batch, workers=0) # required for batch loading @try_export def export_torchscript(self, prefix=colorstr("TorchScript:")): - """YOLOv8 TorchScript model export.""" + """YOLO TorchScript model export.""" LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...") f = self.file.with_suffix(".torchscript") - ts = torch.jit.trace(self.model, self.im, strict=False) + ts = torch.jit.trace(NMSModel(self.model, self.args) if self.args.nms else self.model, self.im, strict=False) extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap() if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html LOGGER.info(f"{prefix} optimizing for mobile...") @@ -414,29 +524,39 @@ def export_torchscript(self, prefix=colorstr("TorchScript:")): @try_export def export_onnx(self, prefix=colorstr("ONNX:")): - """YOLOv8 ONNX export.""" + """YOLO ONNX export.""" requirements = ["onnx>=1.12.0"] if self.args.simplify: - requirements += ["onnxslim==0.1.34", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")] + requirements += ["onnxslim", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")] check_requirements(requirements) import onnx # noqa opset_version = self.args.opset or get_latest_opset() LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...") f = str(self.file.with_suffix(".onnx")) - output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"] dynamic = self.args.dynamic if dynamic: + self.model.cpu() # dynamic=True only compatible with cpu dynamic = {"images": {0: "batch", 2: "height", 3: "width"}} # 
shape(1,3,640,640) if isinstance(self.model, SegmentationModel): dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 116, 8400) dynamic["output1"] = {0: "batch", 2: "mask_height", 3: "mask_width"} # shape(1,32,160,160) elif isinstance(self.model, DetectionModel): dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 84, 8400) + if self.args.nms: # only batch size is dynamic with NMS + dynamic["output0"].pop(2) + if self.args.nms and self.model.task == "obb": + self.args.opset = opset_version # for NMSModel + # OBB error https://github.com/pytorch/pytorch/issues/110859#issuecomment-1757841865 + try: + torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, opset_version) + except RuntimeError: # it will fail if it's already registered + pass + check_requirements("onnxslim>=0.1.46") # Older versions has bug with OBB torch.onnx.export( - self.model.cpu() if dynamic else self.model, # dynamic=True only compatible with cpu + NMSModel(self.model, self.args) if self.args.nms else self.model, self.im.cpu() if dynamic else self.im, f, verbose=False, @@ -471,21 +591,21 @@ def export_onnx(self, prefix=colorstr("ONNX:")): @try_export def export_openvino(self, prefix=colorstr("OpenVINO:")): - """YOLOv8 OpenVINO export.""" - check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}') # fix OpenVINO issue on ARM64 + """YOLO OpenVINO export.""" + check_requirements("openvino>=2024.5.0") import openvino as ov LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...") assert TORCH_1_13, f"OpenVINO export requires torch>=1.13.0 but torch=={torch.__version__} is installed" ov_model = ov.convert_model( - self.model, + NMSModel(self.model, self.args) if self.args.nms else self.model, input=None if self.args.dynamic else [self.im.shape], example_input=self.im, ) def serialize(ov_model, file): """Set RT info, serialize and save metadata YAML.""" - ov_model.set_rt_info("YOLOv8", ["model_info", "model_type"]) + 
ov_model.set_rt_info("YOLO", ["model_info", "model_type"]) ov_model.set_rt_info(True, ["model_info", "reverse_input_channels"]) ov_model.set_rt_info(114, ["model_info", "pad_value"]) ov_model.set_rt_info([255.0], ["model_info", "scale_values"]) @@ -501,7 +621,7 @@ def serialize(ov_model, file): if self.args.int8: fq = str(self.file).replace(self.file.suffix, f"_int8_openvino_model{os.sep}") fq_ov = str(Path(fq) / self.file.with_suffix(".xml").name) - check_requirements("nncf>=2.8.0") + check_requirements("nncf>=2.14.0") import nncf def transform_fn(data_item) -> np.ndarray: @@ -544,8 +664,8 @@ def transform_fn(data_item) -> np.ndarray: @try_export def export_paddle(self, prefix=colorstr("PaddlePaddle:")): - """YOLOv8 Paddle export.""" - check_requirements(("paddlepaddle", "x2paddle")) + """YOLO Paddle export.""" + check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle", "x2paddle")) import x2paddle # noqa from x2paddle.convert import pytorch2paddle # noqa @@ -556,9 +676,34 @@ def export_paddle(self, prefix=colorstr("PaddlePaddle:")): yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml return f, None + @try_export + def export_mnn(self, prefix=colorstr("MNN:")): + """YOLOv8 MNN export using MNN https://github.com/alibaba/MNN.""" + f_onnx, _ = self.export_onnx() # get onnx model first + + check_requirements("MNN>=2.9.6") + import MNN # noqa + from MNN.tools import mnnconvert + + # Setup and checks + LOGGER.info(f"\n{prefix} starting export with MNN {MNN.version()}...") + assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}" + f = str(self.file.with_suffix(".mnn")) # MNN model file + args = ["", "-f", "ONNX", "--modelFile", f_onnx, "--MNNModel", f, "--bizCode", json.dumps(self.metadata)] + if self.args.int8: + args.extend(("--weightQuantBits", "8")) + if self.args.half: + args.append("--fp16") + mnnconvert.convert(args) + # remove scratch file for model convert optimize + convert_scratch = 
Path(self.file.parent / ".__convert_external_data.bin") + if convert_scratch.exists(): + convert_scratch.unlink() + return f, None + @try_export def export_ncnn(self, prefix=colorstr("NCNN:")): - """YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx.""" + """YOLO NCNN export using PNNX https://github.com/pnnx/pnnx.""" check_requirements("ncnn") import ncnn # noqa @@ -591,16 +736,16 @@ def export_ncnn(self, prefix=colorstr("NCNN:")): shutil.rmtree(unzip_dir) # delete unzip dir ncnn_args = [ - f'ncnnparam={f / "model.ncnn.param"}', - f'ncnnbin={f / "model.ncnn.bin"}', - f'ncnnpy={f / "model_ncnn.py"}', + f"ncnnparam={f / 'model.ncnn.param'}", + f"ncnnbin={f / 'model.ncnn.bin'}", + f"ncnnpy={f / 'model_ncnn.py'}", ] pnnx_args = [ - f'pnnxparam={f / "model.pnnx.param"}', - f'pnnxbin={f / "model.pnnx.bin"}', - f'pnnxpy={f / "model_pnnx.py"}', - f'pnnxonnx={f / "model.pnnx.onnx"}', + f"pnnxparam={f / 'model.pnnx.param'}", + f"pnnxbin={f / 'model.pnnx.bin'}", + f"pnnxpy={f / 'model_pnnx.py'}", + f"pnnxonnx={f / 'model.pnnx.onnx'}", ] cmd = [ @@ -626,7 +771,7 @@ def export_ncnn(self, prefix=colorstr("NCNN:")): @try_export def export_coreml(self, prefix=colorstr("CoreML:")): - """YOLOv8 CoreML export.""" + """YOLO CoreML export.""" mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested check_requirements("coremltools>=6.0,<=6.2" if mlmodel else "coremltools>=7.0") import coremltools as ct # noqa @@ -637,9 +782,6 @@ def export_coreml(self, prefix=colorstr("CoreML:")): f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage") if f.is_dir(): shutil.rmtree(f) - if self.args.nms and getattr(self.model, "end2end", False): - LOGGER.warning(f"{prefix} WARNING โš ๏ธ 'nms=True' is not available for end2end models. 
Forcing 'nms=False'.") - self.args.nms = False bias = [0.0, 0.0, 0.0] scale = 1 / 255 @@ -651,7 +793,7 @@ def export_coreml(self, prefix=colorstr("CoreML:")): model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model else: if self.args.nms: - LOGGER.warning(f"{prefix} WARNING โš ๏ธ 'nms=True' is only available for Detect models like 'yolov8n.pt'.") + LOGGER.warning(f"{prefix} WARNING โš ๏ธ 'nms=True' is only available for Detect models like 'yolo11n.pt'.") # TODO CoreML Segment and Pose model pipelining model = self.model @@ -702,8 +844,8 @@ def export_coreml(self, prefix=colorstr("CoreML:")): return f, ct_model @try_export - def export_engine(self, prefix=colorstr("TensorRT:")): - """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt.""" + def export_engine(self, dla=None, prefix=colorstr("TensorRT:")): + """YOLO TensorRT export https://developer.nvidia.com/tensorrt.""" assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'" f_onnx, _ = self.export_onnx() # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016 @@ -711,10 +853,10 @@ def export_engine(self, prefix=colorstr("TensorRT:")): import tensorrt as trt # noqa except ImportError: if LINUX: - check_requirements("tensorrt>7.0.0,<=10.1.0") + check_requirements("tensorrt>7.0.0,!=10.1.0") import tensorrt as trt # noqa check_version(trt.__version__, ">=7.0.0", hard=True) - check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239") + check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239") # Setup and checks LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...") @@ -728,15 +870,29 @@ def export_engine(self, prefix=colorstr("TensorRT:")): # Engine builder builder = trt.Builder(logger) config = builder.create_builder_config() - workspace = int(self.args.workspace * (1 << 30)) - if is_trt10: + workspace = 
int(self.args.workspace * (1 << 30)) if self.args.workspace is not None else 0 + if is_trt10 and workspace > 0: config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace) - else: # TensorRT versions 7, 8 + elif workspace > 0: # TensorRT versions 7, 8 config.max_workspace_size = workspace flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = builder.create_network(flag) half = builder.platform_has_fast_fp16 and self.args.half int8 = builder.platform_has_fast_int8 and self.args.int8 + + # Optionally switch to DLA if enabled + if dla is not None: + if not IS_JETSON: + raise ValueError("DLA is only available on NVIDIA Jetson devices") + LOGGER.info(f"{prefix} enabling DLA on core {dla}...") + if not self.args.half and not self.args.int8: + raise ValueError( + "DLA requires either 'half=True' (FP16) or 'int8=True' (INT8) to be enabled. Please enable one of them and try again." + ) + config.default_device_type = trt.DeviceType.DLA + config.DLA_core = int(dla) + config.set_flag(trt.BuilderFlag.GPU_FALLBACK) + # Read ONNX file parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(f_onnx): @@ -756,7 +912,7 @@ def export_engine(self, prefix=colorstr("TensorRT:")): LOGGER.warning(f"{prefix} WARNING โš ๏ธ 'dynamic=True' model requires max batch size, i.e. 
'batch=16'") profile = builder.create_optimization_profile() min_shape = (1, shape[1], 32, 32) # minimum input shape - max_shape = (*shape[:2], *(max(1, self.args.workspace) * d for d in shape[2:])) # max input shape + max_shape = (*shape[:2], *(int(max(1, workspace) * d) for d in shape[2:])) # max input shape for inp in inputs: profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape) config.add_optimization_profile(profile) @@ -837,7 +993,7 @@ def write_calibration_cache(self, cache) -> None: @try_export def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")): - """YOLOv8 TensorFlow SavedModel export.""" + """YOLO TensorFlow SavedModel export.""" cuda = torch.cuda.is_available() try: import tensorflow as tf # noqa @@ -853,7 +1009,7 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")): "sng4onnx>=1.0.1", # required by 'onnx2tf' package "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package "onnx>=1.12.0", - "onnx2tf>1.17.5,<=1.22.3", + "onnx2tf>1.17.5,<=1.26.3", "onnxslim>=0.1.31", "tflite_support<=0.4.3" if IS_JETSON else "tflite_support", # fix ImportError 'GLIBCXX_3.4.29' "flatbuffers>=23.5.26,<100", # update old 'flatbuffers' included inside tensorflow package @@ -890,23 +1046,22 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")): io_quant_dtype = "int8" if self.args.int8: tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file - verbosity = "info" io_quant_dtype = "uint8" if self.args.uint8_io_dtype else "int8" if self.args.data: f.mkdir() - images = [batch["img"].permute(0, 2, 3, 1) for batch in self.get_int8_calibration_dataloader(prefix)] - images = torch.cat(images, 0).float() + images = [batch["img"] for batch in self.get_int8_calibration_dataloader(prefix)] + images = torch.nn.functional.interpolate(torch.cat(images, 0).float(), size=self.imgsz).permute( + 0, 2, 3, 1 + ) np.save(str(tmp_file), images.numpy().astype(np.float32)) # BHWC np_data = 
[["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]] - else: - verbosity = "error" LOGGER.info(f"{prefix} starting TFLite export with onnx2tf {onnx2tf.__version__}...") - onnx2tf.convert( + keras_model = onnx2tf.convert( input_onnx_file_path=f_onnx, output_folder_path=str(f), not_use_onnxsim=True, - verbosity=verbosity, + verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873 output_integer_quantized_tflite=self.args.int8, quant_type="per-tensor", # "per-tensor" (faster) or "per-channel" (slower but more accurate) custom_input_op_name_np_data_path=np_data, @@ -932,11 +1087,11 @@ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")): else: if not self.args.separate_outputs: self._add_tflite_metadata(file) - return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model + return str(f), keras_model # or keras_model = tf.saved_model.load(f, tags=None, options=None) @try_export def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")): - """YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow.""" + """YOLO TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow.""" import tensorflow as tf # noqa from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa @@ -952,7 +1107,7 @@ def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")): @try_export def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")): - """YOLOv8 TensorFlow Lite export.""" + """YOLO TensorFlow Lite export.""" # BUG https://github.com/ultralytics/ultralytics/issues/13436 import tensorflow as tf # noqa @@ -968,7 +1123,7 @@ def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorF @try_export def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): - """YOLOv8 Edge TPU export 
https://coral.ai/docs/edgetpu/models-intro/.""" + """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/.""" LOGGER.warning(f"{prefix} WARNING โš ๏ธ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185") cmd = "edgetpu_compiler --version" @@ -976,7 +1131,6 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): assert LINUX, f"export only supported on Linux. See {help_url}" if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0: LOGGER.info(f"\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}") - sudo = subprocess.run("sudo --version >/dev/null", shell=True).returncode == 0 # sudo installed on system for c in ( "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -", 'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | ' @@ -984,13 +1138,21 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): "sudo apt-get update", "sudo apt-get install edgetpu-compiler", ): - subprocess.run(c if sudo else c.replace("sudo ", ""), shell=True, check=True) + subprocess.run(c if is_sudo_available() else c.replace("sudo ", ""), shell=True, check=True) ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1] LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...") f = str(tflite_model).replace(".tflite", "_edgetpu.tflite") # Edge TPU model - cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"' + cmd = ( + "edgetpu_compiler " + f'--out_dir "{Path(f).parent}" ' + "--show_operations " + "--search_delegate " + "--delegate_search_step 30 " + "--timeout_sec 180 " + f'"{tflite_model}"' + ) LOGGER.info(f"{prefix} running '{cmd}'") subprocess.run(cmd, shell=True) if not self.args.separate_outputs: @@ -999,7 +1161,7 @@ def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")): @try_export 
def export_tfjs(self, prefix=colorstr("TensorFlow.js:")): - """YOLOv8 TensorFlow.js export.""" + """YOLO TensorFlow.js export.""" check_requirements("tensorflowjs") if ARM64: # Fix error: `np.object` was a deprecated alias for the builtin `object` when exporting to TF.js on ARM64 @@ -1033,6 +1195,171 @@ def export_tfjs(self, prefix=colorstr("TensorFlow.js:")): yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml return f, None + @try_export + def export_rknn(self, prefix=colorstr("RKNN:")): + """YOLO RKNN model export.""" + LOGGER.info(f"\n{prefix} starting export with rknn-toolkit2...") + + check_requirements("rknn-toolkit2") + if IS_COLAB: + # Prevent 'exit' from closing the notebook https://github.com/airockchip/rknn-toolkit2/issues/259 + import builtins + + builtins.exit = lambda: None + + from rknn.api import RKNN + + f, _ = self.export_onnx() + export_path = Path(f"{Path(f).stem}_rknn_model") + export_path.mkdir(exist_ok=True) + + rknn = RKNN(verbose=False) + rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=self.args.name) + rknn.load_onnx(model=f) + rknn.build(do_quantization=False) # TODO: Add quantization support + f = f.replace(".onnx", f"-{self.args.name}.rknn") + rknn.export_rknn(f"{export_path / f}") + yaml_save(export_path / "metadata.yaml", self.metadata) + return export_path, None + + @try_export + def export_imx(self, prefix=colorstr("IMX:")): + """YOLO IMX export.""" + gptq = False + assert LINUX, ( + "export only supported on Linux. 
See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter" + ) + if getattr(self.model, "end2end", False): + raise ValueError("IMX export is not supported for end2end models.") + if "C2f" not in self.model.__str__(): + raise ValueError("IMX export is only supported for YOLOv8n detection models") + check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0")) + check_requirements("imx500-converter[pt]==3.14.3") # Separate requirements for imx500-converter + + import model_compression_toolkit as mct + import onnx + from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms + + LOGGER.info(f"\n{prefix} starting export with model_compression_toolkit {mct.__version__}...") + + try: + out = subprocess.run( + ["java", "--version"], check=True, capture_output=True + ) # Java 17 is required for imx500-converter + if "openjdk 17" not in str(out.stdout): + raise FileNotFoundError + except FileNotFoundError: + c = ["apt", "install", "-y", "openjdk-17-jdk", "openjdk-17-jre"] + if is_sudo_available(): + c.insert(0, "sudo") + subprocess.run(c, check=True) + + def representative_dataset_gen(dataloader=self.get_int8_calibration_dataloader(prefix)): + for batch in dataloader: + img = batch["img"] + img = img / 255.0 + yield [img] + + tpc = mct.get_target_platform_capabilities( + fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1" + ) + + config = mct.core.CoreConfig( + mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10), + quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True), + ) + + resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76) + + quant_model = ( + mct.gptq.pytorch_gradient_post_training_quantization( # Perform Gradient-Based Post Training Quantization + model=self.model, + representative_data_gen=representative_dataset_gen, + 
target_resource_utilization=resource_utilization, + gptq_config=mct.gptq.get_pytorch_gptq_config(n_epochs=1000, use_hessian_based_weights=False), + core_config=config, + target_platform_capabilities=tpc, + )[0] + if gptq + else mct.ptq.pytorch_post_training_quantization( # Perform post training quantization + in_module=self.model, + representative_data_gen=representative_dataset_gen, + target_resource_utilization=resource_utilization, + core_config=config, + target_platform_capabilities=tpc, + )[0] + ) + + class NMSWrapper(torch.nn.Module): + def __init__( + self, + model: torch.nn.Module, + score_threshold: float = 0.001, + iou_threshold: float = 0.7, + max_detections: int = 300, + ): + """ + Wrapping PyTorch Module with multiclass_nms layer from sony_custom_layers. + + Args: + model (nn.Module): Model instance. + score_threshold (float): Score threshold for non-maximum suppression. + iou_threshold (float): Intersection over union threshold for non-maximum suppression. + max_detections (float): The number of detections to return. 
+ """ + super().__init__() + self.model = model + self.score_threshold = score_threshold + self.iou_threshold = iou_threshold + self.max_detections = max_detections + + def forward(self, images): + # model inference + outputs = self.model(images) + + boxes = outputs[0] + scores = outputs[1] + nms = multiclass_nms( + boxes=boxes, + scores=scores, + score_threshold=self.score_threshold, + iou_threshold=self.iou_threshold, + max_detections=self.max_detections, + ) + return nms + + quant_model = NMSWrapper( + model=quant_model, + score_threshold=self.args.conf or 0.001, + iou_threshold=self.args.iou, + max_detections=self.args.max_det, + ).to(self.device) + + f = Path(str(self.file).replace(self.file.suffix, "_imx_model")) + f.mkdir(exist_ok=True) + onnx_model = f / Path(str(self.file.name).replace(self.file.suffix, "_imx.onnx")) # js dir + mct.exporter.pytorch_export_model( + model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen + ) + + model_onnx = onnx.load(onnx_model) # load onnx model + for k, v in self.metadata.items(): + meta = model_onnx.metadata_props.add() + meta.key, meta.value = k, str(v) + + onnx.save(model_onnx, onnx_model) + + subprocess.run( + ["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"], + check=True, + ) + + # Needed for imx models. 
+ with open(f / "labels.txt", "w") as file: + file.writelines([f"{name}\n" for _, name in self.model.names.items()]) + + return f, None + def _add_tflite_metadata(self, file): """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata.""" import flatbuffers @@ -1098,7 +1425,7 @@ def _add_tflite_metadata(self, file): tmp_file.unlink() def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")): - """YOLOv8 CoreML pipeline.""" + """YOLO CoreML pipeline.""" import coremltools as ct # noqa LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...") @@ -1162,8 +1489,8 @@ def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipe nms.coordinatesOutputFeatureName = "coordinates" nms.iouThresholdInputFeatureName = "iouThreshold" nms.confidenceThresholdInputFeatureName = "confidenceThreshold" - nms.iouThreshold = 0.45 - nms.confidenceThreshold = 0.25 + nms.iouThreshold = self.args.iou + nms.confidenceThreshold = self.args.conf nms.pickTop.perClass = True nms.stringClassLabels.vector.extend(names.values()) nms_model = ct.models.MLModel(nms_spec) @@ -1231,3 +1558,103 @@ def forward(self, x): """Normalize predictions of object detection model with input size-dependent factors.""" xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1) return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4) + + +class NMSModel(torch.nn.Module): + """Model wrapper with embedded NMS for Detect, Segment, Pose and OBB.""" + + def __init__(self, model, args): + """ + Initialize the NMSModel. + + Args: + model (torch.nn.module): The model to wrap with NMS postprocessing. + args (Namespace): The export arguments. 
+ """ + super().__init__() + self.model = model + self.args = args + self.obb = model.task == "obb" + self.is_tf = self.args.format in frozenset({"saved_model", "tflite", "tfjs"}) + + def forward(self, x): + """ + Performs inference with NMS post-processing. Supports Detect, Segment, OBB and Pose. + + Args: + x (torch.Tensor): The preprocessed tensor with shape (N, 3, H, W). + + Returns: + out (torch.Tensor): The post-processed results with shape (N, max_det, 4 + 2 + extra_shape). + """ + from functools import partial + + from torchvision.ops import nms + + preds = self.model(x) + pred = preds[0] if isinstance(preds, tuple) else preds + pred = pred.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84) + extra_shape = pred.shape[-1] - (4 + self.model.nc) # extras from Segment, OBB, Pose + boxes, scores, extras = pred.split([4, self.model.nc, extra_shape], dim=2) + scores, classes = scores.max(dim=-1) + self.args.max_det = min(pred.shape[1], self.args.max_det) # in case num_anchors < max_det + # (N, max_det, 4 coords + 1 class score + 1 class label + extra_shape). 
+ out = torch.zeros( + boxes.shape[0], + self.args.max_det, + boxes.shape[-1] + 2 + extra_shape, + device=boxes.device, + dtype=boxes.dtype, + ) + for i, (box, cls, score, extra) in enumerate(zip(boxes, classes, scores, extras)): + mask = score > self.args.conf + if self.is_tf: + # TFLite GatherND error if mask is empty + score *= mask + # Explicit length otherwise reshape error, hardcoded to `self.args.max_det * 5` + mask = score.topk(min(self.args.max_det * 5, score.shape[0])).indices + box, score, cls, extra = box[mask], score[mask], cls[mask], extra[mask] + if not self.obb: + box = xywh2xyxy(box) + if self.is_tf: + # TFlite bug returns less boxes + box = torch.nn.functional.pad(box, (0, 0, 0, mask.shape[0] - box.shape[0])) + nmsbox = box.clone() + # `8` is the minimum value experimented to get correct NMS results for obb + multiplier = 8 if self.obb else 1 + # Normalize boxes for NMS since large values for class offset causes issue with int8 quantization + if self.args.format == "tflite": # TFLite is already normalized + nmsbox *= multiplier + else: + nmsbox = multiplier * nmsbox / torch.tensor(x.shape[2:], device=box.device, dtype=box.dtype).max() + if not self.args.agnostic_nms: # class-specific NMS + end = 2 if self.obb else 4 + # fully explicit expansion otherwise reshape error + # large max_wh causes issues when quantizing + cls_offset = cls.reshape(-1, 1).expand(nmsbox.shape[0], end) + offbox = nmsbox[:, :end] + cls_offset * multiplier + nmsbox = torch.cat((offbox, nmsbox[:, end:]), dim=-1) + nms_fn = ( + partial( + nms_rotated, + use_triu=not ( + self.is_tf + or (self.args.opset or 14) < 14 + or (self.args.format == "openvino" and self.args.int8) # OpenVINO int8 error with triu + ), + ) + if self.obb + else nms + ) + keep = nms_fn( + torch.cat([nmsbox, extra], dim=-1) if self.obb else nmsbox, + score, + self.args.iou, + )[: self.args.max_det] + dets = torch.cat( + [box[keep], score[keep].view(-1, 1), cls[keep].view(-1, 1).to(out.dtype), extra[keep]], 
dim=-1 + ) + # Zero-pad to max_det size to avoid reshape error + pad = (0, 0, 0, self.args.max_det - dets.shape[0]) + out[i] = torch.nn.functional.pad(dets, pad) + return (out, preds[1]) if self.model.task == "segment" else out diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 519c3f905ed..21ef3fb44aa 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -1,8 +1,8 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import inspect from pathlib import Path -from typing import List, Union +from typing import Any, Dict, List, Union import numpy as np import torch @@ -11,7 +11,7 @@ from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir from ultralytics.engine.results import Results from ultralytics.hub import HUB_WEB_ROOT, HUBTrainingSession -from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load +from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, yaml_model_load from ultralytics.utils import ( ARGV, ASSETS, @@ -26,7 +26,7 @@ ) -class Model(nn.Module): +class Model(torch.nn.Module): """ A base class for implementing YOLO models, unifying APIs across different model types. @@ -37,7 +37,7 @@ class Model(nn.Module): Attributes: callbacks (Dict): A dictionary of callback functions for various events during model operations. predictor (BasePredictor): The predictor object used for making predictions. - model (nn.Module): The underlying PyTorch model. + model (torch.nn.Module): The underlying PyTorch model. trainer (BaseTrainer): The trainer object used for training the model. ckpt (Dict): The checkpoint data if the model is loaded from a *.pt file. cfg (str): The configuration of the model if loaded from a *.yaml file. 
@@ -72,16 +72,16 @@ class Model(nn.Module): Examples: >>> from ultralytics import YOLO - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.predict("image.jpg") - >>> model.train(data="coco128.yaml", epochs=3) + >>> model.train(data="coco8.yaml", epochs=3) >>> metrics = model.val() >>> model.export(format="onnx") """ def __init__( self, - model: Union[str, Path] = "yolov8n.pt", + model: Union[str, Path] = "yolo11n.pt", task: str = None, verbose: bool = False, ) -> None: @@ -106,7 +106,7 @@ def __init__( ImportError: If required dependencies for specific model types (like HUB SDK) are not installed. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model = Model("path/to/model.yaml", task="detect") >>> model = Model("hub_model", verbose=True) """ @@ -115,7 +115,7 @@ def __init__( self.predictor = None # reuse predictor self.model = None # model object self.trainer = None # trainer object - self.ckpt = None # if loaded from *.pt + self.ckpt = {} # if loaded from *.pt self.cfg = None # if loaded from *.yaml self.ckpt_path = None self.overrides = {} # overrides for trainer object @@ -136,6 +136,7 @@ def __init__( # Check if Triton Server model elif self.is_triton_model(model): self.model_name = self.model = model + self.overrides["task"] = task or "detect" # set `task=detect` if not explicitly set return # Load or create new YOLO model @@ -144,11 +145,14 @@ def __init__( else: self._load(model, task=task) + # Delete super().training for accessing self.model.training + del self.training + def __call__( self, source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None, stream: bool = False, - **kwargs, + **kwargs: Any, ) -> list: """ Alias for the predict method, enabling the model instance to be callable for predictions. @@ -161,14 +165,14 @@ def __call__( the image(s) to make predictions on. 
Can be a file path, URL, PIL image, numpy array, PyTorch tensor, or a list/tuple of these. stream (bool): If True, treat the input source as a continuous stream for predictions. - **kwargs (Any): Additional keyword arguments to configure the prediction process. + **kwargs: Additional keyword arguments to configure the prediction process. Returns: (List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a Results object. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model("https://ultralytics.com/images/bus.jpg") >>> for r in results: ... print(f"Detected {len(r)} objects in image") @@ -190,9 +194,9 @@ def is_triton_model(model: str) -> bool: (bool): True if the model string is a valid Triton Server URL, False otherwise. Examples: - >>> Model.is_triton_model("http://localhost:8000/v2/models/yolov8n") + >>> Model.is_triton_model("http://localhost:8000/v2/models/yolo11n") True - >>> Model.is_triton_model("yolov8n.pt") + >>> Model.is_triton_model("yolo11n.pt") False """ from urllib.parse import urlsplit @@ -217,7 +221,7 @@ def is_hub_model(model: str) -> bool: Examples: >>> Model.is_hub_model("https://hub.ultralytics.com/models/MODEL") True - >>> Model.is_hub_model("yolov8n.pt") + >>> Model.is_hub_model("yolo11n.pt") False """ return model.startswith(f"{HUB_WEB_ROOT}/models/") @@ -243,7 +247,7 @@ class from the task map. 
Examples: >>> model = Model() - >>> model._new("yolov8n.yaml", task="detect", verbose=True) + >>> model._new("yolo11n.yaml", task="detect", verbose=True) """ cfg_dict = yaml_model_load(cfg) self.cfg = cfg @@ -274,12 +278,12 @@ def _load(self, weights: str, task=None) -> None: Examples: >>> model = Model() - >>> model._load("yolov8n.pt") + >>> model._load("yolo11n.pt") >>> model._load("path/to/weights.pth", task="detect") """ if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")): weights = checks.check_file(weights, download_dir=SETTINGS["weights_dir"]) # download and return local file - weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolov8n -> yolov8n.pt + weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolo11n -> yolo11n.pt if Path(weights).suffix == ".pt": self.model, self.ckpt = attempt_load_one_weight(weights) @@ -307,19 +311,19 @@ def _check_is_pytorch_model(self) -> None: information about supported model formats and operations. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model._check_is_pytorch_model() # No error raised - >>> model = Model("yolov8n.onnx") + >>> model = Model("yolo11n.onnx") >>> model._check_is_pytorch_model() # Raises TypeError """ pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt" - pt_module = isinstance(self.model, nn.Module) + pt_module = isinstance(self.model, torch.nn.Module) if not (pt_module or pt_str): raise TypeError( f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. " f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported " f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, " - f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device " + f"i.e. 
'yolo predict model=yolo11n.onnx'.\nTo run CUDA or MPS inference please pass the device " f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'" ) @@ -338,7 +342,7 @@ def reset_weights(self) -> "Model": AssertionError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.reset_weights() """ self._check_is_pytorch_model() @@ -349,7 +353,7 @@ def reset_weights(self) -> "Model": p.requires_grad = True return self - def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model": + def load(self, weights: Union[str, Path] = "yolo11n.pt") -> "Model": """ Loads parameters from the specified weights file into the model. @@ -367,7 +371,7 @@ def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model": Examples: >>> model = Model() - >>> model.load("yolov8n.pt") + >>> model.load("yolo11n.pt") >>> model.load(Path("path/to/weights.pt")) """ self._check_is_pytorch_model() @@ -377,7 +381,7 @@ def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model": self.model.load(weights) return self - def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> None: + def save(self, filename: Union[str, Path] = "saved_model.pt") -> None: """ Saves the current model state to a file. @@ -386,13 +390,12 @@ def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> Args: filename (Union[str, Path]): The name of the file to save the model to. - use_dill (bool): Whether to try using dill for serialization if available. Raises: AssertionError: If the model is not a PyTorch model. 
Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.save("my_model.pt") """ self._check_is_pytorch_model() @@ -402,13 +405,13 @@ def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> from ultralytics import __version__ updates = { - "model": deepcopy(self.model).half() if isinstance(self.model, nn.Module) else self.model, + "model": deepcopy(self.model).half() if isinstance(self.model, torch.nn.Module) else self.model, "date": datetime.now().isoformat(), "version": __version__, "license": "AGPL-3.0 License (https://ultralytics.com/license)", "docs": "https://docs.ultralytics.com", } - torch.save({**self.ckpt, **updates}, filename, use_dill=use_dill) + torch.save({**self.ckpt, **updates}, filename) def info(self, detailed: bool = False, verbose: bool = True): """ @@ -429,7 +432,7 @@ def info(self, detailed: bool = False, verbose: bool = True): TypeError: If the model is not a PyTorch model. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.info() # Prints model summary >>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list """ @@ -449,10 +452,10 @@ def fuse(self): performs both convolution and normalization in one step. Raises: - TypeError: If the model is not a PyTorch nn.Module. + TypeError: If the model is not a PyTorch torch.nn.Module. Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model.fuse() >>> # Model is now fused and ready for optimized inference """ @@ -463,7 +466,7 @@ def embed( self, source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None, stream: bool = False, - **kwargs, + **kwargs: Any, ) -> list: """ Generates image embeddings based on the provided source. @@ -475,7 +478,7 @@ def embed( source (str | Path | int | List | Tuple | np.ndarray | torch.Tensor): The source of the image for generating embeddings. Can be a file path, URL, PIL image, numpy array, etc. 
stream (bool): If True, predictions are streamed. - **kwargs (Any): Additional keyword arguments for configuring the embedding process. + **kwargs: Additional keyword arguments for configuring the embedding process. Returns: (List[torch.Tensor]): A list containing the image embeddings. @@ -484,7 +487,7 @@ def embed( AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> image = "https://ultralytics.com/images/bus.jpg" >>> embeddings = model.embed(image) >>> print(embeddings[0].shape) @@ -498,7 +501,7 @@ def predict( source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None, stream: bool = False, predictor=None, - **kwargs, + **kwargs: Any, ) -> List[Results]: """ Performs predictions on the given image source using the YOLO model. @@ -514,14 +517,14 @@ def predict( stream (bool): If True, treats the input source as a continuous stream for predictions. predictor (BasePredictor | None): An instance of a custom predictor class for making predictions. If None, the method uses a default predictor. - **kwargs (Any): Additional keyword arguments for configuring the prediction process. + **kwargs: Additional keyword arguments for configuring the prediction process. Returns: (List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a Results object. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.predict(source="path/to/image.jpg", conf=0.25) >>> for r in results: ... 
print(r.boxes.data) # print detection bounding boxes @@ -544,7 +547,7 @@ def predict( prompts = args.pop("prompts", None) # for SAM-type models if not self.predictor: - self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks) + self.predictor = (predictor or self._smart_load("predictor"))(overrides=args, _callbacks=self.callbacks) self.predictor.setup_model(model=self.model, verbose=is_cli) else: # only update args if predictor is already setup self.predictor.args = get_cfg(self.predictor.args, args) @@ -559,7 +562,7 @@ def track( source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None, stream: bool = False, persist: bool = False, - **kwargs, + **kwargs: Any, ) -> List[Results]: """ Conducts object tracking on the specified input source using the registered trackers. @@ -573,7 +576,7 @@ def track( tracking. Can be a file path, URL, or video stream. stream (bool): If True, treats the input source as a continuous video stream. Defaults to False. persist (bool): If True, persists trackers between different calls to this method. Defaults to False. - **kwargs (Any): Additional keyword arguments for configuring the tracking process. + **kwargs: Additional keyword arguments for configuring the tracking process. Returns: (List[ultralytics.engine.results.Results]): A list of tracking results, each a Results object. @@ -582,7 +585,7 @@ def track( AttributeError: If the predictor does not have registered trackers. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.track(source="path/to/video.mp4", show=True) >>> for r in results: ... print(r.boxes.id) # print tracking IDs @@ -604,7 +607,7 @@ def track( def val( self, validator=None, - **kwargs, + **kwargs: Any, ): """ Validates the model using a specified dataset and validation configuration. 
@@ -616,7 +619,7 @@ def val( Args: validator (ultralytics.engine.validator.BaseValidator | None): An instance of a custom validator class for validating the model. - **kwargs (Any): Arbitrary keyword arguments for customizing the validation process. + **kwargs: Arbitrary keyword arguments for customizing the validation process. Returns: (ultralytics.utils.metrics.DetMetrics): Validation metrics obtained from the validation process. @@ -625,8 +628,8 @@ def val( AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") - >>> results = model.val(data="coco128.yaml", imgsz=640) + >>> model = YOLO("yolo11n.pt") + >>> results = model.val(data="coco8.yaml", imgsz=640) >>> print(results.box.map) # Print mAP50-95 """ custom = {"rect": True} # method defaults @@ -639,7 +642,7 @@ def val( def benchmark( self, - **kwargs, + **kwargs: Any, ): """ Benchmarks the model across various export formats to evaluate performance. @@ -650,7 +653,7 @@ def benchmark( defaults, and any additional user-provided keyword arguments. Args: - **kwargs (Any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with + **kwargs: Arbitrary keyword arguments to customize the benchmarking process. These are combined with default configurations, model-specific arguments, and method defaults. Common options include: - data (str): Path to the dataset for benchmarking. - imgsz (int | List[int]): Image size for benchmarking. @@ -658,6 +661,7 @@ def benchmark( - int8 (bool): Whether to use int8 precision mode. - device (str): Device to run the benchmark on (e.g., 'cpu', 'cuda'). - verbose (bool): Whether to print detailed benchmark information. + - format (str): Export format name for specific benchmarking Returns: (Dict): A dictionary containing the results of the benchmarking process, including metrics for @@ -667,7 +671,7 @@ def benchmark( AssertionError: If the model is not a PyTorch model. 
Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.benchmark(data="coco8.yaml", imgsz=640, half=True) >>> print(results) """ @@ -686,11 +690,12 @@ def benchmark( verbose=kwargs.get("verbose"), separate_outputs=args["separate_outputs"], export_hw_optimized=args["export_hw_optimized"], + format=kwargs.get("format", "") ) def export( self, - **kwargs, + **kwargs: Any, ) -> str: """ Exports the model to a different format suitable for deployment. @@ -700,7 +705,7 @@ def export( defaults, and any additional arguments provided. Args: - **kwargs (Dict): Arbitrary keyword arguments to customize the export process. These are combined with + **kwargs: Arbitrary keyword arguments to customize the export process. These are combined with the model's overrides and method defaults. Common arguments include: format (str): Export format (e.g., 'onnx', 'engine', 'coreml'). half (bool): Export model in half-precision. @@ -719,7 +724,7 @@ def export( RuntimeError: If the export process fails due to errors. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.export(format="onnx", dynamic=True, simplify=True) 'path/to/exported/model.onnx' """ @@ -739,7 +744,7 @@ def export( def train( self, trainer=None, - **kwargs, + **kwargs: Any, ): """ Trains the model using the specified dataset and training configuration. @@ -754,7 +759,7 @@ def train( Args: trainer (BaseTrainer | None): Custom trainer instance for model training. If None, uses default. - **kwargs (Any): Arbitrary keyword arguments for training configuration. Common options include: + **kwargs: Arbitrary keyword arguments for training configuration. Common options include: data (str): Path to dataset configuration file. epochs (int): Number of training epochs. batch_size (int): Batch size for training. @@ -774,8 +779,8 @@ def train( ModuleNotFoundError: If the HUB SDK is not installed. 
Examples: - >>> model = YOLO("yolov8n.pt") - >>> results = model.train(data="coco128.yaml", epochs=3) + >>> model = YOLO("yolo11n.pt") + >>> results = model.train(data="coco8.yaml", epochs=3) """ self._check_is_pytorch_model() if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model @@ -806,7 +811,7 @@ def train( # Update model and cfg after training if RANK in {-1, 0}: ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last - self.model, _ = attempt_load_one_weight(ckpt) + self.model, self.ckpt = attempt_load_one_weight(ckpt) self.overrides = self.model.args self.metrics = getattr(self.trainer.validator, "metrics", None) # TODO: no metrics returned by DDP return self.metrics @@ -815,8 +820,8 @@ def tune( self, use_ray=False, iterations=10, - *args, - **kwargs, + *args: Any, + **kwargs: Any, ): """ Conducts hyperparameter tuning for the model, with an option to use Ray Tune. @@ -829,8 +834,8 @@ def tune( Args: use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False. iterations (int): The number of tuning iterations to perform. Defaults to 10. - *args (List): Variable length argument list for additional arguments. - **kwargs (Dict): Arbitrary keyword arguments. These are combined with the model's overrides and defaults. + *args: Variable length argument list for additional arguments. + **kwargs: Arbitrary keyword arguments. These are combined with the model's overrides and defaults. Returns: (Dict): A dictionary containing the results of the hyperparameter search. @@ -839,7 +844,7 @@ def tune( AssertionError: If the model is not a PyTorch model. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model.tune(use_ray=True, iterations=20) >>> print(results) """ @@ -874,7 +879,7 @@ def _apply(self, fn) -> "Model": AssertionError: If the model is not a PyTorch model. 
Examples: - >>> model = Model("yolov8n.pt") + >>> model = Model("yolo11n.pt") >>> model = model._apply(lambda t: t.cuda()) # Move model to GPU """ self._check_is_pytorch_model() @@ -884,7 +889,7 @@ def _apply(self, fn) -> "Model": return self @property - def names(self) -> list: + def names(self) -> Dict[int, str]: """ Retrieves the class names associated with the loaded model. @@ -899,7 +904,7 @@ def names(self) -> list: AttributeError: If the model or predictor does not have a 'names' attribute. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> print(model.names) {0: 'person', 1: 'bicycle', 2: 'car', ...} """ @@ -918,23 +923,23 @@ def device(self) -> torch.device: Retrieves the device on which the model's parameters are allocated. This property determines the device (CPU or GPU) where the model's parameters are currently stored. It is - applicable only to models that are instances of nn.Module. + applicable only to models that are instances of torch.nn.Module. Returns: (torch.device): The device (CPU/GPU) of the model. Raises: - AttributeError: If the model is not a PyTorch nn.Module instance. + AttributeError: If the model is not a torch.nn.Module instance. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> print(model.device) device(type='cuda', index=0) # if CUDA is available >>> model = model.to("cpu") >>> print(model.device) device(type='cpu') """ - return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None + return next(self.model.parameters()).device if isinstance(self.model, torch.nn.Module) else None @property def transforms(self): @@ -949,7 +954,7 @@ def transforms(self): (object | None): The transform object of the model if available, otherwise None. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> transforms = model.transforms >>> if transforms: ... 
print(f"Model transforms: {transforms}") @@ -978,9 +983,9 @@ def add_callback(self, event: str, func) -> None: Examples: >>> def on_train_start(trainer): ... print("Training is starting!") - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", on_train_start) - >>> model.train(data="coco128.yaml", epochs=1) + >>> model.train(data="coco8.yaml", epochs=1) """ self.callbacks[event].append(func) @@ -997,7 +1002,7 @@ def clear_callback(self, event: str) -> None: recognized by the Ultralytics callback system. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", lambda: print("Training started")) >>> model.clear_callback("on_train_start") >>> # All callbacks for 'on_train_start' are now removed @@ -1027,7 +1032,7 @@ def reset_callbacks(self) -> None: modifications, ensuring consistent behavior across different runs or experiments. Examples: - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> model.add_callback("on_train_start", custom_function) >>> model.reset_callbacks() # All callbacks are now reset to their default functions @@ -1129,3 +1134,44 @@ def task_map(self) -> dict: description of the expected behavior and structure. """ raise NotImplementedError("Please provide task map for your model!") + + def eval(self): + """ + Sets the model to evaluation mode. + + This method changes the model's mode to evaluation, which affects layers like dropout and batch normalization + that behave differently during training and evaluation. + + Returns: + (Model): The model instance with evaluation mode set. + + Examples: + >> model = YOLO("yolo11n.pt") + >> model.eval() + """ + self.model.eval() + return self + + def __getattr__(self, name): + """ + Enables accessing model attributes directly through the Model class. + + This method provides a way to access attributes of the underlying model directly through the Model class + instance. 
It first checks if the requested attribute is 'model', in which case it returns the model from + the module dictionary. Otherwise, it delegates the attribute lookup to the underlying model. + + Args: + name (str): The name of the attribute to retrieve. + + Returns: + (Any): The requested attribute value. + + Raises: + AttributeError: If the requested attribute does not exist in the model. + + Examples: + >>> model = YOLO("yolo11n.pt") + >>> print(model.stride) + >>> print(model.task) + """ + return self._modules["model"] if name == "model" else getattr(self.model, name) diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 94ec86476a5..afe301faf34 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -1,9 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc. Usage - sources: - $ yolo mode=predict model=yolov8n.pt source=0 # webcam + $ yolo mode=predict model=yolo11n.pt source=0 # webcam img.jpg # image vid.mp4 # video screen # screenshot @@ -15,18 +15,21 @@ 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP stream Usage - formats: - $ yolo mode=predict model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN + $ yolo mode=predict model=yolo11n.pt # PyTorch + yolo11n.torchscript # TorchScript + yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolo11n_openvino_model # OpenVINO + yolo11n.engine # TensorRT + yolo11n.mlpackage # CoreML (macOS-only) + 
yolo11n_saved_model # TensorFlow SavedModel + yolo11n.pb # TensorFlow GraphDef + yolo11n.tflite # TensorFlow Lite + yolo11n_edgetpu.tflite # TensorFlow Edge TPU + yolo11n_paddle_model # PaddlePaddle + yolo11n.mnn # MNN + yolo11n_ncnn_model # NCNN + yolo11n_imx_model # Sony IMX + yolo11n_rknn_model # Rockchip RKNN """ import platform @@ -153,7 +156,11 @@ def pre_transform(self, im): (list): A list of transformed images. """ same_shapes = len({x.shape for x in im}) == 1 - letterbox = LetterBox(self.imgsz, auto=self.args.rect and same_shapes and self.model.pt, stride=self.model.stride) + letterbox = LetterBox( + self.imgsz, + auto=self.args.rect and same_shapes and (self.model.pt or (getattr(self.model, "dynamic", False) and not self.model.imx)), + stride=self.model.stride, + ) return [letterbox(image=x) for x in im] def postprocess(self, preds, img, orig_imgs): @@ -365,7 +372,7 @@ def save_predicted_images(self, save_path="", frame=0): # Save videos and streams if self.dataset.mode in {"stream", "video"}: fps = self.dataset.fps if self.dataset.mode == "video" else 30 - frames_path = f'{save_path.split(".", 1)[0]}_frames/' + frames_path = f"{save_path.split('.', 1)[0]}_frames/" if save_path not in self.vid_writer: # new video if self.args.save_frames: Path(frames_path).mkdir(parents=True, exist_ok=True) @@ -384,7 +391,7 @@ def save_predicted_images(self, save_path="", frame=0): # Save images else: - cv2.imwrite(save_path, im) + cv2.imwrite(str(Path(save_path).with_suffix(".jpg")), im) # save to JPG for best support def show(self, p=""): """Display an image in a window using the OpenCV imshow function.""" diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index 737711be15d..5c0ea442b95 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Ultralytics Results, Boxes and Masks classes for 
handling inference results. @@ -307,7 +307,7 @@ def __len__(self): if v is not None: return len(v) - def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None): + def update(self, boxes=None, masks=None, probs=None, obb=None, keypoints=None, regress=None): """ Updates the Results object with new detection data. @@ -320,6 +320,7 @@ def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None): masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks. probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities. obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates. + keypoints (torch.Tensor | None): A tensor of shape (N, 17, 3) containing keypoints. Examples: >>> results = model("image.jpg") @@ -336,6 +337,8 @@ def update(self, boxes=None, masks=None, probs=None, obb=None, regress=None): self.obb = OBB(obb, self.orig_shape) if regress is not None: self.regress = regress + if keypoints is not None: + self.keypoints = Keypoints(keypoints, self.orig_shape) def _apply(self, fn, *args, **kwargs): """ @@ -496,8 +499,8 @@ def plot( Examples: >>> results = model("image.jpg") >>> for result in results: - ... im = result.plot() - ... im.show() + >>> im = result.plot() + >>> im.show() """ assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}." 
if img is None and isinstance(self.orig_img, torch.Tensor): @@ -541,9 +544,9 @@ def plot( # Plot Detect results if pred_boxes is not None and show_boxes: for i, d in enumerate(reversed(pred_boxes)): - c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) + c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) name = ("" if id is None else f"id:{id} ") + names[c] - label = (f"{name} {conf:.2f}" if conf else name) if labels else None + label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze() annotator.box_label( box, @@ -609,7 +612,7 @@ def show(self, *args, **kwargs): >>> results = model("path/to/image.jpg") >>> results[0].show() # Display the first result >>> for result in results: - ... result.show() # Display all results + >>> result.show() # Display all results """ self.plot(show=True, *args, **kwargs) @@ -629,10 +632,10 @@ def save(self, filename=None, *args, **kwargs): Examples: >>> results = model("path/to/image.jpg") >>> for result in results: - ... result.save("annotated_image.jpg") + >>> result.save("annotated_image.jpg") >>> # Or with custom plot arguments >>> for result in results: - ... result.save("annotated_image.jpg", conf=False, line_width=2) + >>> result.save("annotated_image.jpg", conf=False, line_width=2) """ if not filename: filename = f"results_{Path(self.path).name}" @@ -653,7 +656,7 @@ def verbose(self): Examples: >>> results = model("path/to/image.jpg") >>> for result in results: - ... 
print(result.verbose()) + >>> print(result.verbose()) 2 persons, 1 car, 3 traffic lights, dog 0.92, cat 0.78, horse 0.64, @@ -664,15 +667,13 @@ def verbose(self): """ log_string = "" probs = self.probs - boxes = self.boxes - regress = self.regress if len(self) == 0: return log_string if probs is not None else f"{log_string}(no detections), " if probs is not None: log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, " - if regress is not None: + if regress := self.regress: log_string += f"{', '.join(f'{self.names[j]} {regress.data[j]:.2f}' for j in regress.value)}, " - if boxes: + if boxes := self.boxes: for c in boxes.cls.unique(): n = (boxes.cls == c).sum() # detections per class log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " @@ -691,10 +692,10 @@ def save_txt(self, txt_file, save_conf=False): Examples: >>> from ultralytics import YOLO - >>> model = YOLO("yolov8n.pt") + >>> model = YOLO("yolo11n.pt") >>> results = model("path/to/image.jpg") >>> for result in results: - ... result.save_txt("output.txt") + >>> result.save_txt("output.txt") Notes: - The file will contain one line per detection or classification with the following structure: @@ -757,7 +758,7 @@ def save_crop(self, save_dir, file_name=Path("im.jpg")): Examples: >>> results = model("path/to/image.jpg") >>> for result in results: - ... 
result.save_crop(save_dir="path/to/crops", file_name="detection") + >>> result.save_crop(save_dir="path/to/crops", file_name="detection") """ if self.probs is not None: LOGGER.warning("WARNING โš ๏ธ Classify task do not support `save_crop`.") @@ -772,7 +773,7 @@ def save_crop(self, save_dir, file_name=Path("im.jpg")): save_one_box( d.xyxy, self.orig_img.copy(), - file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg", + file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"), BGR=True, ) @@ -796,8 +797,9 @@ def summary(self, normalize=False, decimals=5): Examples: >>> results = model("image.jpg") - >>> summary = results[0].summary() - >>> print(summary) + >>> for result in results: + >>> summary = result.summary() + >>> print(summary) """ # Create list of detection dictionaries results = [] @@ -859,10 +861,11 @@ def to_df(self, normalize=False, decimals=5): Examples: >>> results = model("path/to/image.jpg") - >>> df_result = results[0].to_df() - >>> print(df_result) + >>> for result in results: + >>> df_result = result.to_df() + >>> print(df_result) """ - import pandas as pd + import pandas as pd # scope for faster 'import ultralytics' return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals)) @@ -887,8 +890,9 @@ def to_csv(self, normalize=False, decimals=5, *args, **kwargs): Examples: >>> results = model("path/to/image.jpg") - >>> csv_result = results[0].to_csv() - >>> print(csv_result) + >>> for result in results: + >>> csv_result = result.to_csv() + >>> print(csv_result) """ return self.to_df(normalize=normalize, decimals=decimals).to_csv(*args, **kwargs) @@ -912,8 +916,9 @@ def to_xml(self, normalize=False, decimals=5, *args, **kwargs): Examples: >>> results = model("path/to/image.jpg") - >>> xml_result = results[0].to_xml() - >>> print(xml_result) + >>> for result in results: + >>> xml_result = result.to_xml() + >>> print(xml_result) """ check_requirements("lxml") df = self.to_df(normalize=normalize, 
decimals=decimals) @@ -942,8 +947,9 @@ def to_json(self, normalize=False, decimals=5): Examples: >>> results = model("path/to/image.jpg") - >>> json_result = results[0].to_json() - >>> print(json_result) + >>> for result in results: + >>> json_result = result.to_json() + >>> print(json_result) Notes: - For classification tasks, the JSON will contain class probabilities instead of bounding boxes. @@ -957,6 +963,75 @@ def to_json(self, normalize=False, decimals=5): return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2) + def to_sql(self, table_name="results", normalize=False, decimals=5, db_path="results.db"): + """ + Converts detection results to an SQL-compatible format. + + This method serializes the detection results into a format compatible with SQL databases. + It includes information about detected objects such as bounding boxes, class names, confidence scores, + and optionally segmentation masks, keypoints or oriented bounding boxes. + + Args: + table_name (str): Name of the SQL table where the data will be inserted. Defaults to "detection_results". + normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. + If True, coordinates will be returned as float values between 0 and 1. Defaults to False. + decimals (int): Number of decimal places to round the bounding boxes values to. Defaults to 5. + db_path (str): Path to the SQLite database file. Defaults to "results.db". + + Examples: + >>> results = model("path/to/image.jpg") + >>> for result in results: + >>> result.to_sql() + """ + import json + import sqlite3 + + # Convert results to a list of dictionaries + data = self.summary(normalize=normalize, decimals=decimals) + if not data: + LOGGER.warning("โš ๏ธ No results to save to SQL. 
Results dict is empty") + return + + # Connect to the SQLite database + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Create table if it doesn't exist + columns = ( + "id INTEGER PRIMARY KEY AUTOINCREMENT, class_name TEXT, confidence REAL, " + "box TEXT, masks TEXT, kpts TEXT, obb TEXT" + ) + cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({columns})") + + # Insert data into the table + for i, item in enumerate(data): + detect, obb = None, None # necessary to reinit these variables inside for loop to avoid duplication + class_name = item.get("name") + box = item.get("box", {}) + # Serialize the box as JSON for 'detect' and 'obb' based on key presence + if all(key in box for key in ["x1", "y1", "x2", "y2"]) and not any(key in box for key in ["x3", "x4"]): + detect = json.dumps(box) + if all(key in box for key in ["x1", "y1", "x2", "y2", "x3", "x4"]): + obb = json.dumps(box) + + cursor.execute( + f"INSERT INTO {table_name} (class_name, confidence, box, masks, kpts, obb) VALUES (?, ?, ?, ?, ?, ?)", + ( + class_name, + item.get("confidence"), + detect, + json.dumps(item.get("segments", {}).get("x", [])), + json.dumps(item.get("keypoints", {}).get("x", [])), + obb, + ), + ) + + # Commit and close the connection + conn.commit() + conn.close() + + LOGGER.info(f"โœ… Detection results successfully written to SQL table '{table_name}' in database '{db_path}'.") + class Boxes(BaseTensor): """ @@ -1741,7 +1816,7 @@ def xyxy(self): Examples: >>> import torch >>> from ultralytics import YOLO - >>> model = YOLO("yolov8n-obb.pt") + >>> model = YOLO("yolo11n-obb.pt") >>> results = model("path/to/image.jpg") >>> for result in results: ... 
obb = result.obb diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 51611f7c65e..c47409118c9 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -1,9 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Train a model on a dataset. Usage: - $ yolo mode=train model=yolov8n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16 + $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16 """ import gc @@ -12,7 +12,7 @@ import subprocess import time import warnings -from copy import deepcopy +from copy import copy, deepcopy from datetime import datetime, timedelta from pathlib import Path @@ -119,7 +119,7 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): self.save_period = self.args.save_period self.batch_size = self.args.batch - self.epochs = self.args.epochs + self.epochs = self.args.epochs or 100 # in case users accidentally pass epochs=None with timed training self.start_epoch = 0 if RANK == -1: print_args(vars(self.args)) @@ -129,7 +129,7 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading # Model and Dataset - self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolov8n -> yolov8n.pt + self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. 
yolo11n -> yolo11n.pt with torch_distributed_zero_first(LOCAL_RANK): # avoid auto-downloading dataset multiple times self.trainset, self.testset = self.get_dataset() self.ema = None @@ -197,7 +197,7 @@ def train(self): # Command cmd, file = generate_ddp_command(world_size, self) try: - LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}') + LOGGER.info(f"{colorstr('DDP:')} debug command {' '.join(cmd)}") subprocess.run(cmd, check=True) except Exception as e: raise e @@ -280,12 +280,7 @@ def _setup_train(self, world_size): # Batch size if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size - self.args.batch = self.batch_size = check_train_batch_size( - model=self.model, - imgsz=self.args.imgsz, - amp=self.amp, - batch=self.batch_size, - ) + self.args.batch = self.batch_size = self.auto_batch() # Dataloaders batch_size = self.batch_size // max(world_size, 1) @@ -335,10 +330,10 @@ def _do_train(self, world_size=1): self.train_time_start = time.time() self.run_callbacks("on_train_start") LOGGER.info( - f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n' - f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n' + f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n" + f"Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n" f"Logging results to {colorstr('bold', self.save_dir)}\n" - f'Starting training for ' + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...") + f"Starting training for " + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...") ) if self.args.close_mosaic: base_idx = (self.epochs - self.args.close_mosaic) * nb @@ -470,10 +465,8 @@ def _do_train(self, world_size=1): if RANK in {-1, 0}: # Do final val with best.pt - LOGGER.info( - f"\n{epoch - self.start_epoch + 1} epochs completed in " - f"{(time.time() - self.train_time_start) / 3600:.3f} hours." 
- ) + seconds = time.time() - self.train_time_start + LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.") self.final_eval() if self.args.plots: self.plot_metrics() @@ -481,6 +474,16 @@ def _do_train(self, world_size=1): self._clear_memory() self.run_callbacks("teardown") + def auto_batch(self, max_num_obj=0): + """Get batch size by calculating memory occupation of model.""" + return check_train_batch_size( + model=self.model, + imgsz=self.args.imgsz, + amp=self.amp, + batch=self.batch_size, + max_num_obj=max_num_obj, + ) # returns batch size + def _get_memory(self): """Get accelerator memory utilization in GB.""" if self.device.type == "mps": @@ -505,7 +508,7 @@ def read_results_csv(self): """Read results.csv into a dict using pandas.""" import pandas as pd # scope for faster 'import ultralytics' - return {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()} + return pd.read_csv(self.csv).to_dict(orient="list") def save_model(self): """Save model training checkpoints with additional metadata.""" @@ -657,10 +660,11 @@ def plot_training_labels(self): def save_metrics(self, metrics): """Saves training metrics to a CSV file.""" keys, vals = list(metrics.keys()), list(metrics.values()) - n = len(metrics) + 1 # number of cols - s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n") # header + n = len(metrics) + 2 # number of cols + s = "" if self.csv.exists() else (("%s," * n % tuple(["epoch", "time"] + keys)).rstrip(",") + "\n") # header + t = time.time() - self.train_time_start with open(self.csv, "a") as f: - f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n") + f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n") def plot_metrics(self): """Plot and display metrics visually.""" @@ -752,7 +756,7 @@ def _close_dataloader_mosaic(self): self.train_loader.dataset.mosaic = False if 
hasattr(self.train_loader.dataset, "close_mosaic"): LOGGER.info("Closing dataloader mosaic") - self.train_loader.dataset.close_mosaic(hyp=self.args) + self.train_loader.dataset.close_mosaic(hyp=copy(self.args)) def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5): """ @@ -780,7 +784,7 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5 f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and " f"determining best 'optimizer', 'lr0' and 'momentum' automatically... " ) - nc = getattr(model, "nc", 10) # number of classes + nc = self.data.get("nc", 10) # number of classes lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9) self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam @@ -795,6 +799,8 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5 else: # weight (with decay) g[0].append(param) + optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"} + name = {x.lower(): x for x in optimizers}.get(name.lower()) if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}: optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0) elif name == "RMSProp": @@ -803,15 +809,14 @@ def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5 optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True) else: raise NotImplementedError( - f"Optimizer '{name}' not found in list of available optimizers " - f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]." - "To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics." + f"Optimizer '{name}' not found in list of available optimizers {optimizers}. " + "Request support for addition optimizers at https://github.com/ultralytics/ultralytics." 
) optimizer.add_param_group({"params": g[0], "weight_decay": decay}) # add g0 with weight_decay optimizer.add_param_group({"params": g[1], "weight_decay": 0.0}) # add g1 (BatchNorm2d weights) LOGGER.info( f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups " - f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)' + f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)" ) return optimizer diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index 2f42eb603df..6cc5eb273a2 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection, instance segmentation, image classification, pose estimation, and multi-object tracking. @@ -8,11 +8,11 @@ where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency. Example: - Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. + Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) ``` """ @@ -50,11 +50,11 @@ class Tuner: Executes the hyperparameter evolution across multiple iterations. Example: - Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. + Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. 
```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False) ``` @@ -62,7 +62,7 @@ class Tuner: ```python from ultralytics import YOLO - model = YOLO("yolov8n.pt") + model = YOLO("yolo11n.pt") model.tune(space={key1: val1, key2: val2}) # custom search space dictionary ``` """ @@ -101,7 +101,8 @@ def __init__(self, args=DEFAULT_CFG, _callbacks=None): "copy_paste": (0.0, 1.0), # segment copy-paste (probability) } self.args = get_cfg(overrides=args) - self.tune_dir = get_save_dir(self.args, name="tune") + self.tune_dir = get_save_dir(self.args, name=self.args.name or "tune") + self.args.name = None # reset to not affect training directory self.tune_csv = self.tune_dir / "tune_results.csv" self.callbacks = _callbacks or callbacks.get_default_callbacks() self.prefix = colorstr("Tuner: ") @@ -140,7 +141,7 @@ def _mutate(self, parent="single", n=5, mutation=0.8, sigma=0.2): # Mutate r = np.random # method r.seed(int(time.time())) - g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()]) # gains 0-1 + g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()]) # gains 0-1 ng = len(self.space) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) @@ -191,7 +192,7 @@ def __call__(self, model=None, iterations=10, cleanup=True): try: # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang) cmd = ["yolo", "train", *(f"{k}={v}" for k, v in train_args.items())] - return_code = subprocess.run(cmd, check=True).returncode + return_code = subprocess.run(" ".join(cmd), check=True, shell=True).returncode ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt") metrics = torch.load(ckpt_file)["train_metrics"] assert return_code == 0, "training failed" @@ -224,12 +225,12 @@ def __call__(self, model=None, 
iterations=10, cleanup=True): # Save and print tune results header = ( - f'{self.prefix}{i + 1}/{iterations} iterations complete โœ… ({time.time() - t0:.2f}s)\n' - f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n' - f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n' - f'{self.prefix}Best fitness metrics are {best_metrics}\n' - f'{self.prefix}Best fitness model is {best_save_dir}\n' - f'{self.prefix}Best fitness hyperparameters are printed below.\n' + f"{self.prefix}{i + 1}/{iterations} iterations complete โœ… ({time.time() - t0:.2f}s)\n" + f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n" + f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n" + f"{self.prefix}Best fitness metrics are {best_metrics}\n" + f"{self.prefix}Best fitness model is {best_save_dir}\n" + f"{self.prefix}Best fitness hyperparameters are printed below.\n" ) LOGGER.info("\n" + header) data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())} diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 6221f15b090..dad8b49d238 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -1,23 +1,26 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Check a model's accuracy on a test or val split of a dataset. 
Usage: - $ yolo mode=val model=yolov8n.pt data=coco8.yaml imgsz=640 + $ yolo mode=val model=yolo11n.pt data=coco8.yaml imgsz=640 Usage - formats: - $ yolo mode=val model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN + $ yolo mode=val model=yolo11n.pt # PyTorch + yolo11n.torchscript # TorchScript + yolo11n.onnx # ONNX Runtime or OpenCV DNN with dnn=True + yolo11n_openvino_model # OpenVINO + yolo11n.engine # TensorRT + yolo11n.mlpackage # CoreML (macOS-only) + yolo11n_saved_model # TensorFlow SavedModel + yolo11n.pb # TensorFlow GraphDef + yolo11n.tflite # TensorFlow Lite + yolo11n_edgetpu.tflite # TensorFlow Edge TPU + yolo11n_paddle_model # PaddlePaddle + yolo11n.mnn # MNN + yolo11n_ncnn_model # NCNN + yolo11n_imx_model # Sony IMX + yolo11n_rknn_model # Rockchip RKNN """ import json @@ -121,6 +124,8 @@ def __call__(self, trainer=None, model=None): self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1) model.eval() else: + if str(self.args.model).endswith(".yaml") and model is None: + LOGGER.warning("WARNING โš ๏ธ validating an untrained model YAML will result in 0 mAP.") callbacks.add_integration_callbacks(self) model = AutoBackend( weights=model or self.args.model, @@ -246,7 +251,7 @@ def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False): cost_matrix = iou * (iou >= threshold) if cost_matrix.any(): - labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True) + labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix) valid = cost_matrix[labels_idx, 
detections_idx] > 0 if valid.any(): correct[detections_idx[valid], i] = True diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py index 9c9c9dfa16a..74c0dfeda7b 100644 --- a/ultralytics/hub/__init__.py +++ b/ultralytics/hub/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import requests @@ -63,13 +63,13 @@ def login(api_key: str = None, save=True) -> bool: return True else: # Failed to authenticate with HUB - LOGGER.info(f"{PREFIX}Get API key from {api_key_url} and then run 'yolo hub login API_KEY'") + LOGGER.info(f"{PREFIX}Get API key from {api_key_url} and then run 'yolo login API_KEY'") return False def logout(): """ - Log out of Ultralytics HUB by removing the API key from the settings file. To log in again, use 'yolo hub login'. + Log out of Ultralytics HUB by removing the API key from the settings file. To log in again, use 'yolo login'. Example: ```python @@ -79,7 +79,7 @@ def logout(): ``` """ SETTINGS["api_key"] = "" - LOGGER.info(f"{PREFIX}logged out โœ…. To log in again, use 'yolo hub login'.") + LOGGER.info(f"{PREFIX}logged out โœ…. 
To log in again, use 'yolo login'.") def reset_model(model_id=""): diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py index 3c7c6d3d25d..2e62739f31c 100644 --- a/ultralytics/hub/auth.py +++ b/ultralytics/hub/auth.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import requests @@ -68,7 +68,7 @@ def __init__(self, api_key="", verbose=False): if verbose: LOGGER.info(f"{PREFIX}New authentication successful โœ…") elif verbose: - LOGGER.info(f"{PREFIX}Get API key from {API_KEY_URL} and then run 'yolo hub login API_KEY'") + LOGGER.info(f"{PREFIX}Get API key from {API_KEY_URL} and then run 'yolo login API_KEY'") def request_api_key(self, max_attempts=3): """ diff --git a/ultralytics/hub/google/__init__.py b/ultralytics/hub/google/__init__.py index 9090297a713..0acd2dd26da 100644 --- a/ultralytics/hub/google/__init__.py +++ b/ultralytics/hub/google/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import concurrent.futures import statistics diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py index 89b5ddfc1e9..37fba131359 100644 --- a/ultralytics/hub/session.py +++ b/ultralytics/hub/session.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import shutil import threading diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py index 2fc956fb348..5f837588947 100644 --- a/ultralytics/hub/utils.py +++ b/ultralytics/hub/utils.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os import platform @@ -170,7 +170,7 @@ def func(func_method, func_url, **func_kwargs): class Events: """ A class for collecting anonymous event analytics. 
Event analytics are enabled when sync=True in settings and - disabled when sync=False. Run 'yolo settings' to see and update settings YAML file. + disabled when sync=False. Run 'yolo settings' to see and update settings. Attributes: url (str): The URL to send anonymous events. diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py index aff620a9a92..ead1e923041 100644 --- a/ultralytics/models/__init__.py +++ b/ultralytics/models/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .fastsam import FastSAM from .nas import NAS diff --git a/ultralytics/models/fastsam/__init__.py b/ultralytics/models/fastsam/__init__.py index 7be2ba1edfa..8c224ac8f9e 100644 --- a/ultralytics/models/fastsam/__init__.py +++ b/ultralytics/models/fastsam/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .model import FastSAM from .predict import FastSAMPredictor diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py index 4e3f44f8378..f9deb7a12b9 100644 --- a/ultralytics/models/fastsam/model.py +++ b/ultralytics/models/fastsam/model.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from pathlib import Path diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py index 9910237b0f3..0d019afb9e2 100644 --- a/ultralytics/models/fastsam/predict.py +++ b/ultralytics/models/fastsam/predict.py @@ -1,4 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + import torch from PIL import Image @@ -64,6 +65,9 @@ def prompt(self, results, bboxes=None, points=None, labels=None, texts=None): if not isinstance(results, list): results = [results] for result in results: + if 
len(result) == 0: + prompt_results.append(result) + continue masks = result.masks.data if masks.shape[1:] != result.orig_shape: masks = scale_masks(masks[None], result.orig_shape)[0] @@ -84,9 +88,9 @@ def prompt(self, results, bboxes=None, points=None, labels=None, texts=None): if labels is None: labels = torch.ones(points.shape[0]) labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) - assert len(labels) == len( - points - ), f"Excepted `labels` got same size as `point`, but got {len(labels)} and {len(points)}" + assert len(labels) == len(points), ( + f"Excepted `labels` got same size as `point`, but got {len(labels)} and {len(points)}" + ) point_idx = ( torch.ones(len(result), dtype=torch.bool, device=self.device) if labels.sum() == 0 # all negative points diff --git a/ultralytics/models/fastsam/utils.py b/ultralytics/models/fastsam/utils.py index 0f41b1ad4cb..a8e1aa172ba 100644 --- a/ultralytics/models/fastsam/utils.py +++ b/ultralytics/models/fastsam/utils.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20): diff --git a/ultralytics/models/fastsam/val.py b/ultralytics/models/fastsam/val.py index 9014b27a767..aa130dbfc9a 100644 --- a/ultralytics/models/fastsam/val.py +++ b/ultralytics/models/fastsam/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.models.yolo.segment import SegmentationValidator from ultralytics.utils.metrics import SegmentMetrics diff --git a/ultralytics/models/nas/__init__.py b/ultralytics/models/nas/__init__.py index b095a050bad..c36c0a42f03 100644 --- a/ultralytics/models/nas/__init__.py +++ b/ultralytics/models/nas/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .model import 
NAS from .predict import NASPredictor diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py index bc0ccc5942d..10fd72b4e46 100644 --- a/ultralytics/models/nas/model.py +++ b/ultralytics/models/nas/model.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ YOLO-NAS model interface. diff --git a/ultralytics/models/nas/predict.py b/ultralytics/models/nas/predict.py index 1978f395bc4..e140900e7ba 100644 --- a/ultralytics/models/nas/predict.py +++ b/ultralytics/models/nas/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch diff --git a/ultralytics/models/nas/val.py b/ultralytics/models/nas/val.py index c4fb73b699b..ca01e94e002 100644 --- a/ultralytics/models/nas/val.py +++ b/ultralytics/models/nas/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch @@ -38,13 +38,7 @@ def postprocess(self, preds_in): """Apply Non-maximum suppression to prediction outputs.""" boxes = ops.xyxy2xywh(preds_in[0][0]) preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1) - return ops.non_max_suppression( + return super().postprocess( preds, - self.args.conf, - self.args.iou, - labels=self.lb, - multi_label=False, - agnostic=self.args.single_cls or self.args.agnostic_nms, - max_det=self.args.max_det, max_time_img=0.5, ) diff --git a/ultralytics/models/rtdetr/__init__.py b/ultralytics/models/rtdetr/__init__.py index 172c74b45cc..a6d038d652c 100644 --- a/ultralytics/models/rtdetr/__init__.py +++ b/ultralytics/models/rtdetr/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .model import RTDETR from .predict import RTDETRPredictor diff --git a/ultralytics/models/rtdetr/model.py 
b/ultralytics/models/rtdetr/model.py index 440df1798fe..fa4123a8a24 100644 --- a/ultralytics/models/rtdetr/model.py +++ b/ultralytics/models/rtdetr/model.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py index 492254356d9..782cc2f640d 100644 --- a/ultralytics/models/rtdetr/predict.py +++ b/ultralytics/models/rtdetr/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py index cb11a727948..2fc30f9f39a 100644 --- a/ultralytics/models/rtdetr/train.py +++ b/ultralytics/models/rtdetr/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from copy import copy @@ -68,8 +68,11 @@ def build_dataset(self, img_path, mode="val", batch=None): hyp=self.args, rect=False, cache=self.args.cache or None, + single_cls=self.args.single_cls or False, prefix=colorstr(f"{mode}: "), + classes=self.args.classes, data=self.data, + fraction=self.args.fraction if mode == "train" else 1.0, ) def get_validator(self): diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py index 9122750c27b..761d20dfb26 100644 --- a/ultralytics/models/rtdetr/val.py +++ b/ultralytics/models/rtdetr/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch diff --git a/ultralytics/models/sam/__init__.py b/ultralytics/models/sam/__init__.py 
index a29f5cb3f3c..2d9de7b64ea 100644 --- a/ultralytics/models/sam/__init__.py +++ b/ultralytics/models/sam/__init__.py @@ -1,6 +1,6 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .model import SAM -from .predict import Predictor, SAM2Predictor +from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor -__all__ = "SAM", "Predictor", "SAM2Predictor" # tuple or list +__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor" # tuple or list diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py index 55db3e011cb..4abce4cd7db 100644 --- a/ultralytics/models/sam/amg.py +++ b/ultralytics/models/sam/amg.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import math from itertools import product @@ -76,7 +76,7 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: def generate_crop_boxes( im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float ) -> Tuple[List[List[int]], List[int]]: - """Generates crop boxes of varying sizes for multi-scale image processing, with layered overlapping regions.""" + """Generates crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.""" crop_boxes, layer_idxs = [], [] im_h, im_w = im_size short_side = min(im_h, im_w) diff --git a/ultralytics/models/sam/build.py b/ultralytics/models/sam/build.py index 0e7ddedcf0f..47c9d5a345b 100644 --- a/ultralytics/models/sam/build.py +++ b/ultralytics/models/sam/build.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
@@ -210,8 +210,6 @@ def _build_sam( state_dict = torch.load(f) sam.load_state_dict(state_dict) sam.eval() - # sam.load_state_dict(torch.load(checkpoint), strict=True) - # sam.eval() return sam @@ -265,6 +263,7 @@ def _build_sam2( memory_attention = MemoryAttention(d_model=256, pos_enc_at_input=True, num_layers=4, layer=MemoryAttentionLayer()) memory_encoder = MemoryEncoder(out_dim=64) + is_sam2_1 = checkpoint is not None and "sam2.1" in checkpoint sam2 = SAM2Model( image_encoder=image_encoder, memory_attention=memory_attention, @@ -290,6 +289,9 @@ def _build_sam2( multimask_max_pt_num=1, use_mlp_for_obj_ptr_proj=True, compile_image_encoder=False, + no_obj_embed_spatial=is_sam2_1, + proj_tpos_enc_in_obj_ptrs=is_sam2_1, + use_signed_tpos_enc_to_obj_ptrs=is_sam2_1, sam_mask_decoder_extra_args=dict( dynamic_multimask_via_stability=True, dynamic_multimask_stability_delta=0.05, @@ -315,6 +317,10 @@ def _build_sam2( "sam2_s.pt": build_sam2_s, "sam2_b.pt": build_sam2_b, "sam2_l.pt": build_sam2_l, + "sam2.1_t.pt": build_sam2_t, + "sam2.1_s.pt": build_sam2_s, + "sam2.1_b.pt": build_sam2_b, + "sam2.1_l.pt": build_sam2_l, } diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py index e685dc4e4fd..d9fb501b795 100644 --- a/ultralytics/models/sam/model.py +++ b/ultralytics/models/sam/model.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ SAM model interface. @@ -148,7 +148,7 @@ def info(self, detailed=False, verbose=True): verbose (bool): If True, prints the information to the console. Returns: - (Tuple): A tuple containing the model's information (string representations of the model). + (tuple): A tuple containing the model's information (string representations of the model). 
Examples: >>> sam = SAM("sam_b.pt") diff --git a/ultralytics/models/sam/modules/__init__.py b/ultralytics/models/sam/modules/__init__.py index 9e68dc12245..77a19dcf0f8 100644 --- a/ultralytics/models/sam/modules/__init__.py +++ b/ultralytics/models/sam/modules/__init__.py @@ -1 +1 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license diff --git a/ultralytics/models/sam/modules/blocks.py b/ultralytics/models/sam/modules/blocks.py index 026443c69fa..9abcc4406e2 100644 --- a/ultralytics/models/sam/modules/blocks.py +++ b/ultralytics/models/sam/modules/blocks.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import copy import math @@ -502,11 +502,11 @@ def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) -> torch.T class MultiScaleAttention(nn.Module): """ - Implements multi-scale self-attention with optional query pooling for efficient feature extraction. + Implements multiscale self-attention with optional query pooling for efficient feature extraction. - This class provides a flexible implementation of multi-scale attention, allowing for optional + This class provides a flexible implementation of multiscale attention, allowing for optional downsampling of query features through pooling. It's designed to enhance the model's ability to - capture multi-scale information in visual tasks. + capture multiscale information in visual tasks. Attributes: dim (int): Input dimension of the feature map. @@ -518,7 +518,7 @@ class MultiScaleAttention(nn.Module): proj (nn.Linear): Output projection. Methods: - forward: Applies multi-scale attention to the input tensor. + forward: Applies multiscale attention to the input tensor. 
Examples: >>> import torch @@ -537,7 +537,7 @@ def __init__( num_heads: int, q_pool: nn.Module = None, ): - """Initializes multi-scale attention with optional query pooling for efficient feature extraction.""" + """Initializes multiscale attention with optional query pooling for efficient feature extraction.""" super().__init__() self.dim = dim @@ -552,7 +552,7 @@ def __init__( self.proj = nn.Linear(dim_out, dim_out) def forward(self, x: torch.Tensor) -> torch.Tensor: - """Applies multi-scale attention with optional query pooling to extract multi-scale features.""" + """Applies multiscale attention with optional query pooling to extract multiscale features.""" B, H, W, _ = x.shape # qkv with shape (B, H * W, 3, nHead, C) qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1) @@ -582,9 +582,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class MultiScaleBlock(nn.Module): """ - A multi-scale attention block with window partitioning and query pooling for efficient vision transformers. + A multiscale attention block with window partitioning and query pooling for efficient vision transformers. - This class implements a multi-scale attention mechanism with optional window partitioning and downsampling, + This class implements a multiscale attention mechanism with optional window partitioning and downsampling, designed for use in vision transformer architectures. Attributes: @@ -601,7 +601,7 @@ class MultiScaleBlock(nn.Module): proj (nn.Linear | None): Projection layer for dimension mismatch. Methods: - forward: Processes input tensor through the multi-scale block. + forward: Processes input tensor through the multiscale block. 
Examples: >>> block = MultiScaleBlock(dim=256, dim_out=512, num_heads=8, window_size=7) @@ -623,7 +623,7 @@ def __init__( act_layer: nn.Module = nn.GELU, window_size: int = 0, ): - """Initializes a multi-scale attention block with window partitioning and optional query pooling.""" + """Initializes a multiscale attention block with window partitioning and optional query pooling.""" super().__init__() if isinstance(norm_layer, str): @@ -660,7 +660,7 @@ def __init__( self.proj = nn.Linear(dim, dim_out) def forward(self, x: torch.Tensor) -> torch.Tensor: - """Processes input through multi-scale attention and MLP, with optional windowing and downsampling.""" + """Processes input through multiscale attention and MLP, with optional windowing and downsampling.""" shortcut = x # B, H, W, C x = self.norm1(x) diff --git a/ultralytics/models/sam/modules/decoders.py b/ultralytics/models/sam/modules/decoders.py index 7c27ca176b5..ee9497f6c67 100644 --- a/ultralytics/models/sam/modules/decoders.py +++ b/ultralytics/models/sam/modules/decoders.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from typing import List, Optional, Tuple, Type diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py index 7fa7b405d33..a6e9fae887a 100644 --- a/ultralytics/models/sam/modules/encoders.py +++ b/ultralytics/models/sam/modules/encoders.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from typing import List, Optional, Tuple, Type @@ -479,9 +479,9 @@ def __init__( self.trunk = trunk self.neck = neck self.scalp = scalp - assert ( - self.trunk.channel_list == self.neck.backbone_channel_list - ), f"Channel dims of trunk {self.trunk.channel_list} and neck {self.neck.backbone_channel_list} do not match." 
+ assert self.trunk.channel_list == self.neck.backbone_channel_list, ( + f"Channel dims of trunk {self.trunk.channel_list} and neck {self.neck.backbone_channel_list} do not match." + ) def forward(self, sample: torch.Tensor): """Encodes input through patch embedding, positional embedding, transformer blocks, and neck module.""" diff --git a/ultralytics/models/sam/modules/memory_attention.py b/ultralytics/models/sam/modules/memory_attention.py index b55b0730274..14998f37a92 100644 --- a/ultralytics/models/sam/modules/memory_attention.py +++ b/ultralytics/models/sam/modules/memory_attention.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import copy from typing import Optional diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py index 2728b0b4818..8f5c5b77466 100644 --- a/ultralytics/models/sam/modules/sam.py +++ b/ultralytics/models/sam/modules/sam.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. @@ -36,8 +36,6 @@ class SAMModel(nn.Module): image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings. prompt_encoder (PromptEncoder): Encoder for various types of input prompts. mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings. - pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1). - pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1). Methods: __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters. 
@@ -161,18 +159,19 @@ def __init__( use_multimask_token_for_obj_ptr: bool = False, iou_prediction_use_sigmoid=False, memory_temporal_stride_for_eval=1, - add_all_frames_to_correct_as_cond=False, non_overlap_masks_for_mem_enc=False, use_obj_ptrs_in_encoder=False, max_obj_ptrs_in_encoder=16, add_tpos_enc_to_obj_ptrs=True, proj_tpos_enc_in_obj_ptrs=False, + use_signed_tpos_enc_to_obj_ptrs=False, only_obj_ptrs_in_the_past_for_eval=False, pred_obj_scores: bool = False, pred_obj_scores_mlp: bool = False, fixed_no_obj_ptr: bool = False, soft_no_obj_ptr: bool = False, use_mlp_for_obj_ptr_proj: bool = False, + no_obj_embed_spatial: bool = False, sam_mask_decoder_extra_args=None, compile_image_encoder: bool = False, ): @@ -205,8 +204,6 @@ def __init__( use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers. iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1]. memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation. - add_all_frames_to_correct_as_cond (bool): Whether to append frames with correction clicks to conditioning - frame list. non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in memory encoder during evaluation. use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder. @@ -216,6 +213,9 @@ def __init__( the encoder. proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional encoding in object pointers. + use_signed_tpos_enc_to_obj_ptrs (bool): whether to use signed distance (instead of unsigned absolute distance) + in the temporal positional encoding in the object pointers, only relevant when both `use_obj_ptrs_in_encoder=True` + and `add_tpos_enc_to_obj_ptrs=True`. only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past during evaluation. 
pred_obj_scores (bool): Whether to predict if there is an object in the frame. @@ -223,6 +223,7 @@ def __init__( fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present. soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation. use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection. + no_obj_embed_spatial (bool): Whether add no obj embedding to spatial frames. sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder. compile_image_encoder (bool): Whether to compile the image encoder for faster inference. @@ -253,6 +254,7 @@ def __init__( if proj_tpos_enc_in_obj_ptrs: assert add_tpos_enc_to_obj_ptrs # these options need to be used together self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs + self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval # Part 2: memory attention to condition current frame's visual features @@ -309,9 +311,12 @@ def __init__( self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim)) trunc_normal_(self.no_obj_ptr, std=0.02) self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj + self.no_obj_embed_spatial = None + if no_obj_embed_spatial: + self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim)) + trunc_normal_(self.no_obj_embed_spatial, std=0.02) self._build_sam_heads() - self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond self.max_cond_frames_in_attn = max_cond_frames_in_attn # Model compilation @@ -342,8 +347,7 @@ def _build_sam_heads(self): self.sam_prompt_embed_dim = self.hidden_dim self.sam_image_embedding_size = self.image_size // self.backbone_stride - # build PromptEncoder and MaskDecoder from SAM - # (their hyperparameters like `mask_in_chans=16` are from SAM code) + # Build PromptEncoder and MaskDecoder from SAM 
(hyperparameters like `mask_in_chans=16` are from SAM code) self.sam_prompt_encoder = PromptEncoder( embed_dim=self.sam_prompt_embed_dim, image_embedding_size=( @@ -418,12 +422,11 @@ def _forward_sam_heads( low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits. high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits. ious: Tensor of shape (B, M) with estimated IoU for each output mask. - low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask. - high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask. + low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask. + high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask. obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask. - object_score_logits: Tensor of shape (B,) with object score logits. - - Where M is 3 if multimask_output=True, and 1 if multimask_output=False. + object_score_logits: Tensor of shape (B) with object score logits. + Where M is 3 if multimask_output=True, and 1 if multimask_output=False. 
Examples: >>> backbone_features = torch.rand(1, 256, 32, 32) @@ -481,12 +484,7 @@ def _forward_sam_heads( boxes=None, masks=sam_mask_prompt, ) - ( - low_res_multimasks, - ious, - sam_output_tokens, - object_score_logits, - ) = self.sam_mask_decoder( + low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder( image_embeddings=backbone_features, image_pe=self.sam_prompt_encoder.get_dense_pe(), sparse_prompt_embeddings=sparse_embeddings, @@ -498,13 +496,8 @@ def _forward_sam_heads( if self.pred_obj_scores: is_obj_appearing = object_score_logits > 0 - # Mask used for spatial memories is always a *hard* choice between obj and no obj, - # consistent with the actual mask prediction - low_res_multimasks = torch.where( - is_obj_appearing[:, None, None], - low_res_multimasks, - NO_OBJ_SCORE, - ) + # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction + low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE) # convert masks from possibly bfloat16 (or float16) to float32 # (older PyTorch versions before 2.1 don't support `interpolate` on bf16) @@ -533,8 +526,6 @@ def _forward_sam_heads( if self.pred_obj_scores: # Allow *soft* no obj ptr, unlike for masks if self.soft_no_obj_ptr: - # Only hard possible with gt - assert not self.teacher_force_obj_scores_for_mem lambda_is_obj_appearing = object_score_logits.sigmoid() else: lambda_is_obj_appearing = is_obj_appearing.float() @@ -612,7 +603,6 @@ def forward_image(self, img_batch: torch.Tensor): def _prepare_backbone_features(self, backbone_out): """Prepares and flattens visual features from the image backbone output for further processing.""" - backbone_out = backbone_out.copy() assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"]) assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels @@ -647,11 +637,12 @@ def _prepare_memory_conditioned_features( if self.num_maskmem == 
0: # Disable memory and skip fusion return current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W) num_obj_ptr_tokens = 0 + tpos_sign_mul = -1 if track_in_reverse else 1 # Step 1: condition the visual features of the current frame on previous memories if not is_init_cond_frame: # Retrieve the memories encoded with the maskmem backbone to_cat_memory, to_cat_memory_pos_embed = [], [] - # Add conditioning frames's output first (all cond frames have t_pos=0 for + # Add conditioning frame's output first (all cond frames have t_pos=0 for # when getting temporal positional embedding below) assert len(output_dict["cond_frame_outputs"]) > 0 # Select a maximum number of temporally closest cond frames for cross attention @@ -664,7 +655,7 @@ def _prepare_memory_conditioned_features( # the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1 # We also allow taking the memory frame non-consecutively (with r>1), in which case # we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame. - r = self.memory_temporal_stride_for_eval + r = 1 if self.training else self.memory_temporal_stride_for_eval for t_pos in range(1, self.num_maskmem): t_rel = self.num_maskmem - t_pos # how many frames before current frame if t_rel == 1: @@ -693,11 +684,11 @@ def _prepare_memory_conditioned_features( if prev is None: continue # skip padding frames # "maskmem_features" might have been offloaded to CPU in demo use cases, - # so we load it back to GPU (it's a no-op if it's already on GPU). - feats = prev["maskmem_features"].cuda(non_blocking=True) + # so we load it back to inference device (it's a no-op if it's already on device). 
+ feats = prev["maskmem_features"].to(device=device, non_blocking=True) to_cat_memory.append(feats.flatten(2).permute(2, 0, 1)) # Spatial positional encoding (it might have been offloaded to CPU in eval) - maskmem_enc = prev["maskmem_pos_enc"][-1].cuda() + maskmem_enc = prev["maskmem_pos_enc"][-1].to(device=device) maskmem_enc = maskmem_enc.flatten(2).permute(2, 0, 1) # Temporal positional encoding maskmem_enc = maskmem_enc + self.maskmem_tpos_enc[self.num_maskmem - t_pos - 1] @@ -718,7 +709,14 @@ def _prepare_memory_conditioned_features( ptr_cond_outputs = selected_cond_outputs pos_and_ptrs = [ # Temporal pos encoding contains how far away each pointer is from current frame - (abs(frame_idx - t), out["obj_ptr"]) + ( + ( + (frame_idx - t) * tpos_sign_mul + if self.use_signed_tpos_enc_to_obj_ptrs + else abs(frame_idx - t) + ), + out["obj_ptr"], + ) for t, out in ptr_cond_outputs.items() ] # Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame @@ -787,6 +785,7 @@ def _encode_new_memory( current_vision_feats, feat_sizes, pred_masks_high_res, + object_score_logits, is_mask_from_pts, ): """Encodes frame features and masks into a new memory representation for video segmentation.""" @@ -812,17 +811,20 @@ def _encode_new_memory( mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc if self.sigmoid_bias_for_mem_enc != 0.0: mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc - maskmem_out = self.memory_encoder( - pix_feat, - mask_for_mem, - skip_mask_sigmoid=True, # sigmoid already applied - ) + maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True) # sigmoid already applied maskmem_features = maskmem_out["vision_features"] maskmem_pos_enc = maskmem_out["vision_pos_enc"] + # add a no-object embedding to the spatial memory to indicate that the frame + # is predicted to be occluded (i.e. 
no object is appearing in the frame) + if self.no_obj_embed_spatial is not None: + is_obj_appearing = (object_score_logits > 0).float() + maskmem_features += (1 - is_obj_appearing[..., None, None]) * self.no_obj_embed_spatial[ + ..., None, None + ].expand(*maskmem_features.shape) return maskmem_features, maskmem_pos_enc - def track_step( + def _track_step( self, frame_idx, is_init_cond_frame, @@ -833,15 +835,8 @@ def track_step( mask_inputs, output_dict, num_frames, - track_in_reverse=False, # tracking in reverse time order (for demo usage) - # Whether to run the memory encoder on the predicted masks. Sometimes we might want - # to skip the memory encoder with `run_mem_encoder=False`. For example, - # in demo we might call `track_step` multiple times for each user click, - # and only encode the memory when the user finalizes their clicks. And in ablation - # settings like SAM training on static images, we don't need the memory encoder. - run_mem_encoder=True, - # The previously predicted SAM mask logits (which can be fed together with new clicks in demo). 
- prev_sam_mask_logits=None, + track_in_reverse, + prev_sam_mask_logits, ): """Performs a single tracking step, updating object masks and memory features based on current frame inputs.""" current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs} @@ -861,7 +856,7 @@ def track_step( sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs) else: # fused the visual feature with previous memory features in the memory bank - pix_feat_with_mem = self._prepare_memory_conditioned_features( + pix_feat = self._prepare_memory_conditioned_features( frame_idx=frame_idx, is_init_cond_frame=is_init_cond_frame, current_vision_feats=current_vision_feats[-1:], @@ -880,34 +875,34 @@ def track_step( mask_inputs = prev_sam_mask_logits multimask_output = self._use_multimask(is_init_cond_frame, point_inputs) sam_outputs = self._forward_sam_heads( - backbone_features=pix_feat_with_mem, + backbone_features=pix_feat, point_inputs=point_inputs, mask_inputs=mask_inputs, high_res_features=high_res_features, multimask_output=multimask_output, ) - ( - _, - _, - _, - low_res_masks, - high_res_masks, - obj_ptr, - _, - ) = sam_outputs - - current_out["pred_masks"] = low_res_masks - current_out["pred_masks_high_res"] = high_res_masks - current_out["obj_ptr"] = obj_ptr + return current_out, sam_outputs, high_res_features, pix_feat - # Finally run the memory encoder on the predicted mask to encode - # it into a new memory feature (that can be used in future frames) + def _encode_memory_in_output( + self, + current_vision_feats, + feat_sizes, + point_inputs, + run_mem_encoder, + high_res_masks, + object_score_logits, + current_out, + ): + """Finally run the memory encoder on the predicted mask to encode, it into a new memory feature (that can be + used in future frames). 
+ """ if run_mem_encoder and self.num_maskmem > 0: high_res_masks_for_mem_enc = high_res_masks maskmem_features, maskmem_pos_enc = self._encode_new_memory( current_vision_feats=current_vision_feats, feat_sizes=feat_sizes, pred_masks_high_res=high_res_masks_for_mem_enc, + object_score_logits=object_score_logits, is_mask_from_pts=(point_inputs is not None), ) current_out["maskmem_features"] = maskmem_features @@ -916,6 +911,62 @@ def track_step( current_out["maskmem_features"] = None current_out["maskmem_pos_enc"] = None + def track_step( + self, + frame_idx, + is_init_cond_frame, + current_vision_feats, + current_vision_pos_embeds, + feat_sizes, + point_inputs, + mask_inputs, + output_dict, + num_frames, + track_in_reverse=False, # tracking in reverse time order (for demo usage) + # Whether to run the memory encoder on the predicted masks. Sometimes we might want + # to skip the memory encoder with `run_mem_encoder=False`. For example, + # in demo we might call `track_step` multiple times for each user click, + # and only encode the memory when the user finalizes their clicks. And in ablation + # settings like SAM training on static images, we don't need the memory encoder. + run_mem_encoder=True, + # The previously predicted SAM mask logits (which can be fed together with new clicks in demo). 
+ prev_sam_mask_logits=None, + ): + """Performs a single tracking step, updating object masks and memory features based on current frame inputs.""" + current_out, sam_outputs, _, _ = self._track_step( + frame_idx, + is_init_cond_frame, + current_vision_feats, + current_vision_pos_embeds, + feat_sizes, + point_inputs, + mask_inputs, + output_dict, + num_frames, + track_in_reverse, + prev_sam_mask_logits, + ) + _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs + + current_out["pred_masks"] = low_res_masks + current_out["pred_masks_high_res"] = high_res_masks + current_out["obj_ptr"] = obj_ptr + if not self.training: + # Only add this in inference (to avoid unused param in activation checkpointing; + # it's mainly used in the demo to encode spatial memories w/ consolidated masks) + current_out["object_score_logits"] = object_score_logits + + # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames) + self._encode_memory_in_output( + current_vision_feats, + feat_sizes, + point_inputs, + run_mem_encoder, + high_res_masks, + object_score_logits, + current_out, + ) + return current_out def _use_multimask(self, is_init_cond_frame, point_inputs): @@ -927,8 +978,9 @@ def _use_multimask(self, is_init_cond_frame, point_inputs): and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num) ) - def _apply_non_overlapping_constraints(self, pred_masks): - """Applies non-overlapping constraints to masks, keeping highest scoring object per location.""" + @staticmethod + def _apply_non_overlapping_constraints(pred_masks): + """Applies non-overlapping constraints to masks, keeping the highest scoring object per location.""" batch_size = pred_masks.size(0) if batch_size == 1: return pred_masks @@ -944,6 +996,10 @@ def _apply_non_overlapping_constraints(self, pred_masks): pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0)) return pred_masks + def set_binarize(self, 
binarize=False): + """Set binarize for VideoPredictor.""" + self.binarize_mask_from_pts_for_mem_enc = binarize + def set_imgsz(self, imgsz): """ Set image size to make model compatible with different image sizes. diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py index d036ab98745..1b181f7a06b 100644 --- a/ultralytics/models/sam/modules/tiny_encoder.py +++ b/ultralytics/models/sam/modules/tiny_encoder.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license # -------------------------------------------------------- # TinyViT Model Architecture @@ -955,7 +955,8 @@ def _check_lr_scale(m): self.apply(_check_lr_scale) - def _init_weights(self, m): + @staticmethod + def _init_weights(m): """Initializes weights for linear and normalization layers in the TinyViT model.""" if isinstance(m, nn.Linear): # NOTE: This initialization is needed only for training. diff --git a/ultralytics/models/sam/modules/transformer.py b/ultralytics/models/sam/modules/transformer.py index 5cc0b02f7f1..9b9c2bf6121 100644 --- a/ultralytics/models/sam/modules/transformer.py +++ b/ultralytics/models/sam/modules/transformer.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import math from typing import Tuple, Type diff --git a/ultralytics/models/sam/modules/utils.py b/ultralytics/models/sam/modules/utils.py index 8bfb13760d9..6751b87da2e 100644 --- a/ultralytics/models/sam/modules/utils.py +++ b/ultralytics/models/sam/modules/utils.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from typing import Tuple diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 686ef70c638..345fc7c98fe 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py 
@@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Generate predictions using the Segment Anything Model (SAM). @@ -8,6 +8,8 @@ segmentation tasks. """ +from collections import OrderedDict + import numpy as np import torch import torch.nn.functional as F @@ -16,7 +18,7 @@ from ultralytics.engine.predictor import BasePredictor from ultralytics.engine.results import Results from ultralytics.utils import DEFAULT_CFG, ops -from ultralytics.utils.torch_utils import select_device +from ultralytics.utils.torch_utils import select_device, smart_inference_mode from .amg import ( batch_iterator, @@ -71,8 +73,8 @@ class Predictor(BasePredictor): >>> predictor = Predictor() >>> predictor.setup_model(model_path="sam_model.pt") >>> predictor.set_image("image.jpg") - >>> masks, scores, boxes = predictor.generate() - >>> results = predictor.postprocess((masks, scores, boxes), im, orig_img) + >>> bboxes = [[100, 100, 200, 200]] + >>> results = predictor(bboxes=bboxes) """ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): @@ -89,13 +91,13 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): _callbacks (Dict | None): Dictionary of callback functions to customize behavior. 
Examples: - >>> predictor = Predictor(cfg=DEFAULT_CFG) - >>> predictor = Predictor(overrides={"imgsz": 640}) - >>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback}) + >>> predictor_example = Predictor(cfg=DEFAULT_CFG) + >>> predictor_example_with_imgsz = Predictor(overrides={"imgsz": 640}) + >>> predictor_example_with_callback = Predictor(_callbacks={"on_predict_start": custom_callback}) """ if overrides is None: overrides = {} - overrides.update(dict(task="segment", mode="predict")) + overrides.update(dict(task="segment", mode="predict", batch=1)) super().__init__(cfg, overrides, _callbacks) self.args.retina_masks = True self.im = None @@ -114,7 +116,7 @@ def preprocess(self, im): im (torch.Tensor | List[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays. Returns: - (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype. + im (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype. Examples: >>> predictor = Predictor() @@ -181,21 +183,21 @@ def inference(self, im, bboxes=None, points=None, labels=None, masks=None, multi **kwargs (Any): Additional keyword arguments. Returns: - (tuple): Contains the following three elements: - - np.ndarray: The output masks in shape (C, H, W), where C is the number of generated masks. - - np.ndarray: An array of length C containing quality scores predicted by the model for each mask. - - np.ndarray: Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256. + (np.ndarray): The output masks in shape (C, H, W), where C is the number of generated masks. + (np.ndarray): An array of length C containing quality scores predicted by the model for each mask. + (np.ndarray): Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256. 
Examples: >>> predictor = Predictor() >>> predictor.setup_model(model_path="sam_model.pt") >>> predictor.set_image("image.jpg") - >>> masks, scores, logits = predictor.inference(im, bboxes=[[0, 0, 100, 100]]) + >>> results = predictor(bboxes=[[0, 0, 100, 100]]) """ # Override prompts if any stored in self.prompts bboxes = self.prompts.pop("bboxes", bboxes) points = self.prompts.pop("points", points) masks = self.prompts.pop("masks", masks) + labels = self.prompts.pop("labels", labels) if all(i is None for i in [bboxes, points, masks]): return self.generate(im, *args, **kwargs) @@ -212,16 +214,17 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None Args: im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W). bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4). - points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels. - labels (np.ndarray | List | None): Point prompt labels with shape (N,). 1 for foreground, 0 for background. + points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels. + labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background. masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256. multimask_output (bool): Flag to return multiple masks for ambiguous prompts. + Raises: + AssertionError: If the number of points don't match the number of labels, in case labels were passed. + Returns: - (tuple): Tuple containing: - - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks. - - np.ndarray: Quality scores predicted by the model for each mask, with length C. - - np.ndarray: Low-resolution logits with shape (C, H, W) for subsequent inference, where H=W=256. 
+ (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks. + (np.ndarray): Quality scores predicted by the model for each mask, with length C. Examples: >>> predictor = Predictor() @@ -231,26 +234,7 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None """ features = self.get_im_features(im) if self.features is None else self.features - src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:] - r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1]) - # Transform input prompts - if points is not None: - points = torch.as_tensor(points, dtype=torch.float32, device=self.device) - points = points[None] if points.ndim == 1 else points - # Assuming labels are all positive if users don't pass labels. - if labels is None: - labels = np.ones(points.shape[0]) - labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) - points *= r - # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) - points, labels = points[:, None, :], labels[:, None] - if bboxes is not None: - bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device) - bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes - bboxes *= r - if masks is not None: - masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1) - + bboxes, points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) points = (points, labels) if points is not None else None # Embed prompts sparse_embeddings, dense_embeddings = self.model.prompt_encoder(points=points, boxes=bboxes, masks=masks) @@ -268,6 +252,48 @@ def prompt_inference(self, im, bboxes=None, points=None, labels=None, masks=None # `d` could be 1 or 3 depends on `multimask_output`. 
return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1) + def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None): + """ + Prepares and transforms the input prompts for processing based on the destination shape. + + Args: + dst_shape (tuple): The target shape (height, width) for the prompts. + bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4). + points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels. + labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background. + masks (List | np.ndarray, Optional): Masks for the objects, where each mask is a 2D array. + + Raises: + AssertionError: If the number of points don't match the number of labels, in case labels were passed. + + Returns: + (tuple): A tuple containing transformed bounding boxes, points, labels, and masks. + """ + src_shape = self.batch[1][0].shape[:2] + r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1]) + # Transform input prompts + if points is not None: + points = torch.as_tensor(points, dtype=torch.float32, device=self.device) + points = points[None] if points.ndim == 1 else points + # Assuming labels are all positive if users don't pass labels. + if labels is None: + labels = np.ones(points.shape[:-1]) + labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) + assert points.shape[-2] == labels.shape[-1], ( + f"Number of points {points.shape[-2]} should match number of labels {labels.shape[-1]}." 
+ ) + points *= r + if points.ndim == 2: + # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) + points, labels = points[:, None, :], labels[:, None] + if bboxes is not None: + bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device) + bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes + bboxes *= r + if masks is not None: + masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1) + return bboxes, points, labels, masks + def generate( self, im, @@ -302,10 +328,9 @@ def generate( crop_nms_thresh (float): IoU cutoff for NMS to remove duplicate masks between crops. Returns: - (Tuple[torch.Tensor, torch.Tensor, torch.Tensor]): A tuple containing: - - pred_masks (torch.Tensor): Segmented masks with shape (N, H, W). - - pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,). - - pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4). + pred_masks (torch.Tensor): Segmented masks with shape (N, H, W). + pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,). + pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4). Examples: >>> predictor = Predictor() @@ -381,7 +406,7 @@ def generate( return pred_masks, pred_scores, pred_bboxes - def setup_model(self, model, verbose=True): + def setup_model(self, model=None, verbose=True): """ Initializes the Segment Anything Model (SAM) for inference. @@ -389,7 +414,7 @@ def setup_model(self, model, verbose=True): parameters for image normalization and other Ultralytics compatibility settings. Args: - model (torch.nn.Module): A pre-trained SAM model. If None, a model will be built based on configuration. + model (torch.nn.Module | None): A pretrained SAM model. If None, a new model is built based on config. verbose (bool): If True, prints selected device information. Examples: @@ -432,7 +457,7 @@ def postprocess(self, preds, img, orig_imgs): orig_imgs (List[np.ndarray] | torch.Tensor): The original, unprocessed images. 
Returns: - (List[Results]): List of Results objects containing detection masks, bounding boxes, and other + results (List[Results]): List of Results objects containing detection masks, bounding boxes, and other metadata for each processed image. Examples: @@ -451,7 +476,7 @@ def postprocess(self, preds, img, orig_imgs): results = [] for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]): if len(masks) == 0: - masks = None + masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device) else: masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0] masks = masks > self.model.mask_threshold # to bool @@ -527,9 +552,9 @@ def set_image(self, image): def get_im_features(self, im): """Extracts image features using the SAM model's image encoder for subsequent mask prediction.""" - assert ( - isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1] - ), f"SAM models only support square image size, but got {self.imgsz}." + assert isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1], ( + f"SAM models only support square image size, but got {self.imgsz}." + ) self.model.set_imgsz(self.imgsz) return self.model.image_encoder(im) @@ -559,9 +584,8 @@ def remove_small_regions(masks, min_area=0, nms_thresh=0.7): nms_thresh (float): IoU threshold for the NMS algorithm to remove duplicate boxes. Returns: - (tuple): - - new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W). - - keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes. + new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W). + keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes. 
Examples: >>> masks = torch.rand(5, 640, 640) > 0.5 # 5 random binary masks @@ -622,8 +646,8 @@ class SAM2Predictor(Predictor): >>> predictor = SAM2Predictor(cfg) >>> predictor.set_image("path/to/image.jpg") >>> bboxes = [[100, 100, 200, 200]] - >>> masks, scores, _ = predictor.prompt_inference(predictor.im, bboxes=bboxes) - >>> print(f"Predicted {len(masks)} masks with average score {scores.mean():.2f}") + >>> result = predictor(bboxes=bboxes)[0] + >>> print(f"Predicted {len(result.masks)} masks with average score {result.boxes.conf.mean():.2f}") """ _bb_feat_sizes = [ @@ -663,17 +687,15 @@ def prompt_inference( img_idx (int): Index of the image in the batch to process. Returns: - (tuple): Tuple containing: - - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks. - - np.ndarray: Quality scores for each mask, with length C. - - np.ndarray: Low-resolution logits with shape (C, 256, 256) for subsequent inference. + (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks. + (np.ndarray): Quality scores for each mask, with length C. Examples: >>> predictor = SAM2Predictor(cfg) >>> image = torch.rand(1, 3, 640, 640) >>> bboxes = [[100, 100, 200, 200]] - >>> masks, scores, logits = predictor.prompt_inference(image, bboxes=bboxes) - >>> print(f"Generated {masks.shape[0]} masks with average score {scores.mean():.2f}") + >>> result = predictor(image, bboxes=bboxes)[0] + >>> print(f"Generated {result.masks.shape[0]} masks with average score {result.boxes.conf.mean():.2f}") Notes: - The method supports batched inference for multiple objects when points or bboxes are provided. 
@@ -685,34 +707,7 @@ def prompt_inference( """ features = self.get_im_features(im) if self.features is None else self.features - src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:] - r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1]) - # Transform input prompts - if points is not None: - points = torch.as_tensor(points, dtype=torch.float32, device=self.device) - points = points[None] if points.ndim == 1 else points - # Assuming labels are all positive if users don't pass labels. - if labels is None: - labels = torch.ones(points.shape[0]) - labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device) - points *= r - # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1) - points, labels = points[:, None], labels[:, None] - if bboxes is not None: - bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device) - bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes - bboxes = bboxes.view(-1, 2, 2) * r - bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1) - # NOTE: merge "boxes" and "points" into a single "points" input - # (where boxes are added at the beginning) to model.sam_prompt_encoder - if points is not None: - points = torch.cat([bboxes, points], dim=1) - labels = torch.cat([bbox_labels, labels], dim=1) - else: - points, labels = bboxes, bbox_labels - if masks is not None: - masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1) - + points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) points = (points, labels) if points is not None else None sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder( @@ -736,6 +731,36 @@ def prompt_inference( # `d` could be 1 or 3 depends on `multimask_output`. 
return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1) + def _prepare_prompts(self, dst_shape, bboxes=None, points=None, labels=None, masks=None): + """ + Prepares and transforms the input prompts for processing based on the destination shape. + + Args: + dst_shape (tuple): The target shape (height, width) for the prompts. + bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4). + points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels. + labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background. + masks (List | np.ndarray, Optional): Masks for the objects, where each mask is a 2D array. + + Raises: + AssertionError: If the number of points don't match the number of labels, in case labels were passed. + + Returns: + (tuple): A tuple containing transformed points, labels, and masks. + """ + bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, bboxes, points, labels, masks) + if bboxes is not None: + bboxes = bboxes.view(-1, 2, 2) + bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1) + # NOTE: merge "boxes" and "points" into a single "points" input + # (where boxes are added at the beginning) to model.sam_prompt_encoder + if points is not None: + points = torch.cat([bboxes, points], dim=1) + labels = torch.cat([bbox_labels, labels], dim=1) + else: + points, labels = bboxes, bbox_labels + return points, labels, masks + def set_image(self, image): """ Preprocesses and sets a single image for inference using the SAM2 model. @@ -770,9 +795,9 @@ def set_image(self, image): def get_im_features(self, im): """Extracts image features from the SAM image encoder for subsequent processing.""" - assert ( - isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1] - ), f"SAM 2 models only support square image size, but got {self.imgsz}." 
+ assert isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1], ( + f"SAM 2 models only support square image size, but got {self.imgsz}." + ) self.model.set_imgsz(self.imgsz) self._bb_feat_sizes = [[x // (4 * i) for x in self.imgsz] for i in [1, 2, 4]] @@ -785,3 +810,796 @@ def get_im_features(self, im): for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1]) ][::-1] return {"image_embed": feats[-1], "high_res_feats": feats[:-1]} + + +class SAM2VideoPredictor(SAM2Predictor): + """ + SAM2VideoPredictor to handle user interactions with videos and manage inference states. + + This class extends the functionality of SAM2Predictor to support video processing and maintains + the state of inference operations. It includes configurations for managing non-overlapping masks, + clearing memory for non-conditional inputs, and setting up callbacks for prediction events. + + Attributes: + inference_state (Dict): A dictionary to store the current state of inference operations. + non_overlap_masks (bool): A flag indicating whether masks should be non-overlapping. + clear_non_cond_mem_around_input (bool): A flag to control clearing non-conditional memory around inputs. + clear_non_cond_mem_for_multi_obj (bool): A flag to control clearing non-conditional memory for multi-object scenarios. + callbacks (Dict): A dictionary of callbacks for various prediction lifecycle events. + + Args: + cfg (Dict, Optional): Configuration settings for the predictor. Defaults to DEFAULT_CFG. + overrides (Dict, Optional): Additional configuration overrides. Defaults to None. + _callbacks (List, Optional): Custom callbacks to be added. Defaults to None. + + Note: + The `fill_hole_area` attribute is defined but not used in the current implementation. + """ + + # fill_hole_area = 8 # not used + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """ + Initialize the predictor with configuration and optional overrides. 
+ + This constructor initializes the SAM2VideoPredictor with a given configuration, applies any + specified overrides, and sets up the inference state along with certain flags + that control the behavior of the predictor. + + Args: + cfg (Dict): Configuration dictionary containing default settings. + overrides (Dict | None): Dictionary of values to override default configuration. + _callbacks (Dict | None): Dictionary of callback functions to customize behavior. + + Examples: + >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG) + >>> predictor_example_with_imgsz = SAM2VideoPredictor(overrides={"imgsz": 640}) + >>> predictor_example_with_callback = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback}) + """ + super().__init__(cfg, overrides, _callbacks) + self.inference_state = {} + self.non_overlap_masks = True + self.clear_non_cond_mem_around_input = False + self.clear_non_cond_mem_for_multi_obj = False + self.callbacks["on_predict_start"].append(self.init_state) + + def get_model(self): + """ + Retrieves and configures the model with binarization enabled. + + Note: + This method overrides the base class implementation to set the binarize flag to True. + """ + model = super().get_model() + model.set_binarize(True) + return model + + def inference(self, im, bboxes=None, points=None, labels=None, masks=None): + """ + Perform image segmentation inference based on the given input cues, using the currently loaded image. This + method leverages SAM's (Segment Anything Model) architecture consisting of image encoder, prompt encoder, and + mask decoder for real-time and promptable segmentation tasks. + + Args: + im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W). + bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format. + points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels. 
+ labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background. + masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256. + + Returns: + (np.ndarray): The output masks in shape CxHxW, where C is the number of generated masks. + (np.ndarray): An array of length C containing quality scores predicted by the model for each mask. + """ + # Override prompts if any stored in self.prompts + bboxes = self.prompts.pop("bboxes", bboxes) + points = self.prompts.pop("points", points) + masks = self.prompts.pop("masks", masks) + + frame = self.dataset.frame + self.inference_state["im"] = im + output_dict = self.inference_state["output_dict"] + if len(output_dict["cond_frame_outputs"]) == 0: # initialize prompts + points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) + if points is not None: + for i in range(len(points)): + self.add_new_prompts(obj_id=i, points=points[[i]], labels=labels[[i]], frame_idx=frame) + elif masks is not None: + for i in range(len(masks)): + self.add_new_prompts(obj_id=i, masks=masks[[i]], frame_idx=frame) + self.propagate_in_video_preflight() + + consolidated_frame_inds = self.inference_state["consolidated_frame_inds"] + batch_size = len(self.inference_state["obj_idx_to_id"]) + if len(output_dict["cond_frame_outputs"]) == 0: + raise RuntimeError("No points are provided; please add points first") + + if frame in consolidated_frame_inds["cond_frame_outputs"]: + storage_key = "cond_frame_outputs" + current_out = output_dict[storage_key][frame] + if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1): + # clear non-conditioning memory of the surrounding frames + self._clear_non_cond_mem_around_input(frame) + elif frame in consolidated_frame_inds["non_cond_frame_outputs"]: + storage_key = "non_cond_frame_outputs" + current_out = output_dict[storage_key][frame] + else: + 
storage_key = "non_cond_frame_outputs" + current_out = self._run_single_frame_inference( + output_dict=output_dict, + frame_idx=frame, + batch_size=batch_size, + is_init_cond_frame=False, + point_inputs=None, + mask_inputs=None, + reverse=False, + run_mem_encoder=True, + ) + output_dict[storage_key][frame] = current_out + # Create slices of per-object outputs for subsequent interaction with each + # individual object after tracking. + self._add_output_per_object(frame, current_out, storage_key) + self.inference_state["frames_already_tracked"].append(frame) + pred_masks = current_out["pred_masks"].flatten(0, 1) + pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0] # filter blank masks + + return pred_masks, torch.ones(len(pred_masks), dtype=pred_masks.dtype, device=pred_masks.device) + + def postprocess(self, preds, img, orig_imgs): + """ + Post-processes the predictions to apply non-overlapping constraints if required. + + This method extends the post-processing functionality by applying non-overlapping constraints + to the predicted masks if the `non_overlap_masks` flag is set to True. This ensures that + the masks do not overlap, which can be useful for certain applications. + + Args: + preds (Tuple[torch.Tensor]): The predictions from the model. + img (torch.Tensor): The processed image tensor. + orig_imgs (List[np.ndarray]): The original images before processing. + + Returns: + results (list): The post-processed predictions. + + Note: + If `non_overlap_masks` is True, the method applies constraints to ensure non-overlapping masks. 
+ """ + results = super().postprocess(preds, img, orig_imgs) + if self.non_overlap_masks: + for result in results: + if result.masks is None or len(result.masks) == 0: + continue + result.masks.data = self.model._apply_non_overlapping_constraints(result.masks.data.unsqueeze(0))[0] + return results + + @smart_inference_mode() + def add_new_prompts( + self, + obj_id, + points=None, + labels=None, + masks=None, + frame_idx=0, + ): + """ + Adds new points or masks to a specific frame for a given object ID. + + This method updates the inference state with new prompts (points or masks) for a specified + object and frame index. It ensures that the prompts are either points or masks, but not both, + and updates the internal state accordingly. It also handles the generation of new segmentations + based on the provided prompts and the existing state. + + Args: + obj_id (int): The ID of the object to which the prompts are associated. + points (torch.Tensor, Optional): The coordinates of the points of interest. Defaults to None. + labels (torch.Tensor, Optional): The labels corresponding to the points. Defaults to None. + masks (torch.Tensor, optional): Binary masks for the object. Defaults to None. + frame_idx (int, optional): The index of the frame to which the prompts are applied. Defaults to 0. + + Returns: + (tuple): A tuple containing the flattened predicted masks and a tensor of ones indicating the number of objects. + + Raises: + AssertionError: If both `masks` and `points` are provided, or neither is provided. + + Note: + - Only one type of prompt (either points or masks) can be added per call. + - If the frame is being tracked for the first time, it is treated as an initial conditioning frame. + - The method handles the consolidation of outputs and resizing of masks to the original video resolution. + """ + assert (masks is None) ^ (points is None), "'masks' and 'points' prompts are not compatible with each other." 
+ obj_idx = self._obj_id_to_idx(obj_id) + + point_inputs = None + pop_key = "point_inputs_per_obj" + if points is not None: + point_inputs = {"point_coords": points, "point_labels": labels} + self.inference_state["point_inputs_per_obj"][obj_idx][frame_idx] = point_inputs + pop_key = "mask_inputs_per_obj" + self.inference_state["mask_inputs_per_obj"][obj_idx][frame_idx] = masks + self.inference_state[pop_key][obj_idx].pop(frame_idx, None) + # If this frame hasn't been tracked before, we treat it as an initial conditioning + # frame, meaning that the inputs points are to generate segments on this frame without + # using any memory from other frames, like in SAM. Otherwise (if it has been tracked), + # the input points will be used to correct the already tracked masks. + is_init_cond_frame = frame_idx not in self.inference_state["frames_already_tracked"] + obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx] + obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx] + # Add a frame to conditioning output if it's an initial conditioning frame or + # if the model sees all frames receiving clicks/mask as conditioning frames. + is_cond = is_init_cond_frame or self.model.add_all_frames_to_correct_as_cond + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + + # Get any previously predicted mask logits on this object and feed it along with + # the new clicks into the SAM mask decoder. 
+ prev_sam_mask_logits = None + # lookup temporary output dict first, which contains the most recent output + # (if not found, then lookup conditioning and non-conditioning frame output) + if point_inputs is not None: + prev_out = ( + obj_temp_output_dict[storage_key].get(frame_idx) + or obj_output_dict["cond_frame_outputs"].get(frame_idx) + or obj_output_dict["non_cond_frame_outputs"].get(frame_idx) + ) + + if prev_out is not None and prev_out.get("pred_masks") is not None: + prev_sam_mask_logits = prev_out["pred_masks"].to(device=self.device, non_blocking=True) + # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues. + prev_sam_mask_logits.clamp_(-32.0, 32.0) + current_out = self._run_single_frame_inference( + output_dict=obj_output_dict, # run on the slice of a single object + frame_idx=frame_idx, + batch_size=1, # run on the slice of a single object + is_init_cond_frame=is_init_cond_frame, + point_inputs=point_inputs, + mask_inputs=masks, + reverse=False, + # Skip the memory encoder when adding clicks or mask. We execute the memory encoder + # at the beginning of `propagate_in_video` (after user finalize their clicks). This + # allows us to enforce non-overlapping constraints on all objects before encoding + # them into memory. + run_mem_encoder=False, + prev_sam_mask_logits=prev_sam_mask_logits, + ) + # Add the output to the output dict (to be used as future memory) + obj_temp_output_dict[storage_key][frame_idx] = current_out + + # Resize the output mask to the original video resolution + consolidated_out = self._consolidate_temp_output_across_obj( + frame_idx, + is_cond=is_cond, + run_mem_encoder=False, + ) + pred_masks = consolidated_out["pred_masks"].flatten(0, 1) + return pred_masks.flatten(0, 1), torch.ones(1, dtype=pred_masks.dtype, device=pred_masks.device) + + @smart_inference_mode() + def propagate_in_video_preflight(self): + """ + Prepare inference_state and consolidate temporary outputs before tracking. 
+ + This method marks the start of tracking, disallowing the addition of new objects until the session is reset. + It consolidates temporary outputs from `temp_output_dict_per_obj` and merges them into `output_dict`. + Additionally, it clears non-conditioning memory around input frames and ensures that the state is consistent + with the provided inputs. + """ + # Tracking has started and we don't allow adding new objects until session is reset. + self.inference_state["tracking_has_started"] = True + batch_size = len(self.inference_state["obj_idx_to_id"]) + + # Consolidate per-object temporary outputs in "temp_output_dict_per_obj" and + # add them into "output_dict". + temp_output_dict_per_obj = self.inference_state["temp_output_dict_per_obj"] + output_dict = self.inference_state["output_dict"] + # "consolidated_frame_inds" contains indices of those frames where consolidated + # temporary outputs have been added (either in this call or any previous calls + # to `propagate_in_video_preflight`). 
+ consolidated_frame_inds = self.inference_state["consolidated_frame_inds"] + for is_cond in {False, True}: + # Separately consolidate conditioning and non-conditioning temp outputs + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + # Find all the frames that contain temporary outputs for any objects + # (these should be the frames that have just received clicks for mask inputs + # via `add_new_points` or `add_new_mask`) + temp_frame_inds = set() + for obj_temp_output_dict in temp_output_dict_per_obj.values(): + temp_frame_inds.update(obj_temp_output_dict[storage_key].keys()) + consolidated_frame_inds[storage_key].update(temp_frame_inds) + # consolidate the temporary output across all objects on this frame + for frame_idx in temp_frame_inds: + consolidated_out = self._consolidate_temp_output_across_obj( + frame_idx, is_cond=is_cond, run_mem_encoder=True + ) + # merge them into "output_dict" and also create per-object slices + output_dict[storage_key][frame_idx] = consolidated_out + self._add_output_per_object(frame_idx, consolidated_out, storage_key) + if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1): + # clear non-conditioning memory of the surrounding frames + self._clear_non_cond_mem_around_input(frame_idx) + + # clear temporary outputs in `temp_output_dict_per_obj` + for obj_temp_output_dict in temp_output_dict_per_obj.values(): + obj_temp_output_dict[storage_key].clear() + + # edge case: if an output is added to "cond_frame_outputs", we remove any prior + # output on the same frame in "non_cond_frame_outputs" + for frame_idx in output_dict["cond_frame_outputs"]: + output_dict["non_cond_frame_outputs"].pop(frame_idx, None) + for obj_output_dict in self.inference_state["output_dict_per_obj"].values(): + for frame_idx in obj_output_dict["cond_frame_outputs"]: + obj_output_dict["non_cond_frame_outputs"].pop(frame_idx, None) + for frame_idx in 
consolidated_frame_inds["cond_frame_outputs"]: + assert frame_idx in output_dict["cond_frame_outputs"] + consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx) + + # Make sure that the frame indices in "consolidated_frame_inds" are exactly those frames + # with either points or mask inputs (which should be true under a correct workflow). + all_consolidated_frame_inds = ( + consolidated_frame_inds["cond_frame_outputs"] | consolidated_frame_inds["non_cond_frame_outputs"] + ) + input_frames_inds = set() + for point_inputs_per_frame in self.inference_state["point_inputs_per_obj"].values(): + input_frames_inds.update(point_inputs_per_frame.keys()) + for mask_inputs_per_frame in self.inference_state["mask_inputs_per_obj"].values(): + input_frames_inds.update(mask_inputs_per_frame.keys()) + assert all_consolidated_frame_inds == input_frames_inds + + @staticmethod + def init_state(predictor): + """ + Initialize an inference state for the predictor. + + This function sets up the initial state required for performing inference on video data. + It includes initializing various dictionaries and ordered dictionaries that will store + inputs, outputs, and other metadata relevant to the tracking process. + + Args: + predictor (SAM2VideoPredictor): The predictor object for which to initialize the state. 
+ """ + if len(predictor.inference_state) > 0: # means initialized + return + assert predictor.dataset is not None + assert predictor.dataset.mode == "video" + + inference_state = { + "num_frames": predictor.dataset.frames, + "point_inputs_per_obj": {}, # inputs points on each frame + "mask_inputs_per_obj": {}, # inputs mask on each frame + "constants": {}, # values that don't change across frames (so we only need to hold one copy of them) + # mapping between client-side object id and model-side object index + "obj_id_to_idx": OrderedDict(), + "obj_idx_to_id": OrderedDict(), + "obj_ids": [], + # A storage to hold the model's tracking results and states on each frame + "output_dict": { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + }, + # Slice (view) of each object tracking results, sharing the same memory with "output_dict" + "output_dict_per_obj": {}, + # A temporary storage to hold new outputs when user interact with a frame + # to add clicks or mask (it's merged into "output_dict" before propagation starts) + "temp_output_dict_per_obj": {}, + # Frames that already holds consolidated outputs from click or mask inputs + # (we directly use their consolidated outputs during tracking) + "consolidated_frame_inds": { + "cond_frame_outputs": set(), # set containing frame indices + "non_cond_frame_outputs": set(), # set containing frame indices + }, + # metadata for each tracking frame (e.g. which direction it's tracked) + "tracking_has_started": False, + "frames_already_tracked": [], + } + predictor.inference_state = inference_state + + def get_im_features(self, im, batch=1): + """ + Extracts and processes image features using SAM2's image encoder for subsequent segmentation tasks. + + Args: + im (torch.Tensor): The input image tensor. + batch (int, optional): The batch size for expanding features if there are multiple prompts. Defaults to 1. 
+ + Returns: + vis_feats (torch.Tensor): The visual features extracted from the image. + vis_pos_embed (torch.Tensor): The positional embeddings for the visual features. + feat_sizes (List(Tuple[int])): A list containing the sizes of the extracted features. + + Note: + - If `batch` is greater than 1, the features are expanded to fit the batch size. + - The method leverages the model's `_prepare_backbone_features` method to prepare the backbone features. + """ + backbone_out = self.model.forward_image(im) + if batch > 1: # expand features if there's more than one prompt + for i, feat in enumerate(backbone_out["backbone_fpn"]): + backbone_out["backbone_fpn"][i] = feat.expand(batch, -1, -1, -1) + for i, pos in enumerate(backbone_out["vision_pos_enc"]): + pos = pos.expand(batch, -1, -1, -1) + backbone_out["vision_pos_enc"][i] = pos + _, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out) + return vis_feats, vis_pos_embed, feat_sizes + + def _obj_id_to_idx(self, obj_id): + """ + Map client-side object id to model-side object index. + + Args: + obj_id (int): The unique identifier of the object provided by the client side. + + Returns: + obj_idx (int): The index of the object on the model side. + + Raises: + RuntimeError: If an attempt is made to add a new object after tracking has started. + + Note: + - The method updates or retrieves mappings between object IDs and indices stored in + `inference_state`. + - It ensures that new objects can only be added before tracking commences. + - It maintains two-way mappings between IDs and indices (`obj_id_to_idx` and `obj_idx_to_id`). + - Additional data structures are initialized for the new object to store inputs and outputs. + """ + obj_idx = self.inference_state["obj_id_to_idx"].get(obj_id, None) + if obj_idx is not None: + return obj_idx + + # This is a new object id not sent to the server before. We only allow adding + # new objects *before* the tracking starts. 
+ allow_new_object = not self.inference_state["tracking_has_started"] + if allow_new_object: + # get the next object slot + obj_idx = len(self.inference_state["obj_id_to_idx"]) + self.inference_state["obj_id_to_idx"][obj_id] = obj_idx + self.inference_state["obj_idx_to_id"][obj_idx] = obj_id + self.inference_state["obj_ids"] = list(self.inference_state["obj_id_to_idx"]) + # set up input and output structures for this object + self.inference_state["point_inputs_per_obj"][obj_idx] = {} + self.inference_state["mask_inputs_per_obj"][obj_idx] = {} + self.inference_state["output_dict_per_obj"][obj_idx] = { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + } + self.inference_state["temp_output_dict_per_obj"][obj_idx] = { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + } + return obj_idx + else: + raise RuntimeError( + f"Cannot add new object id {obj_id} after tracking starts. " + f"All existing object ids: {self.inference_state['obj_ids']}. " + f"Please call 'reset_state' to restart from scratch." + ) + + def _run_single_frame_inference( + self, + output_dict, + frame_idx, + batch_size, + is_init_cond_frame, + point_inputs, + mask_inputs, + reverse, + run_mem_encoder, + prev_sam_mask_logits=None, + ): + """ + Run tracking on a single frame based on current inputs and previous memory. + + Args: + output_dict (Dict): The dictionary containing the output states of the tracking process. + frame_idx (int): The index of the current frame. + batch_size (int): The batch size for processing the frame. + is_init_cond_frame (bool): Indicates if the current frame is an initial conditioning frame. + point_inputs (Dict, Optional): Input points and their labels. Defaults to None. + mask_inputs (torch.Tensor, Optional): Input binary masks. Defaults to None. 
+ reverse (bool): Indicates if the tracking should be performed in reverse order. + run_mem_encoder (bool): Indicates if the memory encoder should be executed. + prev_sam_mask_logits (torch.Tensor, Optional): Previous mask logits for the current object. Defaults to None. + + Returns: + current_out (dict): A dictionary containing the output of the tracking step, including updated features and predictions. + + Raises: + AssertionError: If both `point_inputs` and `mask_inputs` are provided, or neither is provided. + + Note: + - The method assumes that `point_inputs` and `mask_inputs` are mutually exclusive. + - The method retrieves image features using the `get_im_features` method. + - The `maskmem_pos_enc` is assumed to be constant across frames, hence only one copy is stored. + - The `fill_holes_in_mask_scores` function is commented out and currently unsupported due to CUDA extension requirements. + """ + # Retrieve correct image features + current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features( + self.inference_state["im"], batch_size + ) + + # point and mask should not appear as input simultaneously on the same frame + assert point_inputs is None or mask_inputs is None + current_out = self.model.track_step( + frame_idx=frame_idx, + is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, + current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, + point_inputs=point_inputs, + mask_inputs=mask_inputs, + output_dict=output_dict, + num_frames=self.inference_state["num_frames"], + track_in_reverse=reverse, + run_mem_encoder=run_mem_encoder, + prev_sam_mask_logits=prev_sam_mask_logits, + ) + + maskmem_features = current_out["maskmem_features"] + if maskmem_features is not None: + current_out["maskmem_features"] = maskmem_features.to( + dtype=torch.float16, device=self.device, non_blocking=True + ) + # NOTE: Do not support the `fill_holes_in_mask_scores` function since it needs cuda extensions + 
# potentially fill holes in the predicted masks + # if self.fill_hole_area > 0: + # pred_masks = current_out["pred_masks"].to(self.device, non_blocking=True) + # pred_masks = fill_holes_in_mask_scores(pred_masks, self.fill_hole_area) + + # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it + current_out["maskmem_pos_enc"] = self._get_maskmem_pos_enc(current_out["maskmem_pos_enc"]) + return current_out + + def _get_maskmem_pos_enc(self, out_maskmem_pos_enc): + """ + Caches and manages the positional encoding for mask memory across frames and objects. + + This method optimizes storage by caching the positional encoding (`maskmem_pos_enc`) for + mask memory, which is constant across frames and objects, thus reducing the amount of + redundant information stored during an inference session. It checks if the positional + encoding has already been cached; if not, it caches a slice of the provided encoding. + If the batch size is greater than one, it expands the cached positional encoding to match + the current batch size. + + Args: + out_maskmem_pos_enc (List[torch.Tensor] or None): The positional encoding for mask memory. + Should be a list of tensors or None. + + Returns: + out_maskmem_pos_enc (List[torch.Tensor]): The positional encoding for mask memory, either cached or expanded. + + Note: + - The method assumes that `out_maskmem_pos_enc` is a list of tensors or None. + - Only a single object's slice is cached since the encoding is the same across objects. + - The method checks if the positional encoding has already been cached in the session's constants. + - If the batch size is greater than one, the cached encoding is expanded to fit the batch size. 
+ """ + model_constants = self.inference_state["constants"] + # "out_maskmem_pos_enc" should be either a list of tensors or None + if out_maskmem_pos_enc is not None: + if "maskmem_pos_enc" not in model_constants: + assert isinstance(out_maskmem_pos_enc, list) + # only take the slice for one object, since it's same across objects + maskmem_pos_enc = [x[:1].clone() for x in out_maskmem_pos_enc] + model_constants["maskmem_pos_enc"] = maskmem_pos_enc + else: + maskmem_pos_enc = model_constants["maskmem_pos_enc"] + # expand the cached maskmem_pos_enc to the actual batch size + batch_size = out_maskmem_pos_enc[0].size(0) + if batch_size > 1: + out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc] + return out_maskmem_pos_enc + + def _consolidate_temp_output_across_obj( + self, + frame_idx, + is_cond=False, + run_mem_encoder=False, + ): + """ + Consolidates per-object temporary outputs into a single output for all objects. + + This method combines the temporary outputs for each object on a given frame into a unified + output. It fills in any missing objects either from the main output dictionary or leaves + placeholders if they do not exist in the main output. Optionally, it can re-run the memory + encoder after applying non-overlapping constraints to the object scores. + + Args: + frame_idx (int): The index of the frame for which to consolidate outputs. + is_cond (bool, Optional): Indicates if the frame is considered a conditioning frame. + Defaults to False. + run_mem_encoder (bool, Optional): Specifies whether to run the memory encoder after + consolidating the outputs. Defaults to False. + + Returns: + consolidated_out (dict): A consolidated output dictionary containing the combined results for all objects. + + Note: + - The method initializes the consolidated output with placeholder values for missing objects. + - It searches for outputs in both the temporary and main output dictionaries. 
+ - If `run_mem_encoder` is True, it applies non-overlapping constraints and re-runs the memory encoder. + - The `maskmem_features` and `maskmem_pos_enc` are only populated when `run_mem_encoder` is True. + """ + batch_size = len(self.inference_state["obj_idx_to_id"]) + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + + # Initialize `consolidated_out`. Its "maskmem_features" and "maskmem_pos_enc" + # will be added when rerunning the memory encoder after applying non-overlapping + # constraints to object scores. Its "pred_masks" are prefilled with a large + # negative value (NO_OBJ_SCORE) to represent missing objects. + consolidated_out = { + "maskmem_features": None, + "maskmem_pos_enc": None, + "pred_masks": torch.full( + size=(batch_size, 1, self.imgsz[0] // 4, self.imgsz[1] // 4), + fill_value=-1024.0, + dtype=torch.float32, + device=self.device, + ), + "obj_ptr": torch.full( + size=(batch_size, self.model.hidden_dim), + fill_value=-1024.0, + dtype=torch.float32, + device=self.device, + ), + "object_score_logits": torch.full( + size=(batch_size, 1), + # default to 10.0 for object_score_logits, i.e. assuming the object is + # present as sigmoid(10)=1, same as in `predict_masks` of `MaskDecoder` + fill_value=10.0, + dtype=torch.float32, + device=self.device, + ), + } + for obj_idx in range(batch_size): + obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx] + obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx] + out = ( + obj_temp_output_dict[storage_key].get(frame_idx) + # If the object doesn't appear in "temp_output_dict_per_obj" on this frame, + # we fall back and look up its previous output in "output_dict_per_obj". + # We look up both "cond_frame_outputs" and "non_cond_frame_outputs" in + # "output_dict_per_obj" to find a previous output for this object. 
+ or obj_output_dict["cond_frame_outputs"].get(frame_idx) + or obj_output_dict["non_cond_frame_outputs"].get(frame_idx) + ) + # If the object doesn't appear in "output_dict_per_obj" either, we skip it + # and leave its mask scores to the default scores (i.e. the NO_OBJ_SCORE + # placeholder above) and set its object pointer to be a dummy pointer. + if out is None: + # Fill in dummy object pointers for those objects without any inputs or + # tracking outcomes on this frame (only do it under `run_mem_encoder=True`, + # i.e. when we need to build the memory for tracking). + if run_mem_encoder: + # fill object pointer with a dummy pointer (based on an empty mask) + consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = self._get_empty_mask_ptr(frame_idx) + continue + # Add the temporary object output mask to consolidated output mask + consolidated_out["pred_masks"][obj_idx : obj_idx + 1] = out["pred_masks"] + consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = out["obj_ptr"] + + # Optionally, apply non-overlapping constraints on the consolidated scores and rerun the memory encoder + if run_mem_encoder: + high_res_masks = F.interpolate( + consolidated_out["pred_masks"], + size=self.imgsz, + mode="bilinear", + align_corners=False, + ) + if self.model.non_overlap_masks_for_mem_enc: + high_res_masks = self.model._apply_non_overlapping_constraints(high_res_masks) + consolidated_out["maskmem_features"], consolidated_out["maskmem_pos_enc"] = self._run_memory_encoder( + batch_size=batch_size, + high_res_masks=high_res_masks, + is_mask_from_pts=True, # these frames are what the user interacted with + object_score_logits=consolidated_out["object_score_logits"], + ) + + return consolidated_out + + def _get_empty_mask_ptr(self, frame_idx): + """ + Get a dummy object pointer based on an empty mask on the current frame. + + Args: + frame_idx (int): The index of the current frame for which to generate the dummy object pointer. 
+ + Returns: + (torch.Tensor): A tensor representing the dummy object pointer generated from the empty mask. + """ + # Retrieve correct image features + current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features(self.inference_state["im"]) + + # Feed the empty mask and image feature above to get a dummy object pointer + current_out = self.model.track_step( + frame_idx=frame_idx, + is_init_cond_frame=True, + current_vision_feats=current_vision_feats, + current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, + point_inputs=None, + # A dummy (empty) mask with a single object + mask_inputs=torch.zeros((1, 1, *self.imgsz), dtype=torch.float32, device=self.device), + output_dict={}, + num_frames=self.inference_state["num_frames"], + track_in_reverse=False, + run_mem_encoder=False, + prev_sam_mask_logits=None, + ) + return current_out["obj_ptr"] + + def _run_memory_encoder(self, batch_size, high_res_masks, object_score_logits, is_mask_from_pts): + """ + Run the memory encoder on masks. + + This is usually after applying non-overlapping constraints to object scores. Since their scores changed, their + memory also needs to be computed again with the memory encoder. + + Args: + batch_size (int): The batch size for processing the frame. + high_res_masks (torch.Tensor): High-resolution masks for which to compute the memory. + object_score_logits (torch.Tensor): Logits representing the object scores. + is_mask_from_pts (bool): Indicates if the mask is derived from point interactions. + + Returns: + (tuple[torch.Tensor, torch.Tensor]): A tuple containing the encoded mask features and positional encoding. 
+ """ + # Retrieve correct image features + current_vision_feats, _, feat_sizes = self.get_im_features(self.inference_state["im"], batch_size) + maskmem_features, maskmem_pos_enc = self.model._encode_new_memory( + current_vision_feats=current_vision_feats, + feat_sizes=feat_sizes, + pred_masks_high_res=high_res_masks, + is_mask_from_pts=is_mask_from_pts, + object_score_logits=object_score_logits, + ) + + # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it + maskmem_pos_enc = self._get_maskmem_pos_enc(maskmem_pos_enc) + return maskmem_features.to(dtype=torch.float16, device=self.device, non_blocking=True), maskmem_pos_enc + + def _add_output_per_object(self, frame_idx, current_out, storage_key): + """ + Split a multi-object output into per-object output slices and add them into Output_Dict_Per_Obj. + + The resulting slices share the same tensor storage. + + Args: + frame_idx (int): The index of the current frame. + current_out (Dict): The current output dictionary containing multi-object outputs. + storage_key (str): The key used to store the output in the per-object output dictionary. 
+ """ + maskmem_features = current_out["maskmem_features"] + assert maskmem_features is None or isinstance(maskmem_features, torch.Tensor) + + maskmem_pos_enc = current_out["maskmem_pos_enc"] + assert maskmem_pos_enc is None or isinstance(maskmem_pos_enc, list) + + for obj_idx, obj_output_dict in self.inference_state["output_dict_per_obj"].items(): + obj_slice = slice(obj_idx, obj_idx + 1) + obj_out = { + "maskmem_features": None, + "maskmem_pos_enc": None, + "pred_masks": current_out["pred_masks"][obj_slice], + "obj_ptr": current_out["obj_ptr"][obj_slice], + } + if maskmem_features is not None: + obj_out["maskmem_features"] = maskmem_features[obj_slice] + if maskmem_pos_enc is not None: + obj_out["maskmem_pos_enc"] = [x[obj_slice] for x in maskmem_pos_enc] + obj_output_dict[storage_key][frame_idx] = obj_out + + def _clear_non_cond_mem_around_input(self, frame_idx): + """ + Remove the non-conditioning memory around the input frame. + + When users provide correction clicks, the surrounding frames' non-conditioning memories can still contain outdated + object appearance information and could confuse the model. This method clears those non-conditioning memories + surrounding the interacted frame to avoid giving the model both old and new information about the object. + + Args: + frame_idx (int): The index of the current frame where user interaction occurred. 
+ """ + r = self.model.memory_temporal_stride_for_eval + frame_idx_begin = frame_idx - r * self.model.num_maskmem + frame_idx_end = frame_idx + r * self.model.num_maskmem + for t in range(frame_idx_begin, frame_idx_end + 1): + self.inference_state["output_dict"]["non_cond_frame_outputs"].pop(t, None) + for obj_output_dict in self.inference_state["output_dict_per_obj"].values(): + obj_output_dict["non_cond_frame_outputs"].pop(t, None) diff --git a/ultralytics/models/utils/__init__.py b/ultralytics/models/utils/__init__.py index 9e68dc12245..77a19dcf0f8 100644 --- a/ultralytics/models/utils/__init__.py +++ b/ultralytics/models/utils/__init__.py @@ -1 +1 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py index a42b13413fd..42f437439b8 100644 --- a/ultralytics/models/utils/loss.py +++ b/ultralytics/models/utils/loss.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch import torch.nn as nn @@ -243,12 +243,11 @@ def _get_loss( if len(gt_bboxes): gt_scores[idx] = bbox_iou(pred_bboxes.detach(), gt_bboxes, xywh=True).squeeze(-1) - loss = {} - loss.update(self._get_loss_class(pred_scores, targets, gt_scores, len(gt_bboxes), postfix)) - loss.update(self._get_loss_bbox(pred_bboxes, gt_bboxes, postfix)) - # if masks is not None and gt_mask is not None: - # loss.update(self._get_loss_mask(masks, gt_mask, match_indices, postfix)) - return loss + return { + **self._get_loss_class(pred_scores, targets, gt_scores, len(gt_bboxes), postfix), + **self._get_loss_bbox(pred_bboxes, gt_bboxes, postfix), + # **(self._get_loss_mask(masks, gt_mask, match_indices, postfix) if masks is not None and gt_mask is not None else {}) + } def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs): """ diff --git a/ultralytics/models/utils/ops.py 
b/ultralytics/models/utils/ops.py index 51dba006c8d..7133072ec14 100644 --- a/ultralytics/models/utils/ops.py +++ b/ultralytics/models/utils/ops.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch import torch.nn as nn @@ -172,7 +172,7 @@ def get_cdn_group( bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn' is less than or equal to 0, the function returns None for all elements in the tuple. """ - if (not training) or num_dn <= 0: + if (not training) or num_dn <= 0 or batch is None: return None, None, None, None gt_groups = batch["gt_groups"] total_num = sum(gt_groups) diff --git a/ultralytics/models/yolo/__init__.py b/ultralytics/models/yolo/__init__.py index e31144d47a8..82450fd436f 100644 --- a/ultralytics/models/yolo/__init__.py +++ b/ultralytics/models/yolo/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.models.yolo import classify, detect, obb, pose, segment, world from ultralytics.models.yolo import regress diff --git a/ultralytics/models/yolo/classify/__init__.py b/ultralytics/models/yolo/classify/__init__.py index ca92f892de4..3a10629229f 100644 --- a/ultralytics/models/yolo/classify/__init__.py +++ b/ultralytics/models/yolo/classify/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.models.yolo.classify.predict import ClassificationPredictor from ultralytics.models.yolo.classify.train import ClassificationTrainer diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index 596931a176a..cf5314585cb 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, 
AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import cv2 import torch @@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.classify import ClassificationPredictor - args = dict(model="yolov8n-cls.pt", source=ASSETS) + args = dict(model="yolo11n-cls.pt", source=ASSETS) predictor = ClassificationPredictor(overrides=args) predictor.predict_cli() ``` @@ -53,6 +53,7 @@ def postprocess(self, preds, img, orig_imgs): if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + preds = preds[0] if isinstance(preds, (list, tuple)) else preds return [ Results(orig_img, path=img_path, names=self.model.names, probs=pred) for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]) diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py index e51349fa989..1eb638a4a40 100644 --- a/ultralytics/models/yolo/classify/train.py +++ b/ultralytics/models/yolo/classify/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from copy import copy @@ -8,7 +8,7 @@ from ultralytics.engine.trainer import BaseTrainer from ultralytics.models import yolo from ultralytics.nn.tasks import ClassificationModel -from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK, colorstr +from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK from ultralytics.utils.plotting import plot_images, plot_results from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first @@ -24,7 +24,7 @@ class ClassificationTrainer(BaseTrainer): ```python from ultralytics.models.yolo.classify import ClassificationTrainer - args = dict(model="yolov8n-cls.pt", data="imagenet10", epochs=3) + args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3) trainer = 
ClassificationTrainer(overrides=args) trainer.train() ``` @@ -141,7 +141,6 @@ def final_eval(self): self.metrics = self.validator(model=f) self.metrics.pop("fitness", None) self.run_callbacks("on_fit_epoch_end") - LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}") def plot_training_samples(self, batch, ni): """Plots training samples with their annotations.""" diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py index e54f04118a2..51aa01f4861 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch @@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator): ```python from ultralytics.models.yolo.classify import ClassificationValidator - args = dict(model="yolov8n-cls.pt", data="imagenet10") + args = dict(model="yolo11n-cls.pt", data="imagenet10") validator = ClassificationValidator(args=args) validator() ``` @@ -71,6 +71,10 @@ def finalize_metrics(self, *args, **kwargs): self.metrics.confusion_matrix = self.confusion_matrix self.metrics.save_dir = self.save_dir + def postprocess(self, preds): + """Preprocesses the classification predictions.""" + return preds[0] if isinstance(preds, (list, tuple)) else preds + def get_stats(self): """Returns a dictionary of metrics obtained by processing targets and predictions.""" self.metrics.process(self.targets, self.pred) diff --git a/ultralytics/models/yolo/detect/__init__.py b/ultralytics/models/yolo/detect/__init__.py index 5f3e62c1862..caece94ae0c 100644 --- a/ultralytics/models/yolo/detect/__init__.py +++ b/ultralytics/models/yolo/detect/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .predict import DetectionPredictor from .train import DetectionTrainer diff --git 
a/ultralytics/models/yolo/detect/predict.py b/ultralytics/models/yolo/detect/predict.py index 6dbbb15e459..23c31f6a396 100644 --- a/ultralytics/models/yolo/detect/predict.py +++ b/ultralytics/models/yolo/detect/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.engine.predictor import BasePredictor from ultralytics.engine.results import Results @@ -15,13 +15,13 @@ class DetectionPredictor(BasePredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.detect import DetectionPredictor - args = dict(model="yolov8n.pt", source=ASSETS) + args = dict(model="yolo11n.pt", source=ASSETS) predictor = DetectionPredictor(overrides=args) predictor.predict_cli() ``` """ - def postprocess(self, preds, img, orig_imgs): + def postprocess(self, preds, img, orig_imgs, **kwargs): """Post-processes predictions and returns a list of Results objects.""" if self.separate_outputs: # Quant friendly export with separated outputs preds = decode_bbox(preds, img.shape, self.device) @@ -30,16 +30,48 @@ def postprocess(self, preds, img, orig_imgs): preds, self.args.conf, self.args.iou, - agnostic=self.args.agnostic_nms, + self.args.classes, + self.args.agnostic_nms, max_det=self.args.max_det, - classes=self.args.classes, + nc=len(self.model.names), + end2end=getattr(self.model, "end2end", False), + rotated=self.args.task == "obb", ) if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) - results = [] - for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]): - pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) - results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred)) - return results + return self.construct_results(preds, img, orig_imgs, **kwargs) + + def construct_results(self, preds, img, orig_imgs): + """ + Constructs a list 
of result objects from the predictions. + + Args: + preds (List[torch.Tensor]): List of predicted bounding boxes and scores. + img (torch.Tensor): The image after preprocessing. + orig_imgs (List[np.ndarray]): List of original images before preprocessing. + + Returns: + (list): List of result objects containing the original images, image paths, class names, and bounding boxes. + """ + return [ + self.construct_result(pred, img, orig_img, img_path) + for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]) + ] + + def construct_result(self, pred, img, orig_img, img_path): + """ + Constructs the result object from the prediction. + + Args: + pred (torch.Tensor): The predicted bounding boxes and scores. + img (torch.Tensor): The image after preprocessing. + orig_img (np.ndarray): The original image before preprocessing. + img_path (str): The path to the original image. + + Returns: + (Results): The result object containing the original image, image path, class names, and bounding boxes. 
+ """ + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6]) diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index 5be24c946ed..eea16e73af1 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import math import random @@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer): ```python from ultralytics.models.yolo.detect import DetectionTrainer - args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3) + args = dict(model="yolo11n.pt", data="coco8.yaml", epochs=3) trainer = DetectionTrainer(overrides=args) trainer.train() ``` @@ -141,3 +141,10 @@ def plot_training_labels(self): boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0) cls = np.concatenate([lb["cls"] for lb in self.train_loader.dataset.labels], 0) plot_labels(boxes, cls.squeeze(), names=self.data["names"], save_dir=self.save_dir, on_plot=self.on_plot) + + def auto_batch(self): + """Get batch size by calculating memory occupation of model.""" + train_dataset = self.build_dataset(self.trainset, mode="train", batch=16) + # 4 for mosaic augmentation + max_num_obj = max(len(label["cls"]) for label in train_dataset.labels) * 4 + return super().auto_batch(max_num_obj) diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index f36290534f1..55274f92c75 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os from pathlib import Path @@ -23,7 +23,7 @@ class DetectionValidator(BaseValidator): ```python from ultralytics.models.yolo.detect import 
DetectionValidator - args = dict(model="yolov8n.pt", data="coco8.yaml") + args = dict(model="yolo11n.pt", data="coco8.yaml") validator = DetectionValidator(args=args) validator() ``` @@ -75,10 +75,11 @@ def init_metrics(self, model): and (val.endswith(f"{os.sep}val2017.txt") or val.endswith(f"{os.sep}test-dev2017.txt")) ) # is COCO self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS - self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(len(model.names))) - self.args.save_json |= (self.is_coco or self.is_lvis) and not self.training # run on final val if training COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1, len(model.names) + 1)) + self.args.save_json |= self.args.val and (self.is_coco or self.is_lvis) and not self.training # run final val self.names = model.names self.nc = len(model.names) + self.end2end = getattr(model, "end2end", False) self.metrics.names = self.names self.metrics.plot = self.args.plots self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf) @@ -100,9 +101,12 @@ def postprocess(self, preds, img_shape): self.args.conf, self.args.iou, labels=self.lb, + nc=self.nc, multi_label=True, agnostic=self.args.single_cls or self.args.agnostic_nms, max_det=self.args.max_det, + end2end=self.end2end, + rotated=self.args.task == "obb", ) def _prepare_batch(self, si, batch): @@ -159,8 +163,8 @@ def update_metrics(self, preds, batch): # Evaluate if nl: stat["tp"] = self._process_batch(predn, bbox, cls) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) @@ -172,7 +176,7 @@ def update_metrics(self, preds, batch): predn, self.args.save_conf, pbatch["ori_shape"], - self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt', + self.save_dir / "labels" / 
f"{Path(batch['im_file'][si]).stem}.txt", ) def finalize_metrics(self, *args, **kwargs): @@ -292,8 +296,7 @@ def pred_to_json(self, predn, filename): self.jdict.append( { "image_id": image_id, - "category_id": self.class_map[int(p[5])] - + (1 if self.is_lvis else 0), # index starts from 1 if it's lvis + "category_id": self.class_map[int(p[5])], "bbox": [round(x, 3) for x in b], "score": round(p[4], 5), } diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py index 6b5269d500d..f34ac4b6867 100644 --- a/ultralytics/models/yolo/model.py +++ b/ultralytics/models/yolo/model.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from pathlib import Path @@ -12,7 +12,7 @@ class YOLO(Model): """YOLO (You Only Look Once) object detection model.""" - def __init__(self, model="yolov8n.pt", task=None, verbose=False): + def __init__(self, model="yolo11n.pt", task=None, verbose=False): """Initialize YOLO model, switching to YOLOWorld if model filename contains '-world'.""" path = Path(model) if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}: # if YOLOWorld PyTorch model diff --git a/ultralytics/models/yolo/obb/__init__.py b/ultralytics/models/yolo/obb/__init__.py index f60349a79d1..61e3e3c6a82 100644 --- a/ultralytics/models/yolo/obb/__init__.py +++ b/ultralytics/models/yolo/obb/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .predict import OBBPredictor from .train import OBBTrainer diff --git a/ultralytics/models/yolo/obb/predict.py b/ultralytics/models/yolo/obb/predict.py index 19308cb6c44..ef6214d4213 100644 --- a/ultralytics/models/yolo/obb/predict.py +++ b/ultralytics/models/yolo/obb/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch @@ -16,7 +16,7 @@ 
class OBBPredictor(DetectionPredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.obb import OBBPredictor - args = dict(model="yolov8n-obb.pt", source=ASSETS) + args = dict(model="yolo11n-obb.pt", source=ASSETS) predictor = OBBPredictor(overrides=args) predictor.predict_cli() ``` @@ -27,27 +27,20 @@ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): super().__init__(cfg, overrides, _callbacks) self.args.task = "obb" - def postprocess(self, preds, img, orig_imgs): - """Post-processes predictions and returns a list of Results objects.""" - preds = ops.non_max_suppression( - preds, - self.args.conf, - self.args.iou, - agnostic=self.args.agnostic_nms, - max_det=self.args.max_det, - nc=len(self.model.names), - classes=self.args.classes, - rotated=True, - ) - - if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list - orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) - - results = [] - for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]): - rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)) - rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True) - # xywh, r, conf, cls - obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1) - results.append(Results(orig_img, path=img_path, names=self.model.names, obb=obb)) - return results + def construct_result(self, pred, img, orig_img, img_path): + """ + Constructs the result object from the prediction. + + Args: + pred (torch.Tensor): The predicted bounding boxes, scores, and rotation angles. + img (torch.Tensor): The image after preprocessing. + orig_img (np.ndarray): The original image before preprocessing. + img_path (str): The path to the original image. + + Returns: + (Results): The result object containing the original image, image path, class names, and oriented bounding boxes. 
+ """ + rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)) + rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True) + obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1) + return Results(orig_img, path=img_path, names=self.model.names, obb=obb) diff --git a/ultralytics/models/yolo/obb/train.py b/ultralytics/models/yolo/obb/train.py index 18def219748..c88272b1545 100644 --- a/ultralytics/models/yolo/obb/train.py +++ b/ultralytics/models/yolo/obb/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from copy import copy @@ -15,7 +15,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer): ```python from ultralytics.models.yolo.obb import OBBTrainer - args = dict(model="yolov8n-obb.pt", data="dota8.yaml", epochs=3) + args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3) trainer = OBBTrainer(overrides=args) trainer.train() ``` @@ -39,4 +39,6 @@ def get_model(self, cfg=None, weights=None, verbose=True): def get_validator(self): """Return an instance of OBBValidator for validation of YOLO model.""" self.loss_names = "box_loss", "cls_loss", "dfl_loss" - return yolo.obb.OBBValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args)) + return yolo.obb.OBBValidator( + self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks + ) diff --git a/ultralytics/models/yolo/obb/val.py b/ultralytics/models/yolo/obb/val.py index 93bb2bfabb2..b5cb89f1452 100644 --- a/ultralytics/models/yolo/obb/val.py +++ b/ultralytics/models/yolo/obb/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from pathlib import Path @@ -18,7 +18,7 @@ class OBBValidator(DetectionValidator): ```python from ultralytics.models.yolo.obb import OBBValidator - args = dict(model="yolov8n-obb.pt", data="dota8.yaml") + args = 
dict(model="yolo11n-obb.pt", data="dota8.yaml") validator = OBBValidator(args=args) validator(model=args["model"]) ``` @@ -36,20 +36,6 @@ def init_metrics(self, model): val = self.data.get(self.args.split, "") # validation path self.is_dota = isinstance(val, str) and "DOTA" in val # is COCO - def postprocess(self, preds): - """Apply Non-maximum suppression to prediction outputs.""" - return ops.non_max_suppression( - preds, - self.args.conf, - self.args.iou, - labels=self.lb, - nc=self.nc, - multi_label=True, - agnostic=self.args.single_cls or self.args.agnostic_nms, - max_det=self.args.max_det, - rotated=True, - ) - def _process_batch(self, detections, gt_bboxes, gt_cls): """ Perform computation of the correct prediction matrix for a batch of detections and ground truth bounding boxes. @@ -160,10 +146,10 @@ def eval_json(self, stats): for d in data: image_id = d["image_id"] score = d["score"] - classname = self.names[d["category_id"]].replace(" ", "-") + classname = self.names[d["category_id"] - 1].replace(" ", "-") p = d["poly"] - with open(f'{pred_txt / f"Task1_{classname}"}.txt', "a") as f: + with open(f"{pred_txt / f'Task1_{classname}'}.txt", "a") as f: f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n") # Save merged results, this could result slightly lower map than using official merging script, # because of the probiou calculation. 
@@ -175,7 +161,7 @@ def eval_json(self, stats): image_id = d["image_id"].split("__")[0] pattern = re.compile(r"\d+___\d+") x, y = (int(c) for c in re.findall(pattern, d["image_id"])[0].split("___")) - bbox, score, cls = d["rbox"], d["score"], d["category_id"] + bbox, score, cls = d["rbox"], d["score"], d["category_id"] - 1 bbox[0] += x bbox[1] += y bbox.extend([score, cls]) @@ -197,7 +183,7 @@ def eval_json(self, stats): p = [round(i, 3) for i in x[:-2]] # poly score = round(x[-2], 3) - with open(f'{pred_merged_txt / f"Task1_{classname}"}.txt', "a") as f: + with open(f"{pred_merged_txt / f'Task1_{classname}'}.txt", "a") as f: f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n") return stats diff --git a/ultralytics/models/yolo/pose/__init__.py b/ultralytics/models/yolo/pose/__init__.py index d56694301f1..396167b08f8 100644 --- a/ultralytics/models/yolo/pose/__init__.py +++ b/ultralytics/models/yolo/pose/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .predict import PosePredictor from .train import PoseTrainer diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py index 6950b4aea37..b6d4440d700 100644 --- a/ultralytics/models/yolo/pose/predict.py +++ b/ultralytics/models/yolo/pose/predict.py @@ -1,5 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch from ultralytics.engine.results import Results @@ -17,7 +16,7 @@ class PosePredictor(DetectionPredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.pose import PosePredictor - args = dict(model="yolov8n-pose.pt", source=ASSETS) + args = dict(model="yolo11n-pose.pt", source=ASSETS) predictor = PosePredictor(overrides=args) predictor.predict_cli() ``` @@ -82,3 +81,25 @@ def postprocess(self, preds, img, orig_imgs): Results(orig_img, 
path=img_path, names=self.model.names, boxes=pred[:, :6], keypoints=pred_kpts) ) return results + + def construct_result(self, pred, img, orig_img, img_path): + """ + Constructs the result object from the prediction. + + Args: + pred (torch.Tensor): The predicted bounding boxes, scores, and keypoints. + img (torch.Tensor): The image after preprocessing. + orig_img (np.ndarray): The original image before preprocessing. + img_path (str): The path to the original image. + Returns: + (Results): The result object containing the original image, image path, class names, bounding boxes, and keypoints. + """ + + result = super().construct_result(pred, img, orig_img, img_path) + if self.separate_outputs: + pred_kpts = pred[:, 6:].view(len(pred), *kpt_shape) if len(pred) else pred[:, 6:] + else: + pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:] + pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, orig_img.shape) + result.update(keypoints=pred_kpts) + return result \ No newline at end of file diff --git a/ultralytics/models/yolo/pose/train.py b/ultralytics/models/yolo/pose/train.py index e52f449cf82..4a4f0ced2fb 100644 --- a/ultralytics/models/yolo/pose/train.py +++ b/ultralytics/models/yolo/pose/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from copy import copy @@ -16,7 +16,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer): ```python from ultralytics.models.yolo.pose import PoseTrainer - args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3) + args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml", epochs=3) trainer = PoseTrainer(overrides=args) trainer.train() ``` diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py index 6b431a21def..909c35b229d 100644 --- a/ultralytics/models/yolo/pose/val.py +++ b/ultralytics/models/yolo/pose/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, 
AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os from pathlib import Path @@ -22,7 +22,7 @@ class PoseValidator(DetectionValidator): ```python from ultralytics.models.yolo.pose import PoseValidator - args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml") + args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml") validator = PoseValidator(args=args) validator() ``` @@ -151,8 +151,8 @@ def update_metrics(self, preds, batch): if nl: stat["tp"] = self._process_batch(predn, bbox, cls) stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"]) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) @@ -166,7 +166,7 @@ def update_metrics(self, preds, batch): pred_kpts, self.args.save_conf, pbatch["ori_shape"], - self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt', + self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt", ) def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts=None): @@ -184,7 +184,7 @@ def _process_batch(self, detections, gt_bboxes, gt_cls, pred_kpts=None, gt_kpts= gt_kpts (torch.Tensor | None): Optional tensor with shape (N, 51) representing ground truth keypoints. Returns: - torch.Tensor: A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels, + (torch.Tensor): A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels, where N is the number of detections. 
Example: diff --git a/ultralytics/models/yolo/segment/__init__.py b/ultralytics/models/yolo/segment/__init__.py index ec1ac7991a9..36a921a9a36 100644 --- a/ultralytics/models/yolo/segment/__init__.py +++ b/ultralytics/models/yolo/segment/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .predict import SegmentationPredictor from .train import SegmentationTrainer diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py index 3264d36aac5..444f2482ac3 100644 --- a/ultralytics/models/yolo/segment/predict.py +++ b/ultralytics/models/yolo/segment/predict.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch @@ -17,7 +17,7 @@ class SegmentationPredictor(DetectionPredictor): from ultralytics.utils import ASSETS from ultralytics.models.yolo.segment import SegmentationPredictor - args = dict(model="yolov8n-seg.pt", source=ASSETS) + args = dict(model="yolo11n-seg.pt", source=ASSETS) predictor = SegmentationPredictor(overrides=args) predictor.predict_cli() ``` @@ -70,4 +70,46 @@ def postprocess(self, preds, img, orig_imgs): masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks)) - return results \ No newline at end of file + return results + + def construct_results(self, preds, img, orig_imgs, protos): + """ + Constructs a list of result objects from the predictions. + + Args: + preds (List[torch.Tensor]): List of predicted bounding boxes, scores, and masks. + img (torch.Tensor): The image after preprocessing. + orig_imgs (List[np.ndarray]): List of original images before preprocessing. 
+ protos (List[torch.Tensor]): List of prototype masks. + + Returns: + (list): List of result objects containing the original images, image paths, class names, bounding boxes, and masks. + """ + return [ + self.construct_result(pred, img, orig_img, img_path, proto) + for pred, orig_img, img_path, proto in zip(preds, orig_imgs, self.batch[0], protos) + ] + + def construct_result(self, pred, img, orig_img, img_path, proto): + """ + Constructs the result object from the prediction. + + Args: + pred (np.ndarray): The predicted bounding boxes, scores, and masks. + img (torch.Tensor): The image after preprocessing. + orig_img (np.ndarray): The original image before preprocessing. + img_path (str): The path to the original image. + proto (torch.Tensor): The prototype masks. + + Returns: + (Results): The result object containing the original image, image path, class names, bounding boxes, and masks. + """ + if not len(pred): # save empty boxes + masks = None + elif self.args.retina_masks: + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + masks = ops.process_mask_native(proto, pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC + else: + masks = ops.process_mask(proto, pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC + pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) + return Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks) diff --git a/ultralytics/models/yolo/segment/train.py b/ultralytics/models/yolo/segment/train.py index f5b25ed6115..0142ecccfa6 100644 --- a/ultralytics/models/yolo/segment/train.py +++ b/ultralytics/models/yolo/segment/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from copy import copy @@ -16,7 +16,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer): ```python from ultralytics.models.yolo.segment import SegmentationTrainer - args = 
dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3) + args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3) trainer = SegmentationTrainer(overrides=args) trainer.train() ``` diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py index 98708c93eab..1fbc9cc7ded 100644 --- a/ultralytics/models/yolo/segment/val.py +++ b/ultralytics/models/yolo/segment/val.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os from multiprocessing.pool import ThreadPool @@ -24,7 +24,7 @@ class SegmentationValidator(DetectionValidator): ```python from ultralytics.models.yolo.segment import SegmentationValidator - args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml") + args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml") validator = SegmentationValidator(args=args) validator() ``` @@ -153,8 +153,8 @@ def update_metrics(self, preds, batch): stat["tp_m"] = self._process_batch( predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True ) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) @@ -180,7 +180,7 @@ def update_metrics(self, preds, batch): pred_masks, self.args.save_conf, pbatch["ori_shape"], - self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt', + self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt", ) def finalize_metrics(self, *args, **kwargs): diff --git a/ultralytics/models/yolo/world/__init__.py b/ultralytics/models/yolo/world/__init__.py index 1d401999cdf..4380d244602 100644 --- a/ultralytics/models/yolo/world/__init__.py +++ b/ultralytics/models/yolo/world/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .train 
import WorldTrainer diff --git a/ultralytics/models/yolo/world/train.py b/ultralytics/models/yolo/world/train.py index 5fffd0974c8..1a16a2d1d1e 100644 --- a/ultralytics/models/yolo/world/train.py +++ b/ultralytics/models/yolo/world/train.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import itertools diff --git a/ultralytics/models/yolo/world/train_world.py b/ultralytics/models/yolo/world/train_world.py index df26986d9c5..3cbdb2a4e77 100644 --- a/ultralytics/models/yolo/world/train_world.py +++ b/ultralytics/models/yolo/world/train_world.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset from ultralytics.data.utils import check_det_dataset diff --git a/ultralytics/nn/__init__.py b/ultralytics/nn/__init__.py index 242b690865f..e6142fe38fa 100644 --- a/ultralytics/nn/__init__.py +++ b/ultralytics/nn/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .tasks import ( BaseModel, diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index dfda624d36d..07ea5bbd410 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -1,7 +1,6 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import ast -import contextlib import json import platform import zipfile @@ -14,8 +13,8 @@ import torch.nn as nn from PIL import Image -from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, ROOT, yaml_load -from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml +from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load +from 
ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip from ultralytics.utils.downloads import attempt_download_asset, is_url @@ -45,8 +44,10 @@ def check_class_names(names): def default_class_names(data=None): """Applies default class names to an input YAML file or returns numerical class names.""" if data: - with contextlib.suppress(Exception): + try: return yaml_load(check_yaml(data))["names"] + except Exception: + pass return {i: f"class{i}" for i in range(999)} # return default if above errors @@ -58,21 +59,24 @@ class AutoBackend(nn.Module): range of formats, each with specific naming conventions as outlined below: Supported Formats and Naming Conventions: - | Format | File Suffix | - |-----------------------|------------------| - | PyTorch | *.pt | - | TorchScript | *.torchscript | - | ONNX Runtime | *.onnx | - | ONNX OpenCV DNN | *.onnx (dnn=True)| - | OpenVINO | *openvino_model/ | - | CoreML | *.mlpackage | - | TensorRT | *.engine | - | TensorFlow SavedModel | *_saved_model | - | TensorFlow GraphDef | *.pb | - | TensorFlow Lite | *.tflite | - | TensorFlow Edge TPU | *_edgetpu.tflite | - | PaddlePaddle | *_paddle_model | - | NCNN | *_ncnn_model | + | Format | File Suffix | + | --------------------- | ----------------- | + | PyTorch | *.pt | + | TorchScript | *.torchscript | + | ONNX Runtime | *.onnx | + | ONNX OpenCV DNN | *.onnx (dnn=True) | + | OpenVINO | *openvino_model/ | + | CoreML | *.mlpackage | + | TensorRT | *.engine | + | TensorFlow SavedModel | *_saved_model/ | + | TensorFlow GraphDef | *.pb | + | TensorFlow Lite | *.tflite | + | TensorFlow Edge TPU | *_edgetpu.tflite | + | PaddlePaddle | *_paddle_model/ | + | MNN | *.mnn | + | NCNN | *_ncnn_model/ | + | IMX | *_imx_model/ | + | RKNN | *_rknn_model/ | This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy models across various platforms. 
@@ -81,7 +85,7 @@ class AutoBackend(nn.Module): @torch.no_grad() def __init__( self, - weights="yolov8n.pt", + weights="yolo11n.pt", device=torch.device("cpu"), dnn=False, data=None, @@ -94,7 +98,7 @@ def __init__( Initialize the AutoBackend for inference. Args: - weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. + weights (str | torch.nn.Module): Path to the model weights file or a module instance. Defaults to 'yolo11n.pt'. device (torch.device): Device to run the model on. Defaults to CPU. dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. @@ -119,17 +123,21 @@ def __init__( edgetpu, tfjs, paddle, + mnn, ncnn, + imx, + rknn, triton, ) = self._model_type(w) fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16 - nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) + nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn # BHWC formats (vs torch BCWH) stride = 32 # default stride - model, metadata = None, None + end2end = False # default end2end + model, metadata, task = None, None, None # Set device cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA - if cuda and not any([nn_module, pt, jit, engine, onnx]): # GPU dataloader formats + if cuda and not any([nn_module, pt, jit, engine, onnx, paddle]): # GPU dataloader formats device = torch.device("cpu") cuda = False @@ -179,8 +187,8 @@ def __init__( check_requirements("opencv-python>=4.5.4") net = cv2.dnn.readNetFromONNX(w) - # ONNX Runtime - elif onnx: + # ONNX Runtime and IMX + elif onnx or imx: LOGGER.info(f"Loading {w} for ONNX Runtime inference...") check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime")) if IS_RASPBERRYPI or IS_JETSON: @@ -188,10 +196,49 @@ def __init__( check_requirements("numpy==1.23.5") import onnxruntime - providers = ["CUDAExecutionProvider", 
"CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"] - session = onnxruntime.InferenceSession(w, providers=providers) + providers = ["CPUExecutionProvider"] + if cuda and "CUDAExecutionProvider" in onnxruntime.get_available_providers(): + providers.insert(0, "CUDAExecutionProvider") + elif cuda: # Only log warning if CUDA was requested but unavailable + LOGGER.warning("WARNING โš ๏ธ Failed to start ONNX Runtime with CUDA. Using CPU...") + device = torch.device("cpu") + cuda = False + LOGGER.info(f"Using ONNX Runtime {providers[0]}") + if onnx: + session = onnxruntime.InferenceSession(w, providers=providers) + else: + check_requirements( + ["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"] + ) + w = next(Path(w).glob("*.onnx")) + LOGGER.info(f"Loading {w} for ONNX IMX inference...") + import mct_quantizers as mctq + from sony_custom_layers.pytorch.object_detection import nms_ort # noqa + + session = onnxruntime.InferenceSession( + w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"] + ) + task = "detect" + output_names = [x.name for x in session.get_outputs()] metadata = session.get_modelmeta().custom_metadata_map + dynamic = isinstance(session.get_outputs()[0].shape[0], str) + fp16 = True if "float16" in session.get_inputs()[0].type else False + if not dynamic: + io = session.io_binding() + bindings = [] + for output in session.get_outputs(): + out_fp16 = "float16" in output.type + y_tensor = torch.empty(output.shape, dtype=torch.float16 if out_fp16 else torch.float32).to(device) + io.bind_output( + name=output.name, + device_type=device.type, + device_id=device.index if cuda else 0, + element_type=np.float16 if out_fp16 else np.float32, + shape=tuple(y_tensor.shape), + buffer_ptr=y_tensor.data_ptr(), + ) + bindings.append(y_tensor) # OpenVINO elif xml: @@ -221,14 +268,19 @@ def __init__( # TensorRT elif engine: LOGGER.info(f"Loading {w} for TensorRT inference...") + + if IS_JETSON and 
PYTHON_VERSION <= "3.8.0": + # fix error: `np.bool` was a deprecated alias for the builtin `bool` for JetPack 4 with Python <= 3.8.0 + check_requirements("numpy==1.23.5") + try: import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download except ImportError: if LINUX: - check_requirements("tensorrt>7.0.0,<=10.1.0") + check_requirements("tensorrt>7.0.0,!=10.1.0") import tensorrt as trt # noqa check_version(trt.__version__, ">=7.0.0", hard=True) - check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239") + check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239") if device.type == "cpu": device = torch.device("cuda:0") Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr")) @@ -241,6 +293,12 @@ def __init__( except UnicodeDecodeError: f.seek(0) # engine file may lack embedded Ultralytics metadata model = runtime.deserialize_cuda_engine(f.read()) # read engine + if "dla" in str(device.type): + dla_core = int(device.type.split(":")[1]) + assert dla_core in {0, 1}, ( + "Expected device type for inference in DLA is 'dla:0' or 'dla:1', but received '{device.type}'" + ) + runtime.DLA_core = dla_core # Model context try: @@ -264,8 +322,8 @@ def __init__( if -1 in tuple(model.get_tensor_shape(name)): dynamic = True context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1])) - if dtype == np.float16: - fp16 = True + if dtype == np.float16: + fp16 = True else: output_names.append(name) shape = tuple(context.get_tensor_shape(name)) @@ -321,8 +379,10 @@ def wrap_frozen_graph(gd, inputs, outputs): with open(w, "rb") as f: gd.ParseFromString(f.read()) frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) - with contextlib.suppress(StopIteration): # find metadata in SavedModel alongside GraphDef + try: # find metadata in SavedModel alongside GraphDef metadata = 
next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml")) + except StopIteration: + pass # TFLite or TFLite Edge TPU elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python @@ -333,11 +393,16 @@ def wrap_frozen_graph(gd, inputs, outputs): Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime - LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...") + device = device[3:] if str(device).startswith("tpu") else ":0" + LOGGER.info(f"Loading {w} on device {device[1:]} for TensorFlow Lite Edge TPU inference...") delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[ platform.system() ] - interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) + interpreter = Interpreter( + model_path=w, + experimental_delegates=[load_delegate(delegate, options={"device": device})], + ) + device = "cpu" # Required, otherwise PyTorch will try to use the wrong device else: # TFLite LOGGER.info(f"Loading {w} for TensorFlow Lite inference...") interpreter = Interpreter(model_path=w) # load TFLite model @@ -345,10 +410,12 @@ def wrap_frozen_graph(gd, inputs, outputs): input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs # Load metadata - with contextlib.suppress(zipfile.BadZipFile): + try: with zipfile.ZipFile(w, "r") as model: meta_file = model.namelist()[0] metadata = ast.literal_eval(model.read(meta_file).decode("utf-8")) + except zipfile.BadZipFile: + pass # TF.js elif tfjs: @@ -371,6 +438,23 @@ def wrap_frozen_graph(gd, inputs, outputs): output_names = predictor.get_output_names() metadata = w.parents[1] / "metadata.yaml" + # MNN + elif mnn: + LOGGER.info(f"Loading {w} for MNN inference...") + check_requirements("MNN") # requires MNN + import os + + import MNN + + 
config = {"precision": "low", "backend": "CPU", "numThread": (os.cpu_count() + 1) // 2} + rt = MNN.nn.create_runtime_manager((config,)) + net = MNN.nn.load_module_from_file(w, [], [], runtime_manager=rt, rearrange=True) + + def torch_to_mnn(x): + return MNN.expr.const(x.data_ptr(), x.shape) + + metadata = json.loads(net.get_info()["bizCode"]) + # NCNN elif ncnn: LOGGER.info(f"Loading {w} for NCNN inference...") @@ -392,6 +476,23 @@ def wrap_frozen_graph(gd, inputs, outputs): from ultralytics.utils.triton import TritonRemoteModel model = TritonRemoteModel(w) + metadata = model.metadata + + # RKNN + elif rknn: + if not is_rockchip(): + raise OSError("RKNN inference is only supported on Rockchip devices.") + LOGGER.info(f"Loading {w} for RKNN inference...") + check_requirements("rknn-toolkit-lite2") + from rknnlite.api import RKNNLite + + w = Path(w) + if not w.is_file(): # if not *.rknn + w = next(w.rglob("*.rknn")) # get *.rknn file from *_rknn_model dir + rknn_model = RKNNLite() + rknn_model.load_rknn(w) + rknn_model.init_runtime() + metadata = Path(w).parent / "metadata.yaml" # Any other format (unsupported) else: @@ -409,7 +510,7 @@ def wrap_frozen_graph(gd, inputs, outputs): for k, v in metadata.items(): if k in {"stride", "batch"}: metadata[k] = int(v) - elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str): + elif k in {"imgsz", "names", "kpt_shape", "args"} and isinstance(v, str): metadata[k] = eval(v) stride = metadata["stride"] task = metadata["task"] @@ -417,6 +518,7 @@ def wrap_frozen_graph(gd, inputs, outputs): imgsz = metadata["imgsz"] names = metadata["names"] kpt_shape = metadata.get("kpt_shape") + end2end = metadata.get("args", {}).get("nms", False) elif not (pt or triton or nn_module): LOGGER.warning(f"WARNING โš ๏ธ Metadata not found for 'model={weights}'") @@ -466,9 +568,26 @@ def forward(self, im, augment=False, visualize=False, embed=None): y = self.net.forward() # ONNX Runtime - elif self.onnx: - im = im.cpu().numpy() # torch to 
numpy - y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) + elif self.onnx or self.imx: + if self.dynamic: + im = im.cpu().numpy() # torch to numpy + y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) + else: + if not self.cuda: + im = im.cpu() + self.io.bind_input( + name="images", + device_type=im.device.type, + device_id=im.device.index if im.device.type == "cuda" else 0, + element_type=np.float16 if self.fp16 else np.float32, + shape=tuple(im.shape), + buffer_ptr=im.data_ptr(), + ) + self.session.run_with_iobinding(self.io) + y = self.bindings + if self.imx: + # boxes, conf, cls + y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1) # OpenVINO elif self.xml: @@ -496,7 +615,7 @@ def callback(request, userdata): # TensorRT elif self.engine: - if self.dynamic or im.shape != self.bindings["images"].shape: + if self.dynamic and im.shape != self.bindings["images"].shape: if self.is_trt10: self.context.set_input_shape("images", im.shape) self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape) @@ -532,10 +651,9 @@ def callback(request, userdata): # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32) # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) - elif len(y) == 1: # classification model - y = list(y.values()) - elif len(y) == 2: # segmentation model - y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) + y = list(y.values()) + if len(y) == 2 and len(y[1].shape) != 4: # segmentation model + y = list(reversed(y)) # reversed for segmentation models (pred, proto) # PaddlePaddle elif self.paddle: @@ -544,6 +662,12 @@ def callback(request, userdata): self.predictor.run() y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] + # MNN + elif self.mnn: + input_var = self.torch_to_mnn(im) + output_var = 
self.net.onForward([input_var]) + y = [x.read() for x in output_var] + # NCNN elif self.ncnn: mat_in = self.pyncnn.Mat(im[0].cpu().numpy()) @@ -557,6 +681,12 @@ def callback(request, userdata): im = im.cpu().numpy() # torch to numpy y = self.model(im) + # RKNN + elif self.rknn: + im = (im.cpu().numpy() * 255).astype("uint8") + im = im if isinstance(im, (list, tuple)) else [im] + y = self.rknn_model.inference(inputs=im) + # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) else: im = im.cpu().numpy() @@ -583,17 +713,18 @@ def callback(request, userdata): if x.ndim == 3: # if task is not classification, excluding masks (ndim=4) as well # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695 # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models - if x.shape[-1] == 6: # end-to-end model + if x.shape[-1] == 6 or self.end2end: # end-to-end model x[:, :, [0, 2]] *= w x[:, :, [1, 3]] *= h - # TODO: pose end-to-end model + if self.task == "pose": + x[:, :, 6::3] *= w + x[:, :, 7::3] *= h else: x[:, [0, 2]] *= w x[:, [1, 3]] *= h - if self.task == "pose" or x.shape[1] > 5: - kpt_offset = x.shape[1] - self.kpt_shape[0] * self.kpt_shape[1] - x[:, kpt_offset::3] *= w - x[:, kpt_offset+1::3] *= h + if self.task == "pose": + x[:, 5::3] *= w + x[:, 6::3] *= h y.append(x) # TF segment fixes: export is reversed vs ONNX export and protos are transposed if len(y) == 2: # segment with (det, proto) output order reversed @@ -609,8 +740,7 @@ def callback(request, userdata): # print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes if isinstance(y, (list, tuple)): if len(self.names) == 999 and (self.task == "segment" or len(y) == 2): # segments and names not defined - ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes - nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400) + nc = y[0].shape[1] - y[1].shape[1] - 4 # y = (1, 32, 
160, 160), (1, 116, 8400) self.names = {i: f"class{i}" for i in range(nc)} return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] else: @@ -650,7 +780,7 @@ def _model_type(p="path/to/model.pt"): saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle. Args: - p: path to the model file. Defaults to path/to/model.pt + p (str): path to the model file. Defaults to path/to/model.pt Examples: >>> model = AutoBackend(weights="path/to/model.onnx") diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py index edf7e6a0c85..ddc387223f2 100644 --- a/ultralytics/nn/modules/__init__.py +++ b/ultralytics/nn/modules/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Ultralytics modules. @@ -56,6 +56,7 @@ RepVGGDW, ResNetLayer, SCDown, + TorchVision, ) from .conv import ( CBAM, @@ -68,6 +69,7 @@ DWConvTranspose2d, Focus, GhostConv, + Index, LightConv, RepConv, SpatialAttention, @@ -160,4 +162,6 @@ "C2fCIB", "Attention", "PSA", + "TorchVision", + "Index", ) diff --git a/ultralytics/nn/modules/activation.py b/ultralytics/nn/modules/activation.py index aaf636e7625..cc6b44b47b6 100644 --- a/ultralytics/nn/modules/activation.py +++ b/ultralytics/nn/modules/activation.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Activation modules.""" import torch diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index 91a94f6ad73..1f16310acc8 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Block modules.""" import torch @@ -49,6 +49,7 @@ "Attention", "PSA", "SCDown", + "TorchVision", ) @@ -242,7 +243,8 @@ def forward(self, x): def forward_split(self, x): """Forward pass 
using split() instead of chunk().""" - y = list(self.cv1(x).split((self.c, self.c), 1)) + y = self.cv1(x).split((self.c, self.c), 1) + y = [y[0], y[1]] y.extend(m(y[-1]) for m in self.m) return self.cv2(torch.cat(y, 1)) @@ -315,8 +317,8 @@ def __init__(self, c1, c2, n=3, e=1.0): """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number.""" super().__init__() c_ = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, c2, 1, 1) - self.cv2 = Conv(c1, c2, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)]) self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity() @@ -1142,3 +1144,49 @@ def __init__(self, c1, c2, k, s): def forward(self, x): """Applies convolution and downsampling to the input tensor in the SCDown module.""" return self.cv2(self.cv1(x)) + + +class TorchVision(nn.Module): + """ + TorchVision module to allow loading any torchvision model. + + This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and customize the model by truncating or unwrapping layers. + + Attributes: + m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped. + + Args: + model (str): Name of the torchvision model to load. + weights (str, optional): Pre-trained weights to load. Default is "DEFAULT". + unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True. + truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2. + split (bool, optional): Returns output from intermediate child modules as list. Default is False. 
+ """ + + def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False): + """Load the model and weights from torchvision.""" + import torchvision # scope for faster 'import ultralytics' + + super().__init__() + if hasattr(torchvision.models, "get_model"): + self.m = torchvision.models.get_model(model, weights=weights) + else: + self.m = torchvision.models.__dict__[model](pretrained=bool(weights)) + if unwrap: + layers = list(self.m.children()) + if isinstance(layers[0], nn.Sequential): # Second-level for some models like EfficientNet, Swin + layers = [*list(layers[0].children()), *layers[1:]] + self.m = nn.Sequential(*(layers[:-truncate] if truncate else layers)) + self.split = split + else: + self.split = False + self.m.head = self.m.heads = nn.Identity() + + def forward(self, x): + """Forward pass through the model.""" + if self.split: + y = [x] + y.extend(m(y[-1]) for m in self.m) + else: + y = self.m(x) + return y diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py index aaa70f5745a..6c15e1d66cc 100644 --- a/ultralytics/nn/modules/conv.py +++ b/ultralytics/nn/modules/conv.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Convolution modules.""" import math @@ -21,6 +21,7 @@ "CBAM", "Concat", "RepConv", + "Index", ) @@ -50,7 +51,7 @@ def forward(self, x): return self.act(self.bn(self.conv(x))) def forward_fuse(self, x): - """Perform transposed convolution of 2D data.""" + """Apply convolution and activation without batch normalization.""" return self.act(self.conv(x)) @@ -330,3 +331,20 @@ def __init__(self, dimension=1): def forward(self, x): """Forward pass for the YOLOv8 mask Proto module.""" return torch.cat(x, self.d) + + +class Index(nn.Module): + """Returns a particular index of the input.""" + + def __init__(self, index=0): + """Returns a particular index of the input.""" + super().__init__() + self.index = index + + def 
forward(self, x): + """ + Forward pass. + + Expects a list of tensors as input. + """ + return x[self.index] diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index ece9a2b81a1..91889f5c14f 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Model head modules.""" import copy @@ -19,10 +19,11 @@ __all__ += "Regress", "Regress6", class Detect(nn.Module): - """YOLOv8 Detect head for detection models.""" + """YOLO Detect head for detection models.""" dynamic = False # force grid reconstruction export = False # export mode + format = None # export format end2end = False # end2end max_det = 300 # max_det shape = None @@ -30,8 +31,10 @@ class Detect(nn.Module): strides = torch.empty(0) # init separate_outputs = False + legacy = False # backward compatibility for v3/v5/v8/v9 models + def __init__(self, nc=80, ch=()): - """Initializes the YOLOv8 detection layer with specified number of classes and channels.""" + """Initializes the YOLO detection layer with specified number of classes and channels.""" super().__init__() self.nc = nc # number of classes self.nl = len(ch) # number of detection layers @@ -42,13 +45,17 @@ def __init__(self, nc=80, ch=()): self.cv2 = nn.ModuleList( nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch ) - self.cv3 = nn.ModuleList( - nn.Sequential( - nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)), - nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)), - nn.Conv2d(c3, self.nc, 1), + self.cv3 = ( + nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + if self.legacy + else nn.ModuleList( + nn.Sequential( + nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)), + nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)), + nn.Conv2d(c3, self.nc, 1), + ) + for x in ch ) - for x in ch ) 
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() @@ -111,7 +118,7 @@ def _inference(self, x): # Inference path shape = x[0].shape # BCHW x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2) - if self.dynamic or self.shape != shape: + if self.format != "imx" and (self.dynamic or self.shape != shape): self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) self.shape = shape @@ -129,6 +136,11 @@ def _inference(self, x): grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1) norm = self.strides / (self.stride[0] * grid_size) dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2]) + elif self.export and self.format == "imx": + dbox = self.decode_bboxes( + self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False + ) + return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1) else: dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides @@ -147,9 +159,9 @@ def bias_init(self): a[-1].bias.data[:] = 1.0 # box b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) - def decode_bboxes(self, bboxes, anchors): + def decode_bboxes(self, bboxes, anchors, xywh=True): """Decode bounding boxes.""" - return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1) + return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1) @staticmethod def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80): @@ -177,7 +189,7 @@ def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80): class Segment(Detect): - """YOLOv8 Segment head for segmentation models.""" + """YOLO Segment head for segmentation models.""" def __init__(self, nc=80, nm=32, npr=256, ch=()): """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" @@ -209,7 +221,7 @@ def forward(self, x): class 
OBB(Detect): - """YOLOv8 OBB detection head for detection with rotation models.""" + """YOLO OBB detection head for detection with rotation models.""" def __init__(self, nc=80, ne=1, ch=()): """Initialize OBB with number of classes `nc` and layer channels `ch`.""" @@ -239,7 +251,7 @@ def decode_bboxes(self, bboxes, anchors): class Pose(Detect): - """YOLOv8 Pose head for keypoints models.""" + """YOLO Pose head for keypoints models.""" separate_pose = False @@ -273,16 +285,20 @@ def forward(self, x): def kpts_decode(self, bs, kpts, shape): """Decodes keypoints.""" ndim = self.kpt_shape[1] - if self.export: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug - y = kpts.view(bs, *self.kpt_shape, -1) - if self.format in {"tflite", "edgetpu"}: + if self.export: + if self.format in { + "tflite", + "edgetpu", + }: # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug # Precompute normalization factor to increase numerical stability - grid_w = shape[2] - grid_h = shape[3] - grid_size = torch.tensor([grid_w, grid_h], device=kpts.device).reshape(2, 1) + y = kpts.view(bs, *self.kpt_shape, -1) + grid_h, grid_w = self.shape[2], self.shape[3] + grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1) norm = self.strides / (self.stride[0] * grid_size) a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm else: + # NCNN fix + y = kpts.view(bs, *self.kpt_shape, -1) a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides if ndim == 3: a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2) @@ -297,10 +313,12 @@ def kpts_decode(self, bs, kpts, shape): class Classify(nn.Module): - """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2).""" + """YOLO classification head, i.e. 
x(b,c1,20,20) to x(b,c2).""" + + export = False # export mode def __init__(self, c1, c2, k=1, s=1, p=None, g=1): - """Initializes YOLOv8 classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" + """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" super().__init__() c_ = 1280 # efficientnet_b0 size self.conv = Conv(c1, c_, k, s, p, g) @@ -313,7 +331,10 @@ def forward(self, x): if isinstance(x, list): x = torch.cat(x, 1) x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) - return x if self.training else x.softmax(1) + if self.training: + return x + y = x.softmax(1) # get final output + return y if self.export else (y, x) class Regress(nn.Module): @@ -377,10 +398,10 @@ def forward(self, x): class WorldDetect(Detect): - """Head for integrating YOLOv8 detection models with semantic understanding from text embeddings.""" + """Head for integrating YOLO detection models with semantic understanding from text embeddings.""" def __init__(self, nc=80, embed=512, with_bn=False, ch=()): - """Initialize YOLOv8 detection layer with nc classes and layer channels ch.""" + """Initialize YOLO detection layer with nc classes and layer channels ch.""" super().__init__(nc, ch) c3 = max(ch[0], min(self.nc, 100)) self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch) diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py index bae008ac6c2..c198736908e 100644 --- a/ultralytics/nn/modules/transformer.py +++ b/ultralytics/nn/modules/transformer.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Transformer modules.""" import math diff --git a/ultralytics/nn/modules/utils.py b/ultralytics/nn/modules/utils.py index a7c86391c42..c7837ebe6c6 100644 --- a/ultralytics/nn/modules/utils.py +++ b/ultralytics/nn/modules/utils.py @@ -1,4 
+1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Module utils.""" import copy diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index a1930c238b2..5b303969abe 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -1,13 +1,14 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import contextlib import pickle +import re import types from copy import deepcopy from pathlib import Path +import thop import torch -import torch.nn as nn from ultralytics.nn.modules import ( AIFI, @@ -49,6 +50,7 @@ HGBlock, HGStem, ImagePoolingAttn, + Index, Pose, Regress, Regress6, @@ -60,6 +62,7 @@ RTDETRDecoder, SCDown, Segment, + TorchVision, WorldDetect, v10Detect, ) @@ -86,13 +89,8 @@ time_sync, ) -try: - import thop -except ImportError: - thop = None - -class BaseModel(nn.Module): +class BaseModel(torch.nn.Module): """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.""" def forward(self, x, *args, **kwargs): @@ -155,7 +153,7 @@ def _predict_once(self, x, profile=False, visualize=False, embed=None): if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) return x @@ -174,12 +172,9 @@ def _profile_one_layer(self, m, x, dt): the provided list. Args: - m (nn.Module): The layer to be profiled. + m (torch.nn.Module): The layer to be profiled. x (torch.Tensor): The input data to the layer. dt (list): A list to store the computation time of the layer. 
- - Returns: - None """ c = m == self.model[-1] and isinstance(x, list) # is final layer list, copy input as inplace fix flops = thop.profile(m, inputs=[x.copy() if c else x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs @@ -199,7 +194,7 @@ def fuse(self, verbose=True): computation efficiency. Returns: - (nn.Module): The fused model is returned. + (torch.nn.Module): The fused model is returned. """ if not self.is_fused(): for m in self.model.modules(): @@ -233,7 +228,7 @@ def is_fused(self, thresh=10): Returns: (bool): True if the number of BatchNorm layers in the model is less than the threshold, False otherwise. """ - bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d() + bn = tuple(v for k, v in torch.nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d() return sum(isinstance(v, bn) for v in self.modules()) < thresh # True if < 'thresh' BatchNorm layers in model def info(self, detailed=False, verbose=True, imgsz=640): @@ -300,12 +295,18 @@ def init_criterion(self): class DetectionModel(BaseModel): - """YOLOv8 detection model.""" + """YOLO detection model.""" - def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True): # model, input channels, number of classes - """Initialize the YOLOv8 detection model with the given config and parameters.""" + def __init__(self, cfg="yolo11n.yaml", ch=3, nc=None, verbose=True): # model, input channels, number of classes + """Initialize the YOLO detection model with the given config and parameters.""" super().__init__() self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict + if self.yaml["backbone"][0][2] == "Silence": + LOGGER.warning( + "WARNING โš ๏ธ YOLOv9 `Silence` module is deprecated in favor of torch.nn.Identity. " + "Please delete local *.pt file and re-download the latest model checkpoint." 
+ ) + self.yaml["backbone"][0][2] = "nn.Identity" # Define model ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels @@ -386,10 +387,10 @@ def init_criterion(self): class OBBModel(DetectionModel): - """YOLOv8 Oriented Bounding Box (OBB) model.""" + """YOLO Oriented Bounding Box (OBB) model.""" - def __init__(self, cfg="yolov8n-obb.yaml", ch=3, nc=None, verbose=True): - """Initialize YOLOv8 OBB model with given config and parameters.""" + def __init__(self, cfg="yolo11n-obb.yaml", ch=3, nc=None, verbose=True): + """Initialize YOLO OBB model with given config and parameters.""" super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) def init_criterion(self): @@ -398,9 +399,9 @@ def init_criterion(self): class SegmentationModel(DetectionModel): - """YOLOv8 segmentation model.""" + """YOLO segmentation model.""" - def __init__(self, cfg="yolov8n-seg.yaml", ch=3, nc=None, verbose=True): + def __init__(self, cfg="yolo11n-seg.yaml", ch=3, nc=None, verbose=True): """Initialize YOLOv8 segmentation model with given config and parameters.""" super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) @@ -410,9 +411,9 @@ def init_criterion(self): class PoseModel(DetectionModel): - """YOLOv8 pose model.""" + """YOLO pose model.""" - def __init__(self, cfg="yolov8n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True): + def __init__(self, cfg="yolo11n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True): """Initialize YOLOv8 Pose model.""" if not isinstance(cfg, dict): cfg = yaml_model_load(cfg) # load model YAML @@ -427,9 +428,9 @@ def init_criterion(self): class ClassificationModel(BaseModel): - """YOLOv8 classification model.""" + """YOLO classification model.""" - def __init__(self, cfg="yolov8n-cls.yaml", ch=3, nc=None, verbose=True): + def __init__(self, cfg="yolo11n-cls.yaml", ch=3, nc=None, verbose=True): """Init ClassificationModel with YAML, channels, number of classes, verbose flag.""" super().__init__() self._from_yaml(cfg, 
ch, nc, verbose) @@ -456,20 +457,22 @@ def reshape_outputs(model, nc): name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1] # last module if isinstance(m, Classify): # YOLO Classify() head if m.linear.out_features != nc: - m.linear = nn.Linear(m.linear.in_features, nc) - elif isinstance(m, nn.Linear): # ResNet, EfficientNet + m.linear = torch.nn.Linear(m.linear.in_features, nc) + elif isinstance(m, torch.nn.Linear): # ResNet, EfficientNet if m.out_features != nc: - setattr(model, name, nn.Linear(m.in_features, nc)) - elif isinstance(m, nn.Sequential): + setattr(model, name, torch.nn.Linear(m.in_features, nc)) + elif isinstance(m, torch.nn.Sequential): types = [type(x) for x in m] - if nn.Linear in types: - i = len(types) - 1 - types[::-1].index(nn.Linear) # last nn.Linear index + if torch.nn.Linear in types: + i = len(types) - 1 - types[::-1].index(torch.nn.Linear) # last torch.nn.Linear index if m[i].out_features != nc: - m[i] = nn.Linear(m[i].in_features, nc) - elif nn.Conv2d in types: - i = len(types) - 1 - types[::-1].index(nn.Conv2d) # last nn.Conv2d index + m[i] = torch.nn.Linear(m[i].in_features, nc) + elif torch.nn.Conv2d in types: + i = len(types) - 1 - types[::-1].index(torch.nn.Conv2d) # last torch.nn.Conv2d index if m[i].out_channels != nc: - m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None) + m[i] = torch.nn.Conv2d( + m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None + ) def init_criterion(self): """Initialize the loss criterion for the ClassificationModel.""" @@ -506,12 +509,6 @@ class RTDETRDetectionModel(DetectionModel): the training and inference processes. RTDETR is an object detection and tracking model that extends from the DetectionModel base class. - Attributes: - cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'. - ch (int): Number of input channels. Default is 3 (RGB). 
- nc (int, optional): Number of classes for object detection. Default is None. - verbose (bool): Specifies if summary statistics are shown during initialization. Default is True. - Methods: init_criterion: Initializes the criterion used for loss calculation. loss: Computes and returns the loss during training. @@ -607,7 +604,7 @@ def predict(self, x, profile=False, visualize=False, batch=None, augment=False, if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) head = self.model[-1] @@ -683,7 +680,7 @@ def predict(self, x, profile=False, visualize=False, txt_feats=None, augment=Fal if visualize: feature_visualization(x, m.type, m.i, save_dir=visualize) if embed and m.i in embed: - embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten + embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten if m.i == max(embed): return torch.unbind(torch.cat(embeddings, 1), dim=0) return x @@ -704,15 +701,7 @@ def loss(self, batch, preds=None): return self.criterion(preds, batch) -# NOTE: keep YOLOv10DetectionModel for compatibility with yolov10 pretrained weights. 
-class YOLOv10DetectionModel(DetectionModel): - """YOLOv10 Detection model.""" - - def __init__(self, cfg="yolov10n.yaml", ch=3, nc=None, verbose=True, end2end=True): - super().__init__(cfg, ch, nc, verbose, end2end) - - -class Ensemble(nn.ModuleList): +class Ensemble(torch.nn.ModuleList): """Ensemble of models.""" def __init__(self): @@ -870,14 +859,14 @@ def torch_safe_load(weight, safe_only=False): f"with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with " f"YOLOv8 at https://github.com/ultralytics/ultralytics." f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to " - f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolov8n.pt'" + f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo11n.pt'" ) ) from e LOGGER.warning( f"WARNING โš ๏ธ {weight} appears to require '{e.name}', which is not in Ultralytics requirements." f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future." f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to " - f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolov8n.pt'" + f"run a command with an official Ultralytics model, i.e. 
'yolo predict model=yolo11n.pt'" ) check_requirements(e.name) # install missing module ckpt = torch.load(file, map_location="cpu") @@ -915,7 +904,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False): for m in ensemble.modules(): if hasattr(m, "inplace"): m.inplace = inplace - elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"): + elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model @@ -950,7 +939,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False): for m in model.modules(): if hasattr(m, "inplace"): m.inplace = inplace - elif isinstance(m, nn.Upsample) and not hasattr(m, "recompute_scale_factor"): + elif isinstance(m, torch.nn.Upsample) and not hasattr(m, "recompute_scale_factor"): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model and ckpt @@ -962,6 +951,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) import ast # Args + legacy = True # backward compatibility for v3/v5/v8/v9 models max_channels = float("inf") nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales")) depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape")) @@ -973,7 +963,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) depth, width, max_channels = scales[scale] if act: - Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() + Conv.default_act = eval(act) # redefine default activation, i.e. 
Conv.default_act = torch.nn.SiLU() if verbose: LOGGER.info(f"{colorstr('activation:')} {act}") # print @@ -981,15 +971,8 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}") ch = [ch] layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out - for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args - m = getattr(torch.nn, m[3:]) if "nn." in m else globals()[m] # get module - for j, a in enumerate(args): - if isinstance(a, str): - with contextlib.suppress(ValueError): - args[j] = locals()[a] if a in locals() else ast.literal_eval(a) - - n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain - if m in { + base_modules = frozenset( + { Classify, Conv, ConvTranspose, @@ -1016,47 +999,65 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) C3, C3TR, C3Ghost, - nn.ConvTranspose2d, + torch.nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3, PSA, SCDown, C2fCIB, - }: + } + ) + repeat_modules = frozenset( # modules with 'repeat' arguments + { + BottleneckCSP, + C1, + C2, + C2f, + C3k2, + C2fAttn, + C3, + C3TR, + C3Ghost, + C3x, + RepC3, + C2fPSA, + C2fCIB, + C2PSA, + } + ) + for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args + m = ( + getattr(torch.nn, m[3:]) + if "nn." in m + else getattr(__import__("torchvision").ops, m[16:]) + if "torchvision.ops." in m + else globals()[m] + ) # get module + for j, a in enumerate(args): + if isinstance(a, str): + with contextlib.suppress(ValueError): + args[j] = locals()[a] if a in locals() else ast.literal_eval(a) + n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain + if m in base_modules: c1, c2 = ch[f], args[0] if c2 != nc: # if c2 not equal to number of classes (i.e. 
for Classify() output) c2 = make_divisible(min(c2, max_channels) * width, 8) - if m is C2fAttn: - args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8) # embed channels - args[2] = int( - max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2] - ) # num heads + if m is C2fAttn: # set 1) embed channels and 2) num heads + args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8) + args[2] = int(max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2]) args = [c1, c2, *args[1:]] - if m in { - BottleneckCSP, - C1, - C2, - C2f, - C3k2, - C2fAttn, - C3, - C3TR, - C3Ghost, - C3x, - RepC3, - C2fPSA, - C2fCIB, - C2PSA, - }: + if m in repeat_modules: args.insert(2, n) # number of repeats n = 1 - if m is C3k2 and scale in "mlx": # for M/L/X sizes - args[3] = True + if m is C3k2: # for M/L/X sizes + legacy = False + if scale in "mlx": + args[3] = True elif m is AIFI: args = [ch[f], *args] - elif m in {HGStem, HGBlock}: + elif m in frozenset({HGStem, HGBlock}): c1, cm, c2 = ch[f], args[0], args[1] args = [c1, cm, c2, *args[2:]] if m is HGBlock: @@ -1064,14 +1065,16 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) n = 1 elif m is ResNetLayer: c2 = args[1] if args[3] else args[1] * 4 - elif m is nn.BatchNorm2d: + elif m is torch.nn.BatchNorm2d: args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) - elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}: + elif m in frozenset({Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}): args.append([ch[x] for x in f]) if m is Segment: args[2] = make_divisible(min(args[2], max_channels) * width, 8) + if m in {Detect, Segment, Pose, OBB}: + m.legacy = legacy elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1 args.insert(1, [ch[x] for x in f]) elif m in (Regress, Regress6): @@ -1085,27 +1088,29 @@ def parse_model(d, ch, verbose=True): # model_dict, 
input_channels(3) args = [c1, c2, *args[1:]] elif m is CBFuse: c2 = ch[f[-1]] + elif m in frozenset({TorchVision, Index}): + c2 = args[0] + c1 = ch[f] + args = [*args[1:]] else: c2 = ch[f] - m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + m_ = torch.nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module t = str(m)[8:-2].replace("__main__.", "") # module type - m.np = sum(x.numel() for x in m_.parameters()) # number params + m_.np = sum(x.numel() for x in m_.parameters()) # number params m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type if verbose: - LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}") # print + LOGGER.info(f"{i:>3}{str(f):>20}{n_:>3}{m_.np:10.0f} {t:<45}{str(args):<30}") # print save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist layers.append(m_) if i == 0: ch = [] ch.append(c2) - return nn.Sequential(*layers), sorted(save) + return torch.nn.Sequential(*layers), sorted(save) def yaml_model_load(path): """Load a YOLOv8 model from a YAML file.""" - import re - path = Path(path) if path.stem in (f"yolov{d}{x}6" for x in "nsmlx" for d in (5, 8)): new_stem = re.sub(r"(\d+)([nslmx])6(.+)?$", r"\1\2-p6\3", path.stem) @@ -1132,11 +1137,10 @@ def guess_model_scale(model_path): Returns: (str): The size character of the model's scale, which can be n, s, m, l, or x. """ - with contextlib.suppress(AttributeError): - import re - - return re.search(r"yolo[v]?\d+([nslmx])", Path(model_path).stem).group(1) # n, s, m, l, or x - return "" + try: + return re.search(r"yolo[v]?\d+([nslmx])", Path(model_path).stem).group(1) # noqa, returns n, s, m, l, or x + except AttributeError: + return "" def guess_model_task(model): @@ -1144,7 +1148,7 @@ def guess_model_task(model): Guess the task of a PyTorch model from its architecture or configuration. Args: - model (nn.Module | dict): PyTorch model or model configuration in YAML format. 
+ model (torch.nn.Module | dict): PyTorch model or model configuration in YAML format. Returns: (str): Task of the model ('detect', 'segment', 'classify', 'pose', 'regress'). @@ -1173,16 +1177,14 @@ def cfg2task(cfg): if isinstance(model, dict): with contextlib.suppress(Exception): return cfg2task(model) - # Guess from PyTorch model - if isinstance(model, nn.Module): # PyTorch model + if isinstance(model, torch.nn.Module): # PyTorch model for x in "model.args", "model.model.args", "model.model.model.args": with contextlib.suppress(Exception): return eval(x)["task"] for x in "model.yaml", "model.model.yaml", "model.model.model.yaml": with contextlib.suppress(Exception): return cfg2task(eval(x)) - for m in model.modules(): if isinstance(m, Segment): return "segment" diff --git a/ultralytics/solutions/__init__.py b/ultralytics/solutions/__init__.py index 4446c1826ed..635cb3ad7e2 100644 --- a/ultralytics/solutions/__init__.py +++ b/ultralytics/solutions/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .ai_gym import AIGym from .analytics import Analytics @@ -7,8 +7,11 @@ from .object_counter import ObjectCounter from .parking_management import ParkingManagement, ParkingPtsSelection from .queue_management import QueueManager +from .region_counter import RegionCounter +from .security_alarm import SecurityAlarm from .speed_estimation import SpeedEstimator -from .streamlit_inference import inference +from .streamlit_inference import Inference +from .trackzone import TrackZone __all__ = ( "AIGym", @@ -20,5 +23,8 @@ "QueueManager", "SpeedEstimator", "Analytics", - "inference", + "Inference", + "RegionCounter", + "TrackZone", + "SecurityAlarm", ) diff --git a/ultralytics/solutions/ai_gym.py b/ultralytics/solutions/ai_gym.py index 349e46e8f08..fab84f16891 100644 --- a/ultralytics/solutions/ai_gym.py +++ b/ultralytics/solutions/ai_gym.py @@ -1,127 +1,111 @@ -# Ultralytics YOLO 
๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import cv2 - -from ultralytics.utils.checks import check_imshow +from ultralytics.solutions.solutions import BaseSolution from ultralytics.utils.plotting import Annotator -class AIGym: - """A class to manage the gym steps of people in a real-time video stream based on their poses.""" - - def __init__( - self, - kpts_to_check, - line_thickness=2, - view_img=False, - pose_up_angle=145.0, - pose_down_angle=90.0, - pose_type="pullup", - ): +class AIGym(BaseSolution): + """ + A class to manage gym steps of people in a real-time video stream based on their poses. + + This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts + repetitions of exercises based on predefined angle thresholds for up and down positions. + + Attributes: + count (List[int]): Repetition counts for each detected person. + angle (List[float]): Current angle of the tracked body part for each person. + stage (List[str]): Current exercise stage ('up', 'down', or '-') for each person. + initial_stage (str | None): Initial stage of the exercise. + up_angle (float): Angle threshold for considering the 'up' position of an exercise. + down_angle (float): Angle threshold for considering the 'down' position of an exercise. + kpts (List[int]): Indices of keypoints used for angle calculation. + annotator (Annotator): Object for drawing annotations on the image. + + Methods: + monitor: Processes a frame to detect poses, calculate angles, and count repetitions. 
+ + Examples: + >>> gym = AIGym(model="yolo11n-pose.pt") + >>> image = cv2.imread("gym_scene.jpg") + >>> processed_image = gym.monitor(image) + >>> cv2.imshow("Processed Image", processed_image) + >>> cv2.waitKey(0) + """ + + def __init__(self, **kwargs): + """Initializes AIGym for workout monitoring using pose estimation and predefined angles.""" + # Check if the model name ends with '-pose' + if "model" in kwargs and "-pose" not in kwargs["model"]: + kwargs["model"] = "yolo11n-pose.pt" + elif "model" not in kwargs: + kwargs["model"] = "yolo11n-pose.pt" + + super().__init__(**kwargs) + self.count = [] # List for counts, necessary where there are multiple objects in frame + self.angle = [] # List for angle, necessary where there are multiple objects in frame + self.stage = [] # List for stage, necessary where there are multiple objects in frame + + # Extract details from CFG single time for usage later + self.initial_stage = None + self.up_angle = float(self.CFG["up_angle"]) # Pose up predefined angle to consider up pose + self.down_angle = float(self.CFG["down_angle"]) # Pose down predefined angle to consider down pose + self.kpts = self.CFG["kpts"] # User selected kpts of workouts storage for further usage + + def monitor(self, im0): """ - Initializes the AIGym class with the specified parameters. + Monitors workouts using Ultralytics YOLO Pose Model. - Args: - kpts_to_check (list): Indices of keypoints to check. - line_thickness (int, optional): Thickness of the lines drawn. Defaults to 2. - view_img (bool, optional): Flag to display the image. Defaults to False. - pose_up_angle (float, optional): Angle threshold for the 'up' pose. Defaults to 145.0. - pose_down_angle (float, optional): Angle threshold for the 'down' pose. Defaults to 90.0. - pose_type (str, optional): Type of pose to detect ('pullup', 'pushup', 'abworkout'). Defaults to "pullup". 
- """ - # Image and line thickness - self.im0 = None - self.tf = line_thickness - - # Keypoints and count information - self.keypoints = None - self.poseup_angle = pose_up_angle - self.posedown_angle = pose_down_angle - self.threshold = 0.001 - - # Store stage, count and angle information - self.angle = None - self.count = None - self.stage = None - self.pose_type = pose_type - self.kpts_to_check = kpts_to_check - - # Visual Information - self.view_img = view_img - self.annotator = None - - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - self.count = [] - self.angle = [] - self.stage = [] - - def start_counting(self, im0, results): - """ - Function used to count the gym steps. + This function processes an input image to track and analyze human poses for workout monitoring. It uses + the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined + angle thresholds. Args: - im0 (ndarray): Current frame from the video stream. - results (list): Pose estimation data. 
- """ - self.im0 = im0 - - if not len(results[0]): - return self.im0 - - if len(results[0]) > len(self.count): - new_human = len(results[0]) - len(self.count) - self.count += [0] * new_human - self.angle += [0] * new_human - self.stage += ["-"] * new_human - - self.keypoints = results[0].keypoints.data - self.annotator = Annotator(im0, line_width=self.tf) - - for ind, k in enumerate(reversed(self.keypoints)): - # Estimate angle and draw specific points based on pose type - if self.pose_type in {"pushup", "pullup", "abworkout", "squat"}: - self.angle[ind] = self.annotator.estimate_pose_angle( - k[int(self.kpts_to_check[0])].cpu(), - k[int(self.kpts_to_check[1])].cpu(), - k[int(self.kpts_to_check[2])].cpu(), - ) - self.im0 = self.annotator.draw_specific_points(k, self.kpts_to_check, shape=(640, 640), radius=10) - - # Check and update pose stages and counts based on angle - if self.pose_type in {"abworkout", "pullup"}: - if self.angle[ind] > self.poseup_angle: - self.stage[ind] = "down" - if self.angle[ind] < self.posedown_angle and self.stage[ind] == "down": - self.stage[ind] = "up" - self.count[ind] += 1 + im0 (ndarray): Input image for processing. + + Returns: + (ndarray): Processed image with annotations for workout monitoring. 
- elif self.pose_type in {"pushup", "squat"}: - if self.angle[ind] > self.poseup_angle: - self.stage[ind] = "up" - if self.angle[ind] < self.posedown_angle and self.stage[ind] == "up": - self.stage[ind] = "down" + Examples: + >>> gym = AIGym() + >>> image = cv2.imread("workout.jpg") + >>> processed_image = gym.monitor(image) + """ + # Extract tracks + tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)[0] + + if tracks.boxes.id is not None: + # Extract and check keypoints + if len(tracks) > len(self.count): + new_human = len(tracks) - len(self.count) + self.angle += [0] * new_human + self.count += [0] * new_human + self.stage += ["-"] * new_human + + # Initialize annotator + self.annotator = Annotator(im0, line_width=self.line_width) + + # Enumerate over keypoints + for ind, k in enumerate(reversed(tracks.keypoints.data)): + # Get keypoints and estimate the angle + kpts = [k[int(self.kpts[i])].cpu() for i in range(3)] + self.angle[ind] = self.annotator.estimate_pose_angle(*kpts) + im0 = self.annotator.draw_specific_points(k, self.kpts, radius=self.line_width * 3) + + # Determine stage and count logic based on angle thresholds + if self.angle[ind] < self.down_angle: + if self.stage[ind] == "up": self.count[ind] += 1 + self.stage[ind] = "down" + elif self.angle[ind] > self.up_angle: + self.stage[ind] = "up" + # Display angle, count, and stage text self.annotator.plot_angle_and_count_and_stage( - angle_text=self.angle[ind], - count_text=self.count[ind], - stage_text=self.stage[ind], - center_kpt=k[int(self.kpts_to_check[1])], + angle_text=self.angle[ind], # angle text for display + count_text=self.count[ind], # count text for workouts + stage_text=self.stage[ind], # stage position text + center_kpt=k[int(self.kpts[1])], # center keypoint for display ) - # Draw keypoints - self.annotator.kpts(k, shape=(640, 640), radius=1, kpt_line=True) - - # Display the image if environment supports it and view_img is True - if 
self.env_check and self.view_img: - cv2.imshow("Ultralytics YOLOv8 AI GYM", self.im0) - if cv2.waitKey(1) & 0xFF == ord("q"): - return - - return self.im0 - - -if __name__ == "__main__": - kpts_to_check = [0, 1, 2] # example keypoints - aigym = AIGym(kpts_to_check) + self.display_output(im0) # Display output image, if environment support display + return im0 # return an image for writing or further usage diff --git a/ultralytics/solutions/analytics.py b/ultralytics/solutions/analytics.py index c2990097786..3a62e8c2e68 100644 --- a/ultralytics/solutions/analytics.py +++ b/ultralytics/solutions/analytics.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import warnings from itertools import cycle import cv2 @@ -9,299 +8,240 @@ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from matplotlib.figure import Figure - -class Analytics: - """A class to create and update various types of charts (line, bar, pie, area) for visual analytics.""" - - def __init__( - self, - type, - writer, - im0_shape, - title="ultralytics", - x_label="x", - y_label="y", - bg_color="white", - fg_color="black", - line_color="yellow", - line_width=2, - points_width=10, - fontsize=13, - view_img=False, - save_img=True, - max_points=50, - ): - """ - Initialize the Analytics class with various chart types. - - Args: - type (str): Type of chart to initialize ('line', 'bar', 'pie', or 'area'). - writer (object): Video writer object to save the frames. - im0_shape (tuple): Shape of the input image (width, height). - title (str): Title of the chart. - x_label (str): Label for the x-axis. - y_label (str): Label for the y-axis. - bg_color (str): Background color of the chart. - fg_color (str): Foreground (text) color of the chart. - line_color (str): Line color for line charts. - line_width (int): Width of the lines in line charts. 
- points_width (int): Width of line points highlighter - fontsize (int): Font size for chart text. - view_img (bool): Whether to display the image. - save_img (bool): Whether to save the image. - max_points (int): Specifies when to remove the oldest points in a graph for multiple lines. - """ - self.bg_color = bg_color - self.fg_color = fg_color - self.view_img = view_img - self.save_img = save_img - self.title = title - self.writer = writer - self.max_points = max_points - self.line_color = line_color - self.x_label = x_label - self.y_label = y_label - self.points_width = points_width - self.line_width = line_width - self.fontsize = fontsize - - # Set figure size based on image shape - figsize = (im0_shape[0] / 100, im0_shape[1] / 100) - - if type in {"line", "area"}: - # Initialize line or area plot +from ultralytics.solutions.solutions import BaseSolution # Import a parent class + + +class Analytics(BaseSolution): + """ + A class for creating and updating various types of charts for visual analytics. + + This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts + based on object detection and tracking data. + + Attributes: + type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area'). + x_label (str): Label for the x-axis. + y_label (str): Label for the y-axis. + bg_color (str): Background color of the chart frame. + fg_color (str): Foreground color of the chart frame. + title (str): Title of the chart window. + max_points (int): Maximum number of data points to display on the chart. + fontsize (int): Font size for text display. + color_cycle (cycle): Cyclic iterator for chart colors. + total_counts (int): Total count of detected objects (used for line charts). + clswise_count (Dict[str, int]): Dictionary for class-wise object counts. + fig (Figure): Matplotlib figure object for the chart. + ax (Axes): Matplotlib axes object for the chart. 
+ canvas (FigureCanvas): Canvas for rendering the chart. + + Methods: + process_data: Processes image data and updates the chart. + update_graph: Updates the chart with new data points. + + Examples: + >>> analytics = Analytics(analytics_type="line") + >>> frame = cv2.imread("image.jpg") + >>> processed_frame = analytics.process_data(frame, frame_number=1) + >>> cv2.imshow("Analytics", processed_frame) + """ + + def __init__(self, **kwargs): + """Initialize Analytics class with various chart types for visual data representation.""" + super().__init__(**kwargs) + + self.type = self.CFG["analytics_type"] # extract type of analytics + self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#" + self.y_label = "Total Counts" + + # Predefined data + self.bg_color = "#F3F3F3" # background color of frame + self.fg_color = "#111E68" # foreground color of frame + self.title = "Ultralytics Solutions" # window name + self.max_points = 45 # maximum points to be drawn on window + self.fontsize = 25 # text font size for display + figsize = (19.2, 10.8) # Set output image size 1920 * 1080 + self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"]) + + self.total_counts = 0 # count variable for storing total counts i.e. 
for line + self.clswise_count = {} # dictionary for class-wise counts + + # Ensure line and area chart + if self.type in {"line", "area"}: self.lines = {} self.fig = Figure(facecolor=self.bg_color, figsize=figsize) - self.canvas = FigureCanvas(self.fig) + self.canvas = FigureCanvas(self.fig) # Set common axis properties self.ax = self.fig.add_subplot(111, facecolor=self.bg_color) - if type == "line": - (self.line,) = self.ax.plot([], [], color=self.line_color, linewidth=self.line_width) - - elif type in {"bar", "pie"}: + if self.type == "line": + (self.line,) = self.ax.plot([], [], color="cyan", linewidth=self.line_width) + elif self.type in {"bar", "pie"}: # Initialize bar or pie plot self.fig, self.ax = plt.subplots(figsize=figsize, facecolor=self.bg_color) + self.canvas = FigureCanvas(self.fig) # Set common axis properties self.ax.set_facecolor(self.bg_color) - color_palette = [ - (31, 119, 180), - (255, 127, 14), - (44, 160, 44), - (214, 39, 40), - (148, 103, 189), - (140, 86, 75), - (227, 119, 194), - (127, 127, 127), - (188, 189, 34), - (23, 190, 207), - ] - self.color_palette = [(r / 255, g / 255, b / 255, 1) for r, g, b in color_palette] - self.color_cycle = cycle(self.color_palette) self.color_mapping = {} - # Ensure pie chart is circular - self.ax.axis("equal") if type == "pie" else None - - # Set common axis properties - self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize) - self.ax.set_xlabel(x_label, color=self.fg_color, fontsize=self.fontsize - 3) - self.ax.set_ylabel(y_label, color=self.fg_color, fontsize=self.fontsize - 3) - self.ax.tick_params(axis="both", colors=self.fg_color) + if self.type == "pie": # Ensure pie chart is circular + self.ax.axis("equal") - def update_area(self, frame_number, counts_dict): + def process_data(self, im0, frame_number): """ - Update the area graph with new data for multiple classes. + Processes image data and runs object tracking to update analytics charts. 
Args: - frame_number (int): The current frame number. - counts_dict (dict): Dictionary with class names as keys and counts as values. - """ - x_data = np.array([]) - y_data_dict = {key: np.array([]) for key in counts_dict.keys()} - - if self.ax.lines: - x_data = self.ax.lines[0].get_xdata() - for line, key in zip(self.ax.lines, counts_dict.keys()): - y_data_dict[key] = line.get_ydata() - - x_data = np.append(x_data, float(frame_number)) - max_length = len(x_data) - - for key in counts_dict.keys(): - y_data_dict[key] = np.append(y_data_dict[key], float(counts_dict[key])) - if len(y_data_dict[key]) < max_length: - y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])), "constant") - - # Remove the oldest points if the number of points exceeds max_points - if len(x_data) > self.max_points: - x_data = x_data[1:] - for key in counts_dict.keys(): - y_data_dict[key] = y_data_dict[key][1:] - - self.ax.clear() - - colors = ["#E1FF25", "#0BDBEB", "#FF64DA", "#111F68", "#042AFF"] - color_cycle = cycle(colors) - - for key, y_data in y_data_dict.items(): - color = next(color_cycle) - self.ax.fill_between(x_data, y_data, color=color, alpha=0.6) - self.ax.plot( - x_data, - y_data, - color=color, - linewidth=self.line_width, - marker="o", - markersize=self.points_width, - label=f"{key} Data Points", - ) + im0 (np.ndarray): Input image for processing. + frame_number (int): Video frame number for plotting the data. - self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize) - self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3) - self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3) - legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.fg_color) + Returns: + (np.ndarray): Processed image with updated analytics chart. 
- # Set legend text color - for text in legend.get_texts(): - text.set_color(self.fg_color) - - self.canvas.draw() - im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) + Raises: + ModuleNotFoundError: If an unsupported chart type is specified. - def update_line(self, frame_number, total_counts): + Examples: + >>> analytics = Analytics(analytics_type="line") + >>> frame = np.zeros((480, 640, 3), dtype=np.uint8) + >>> processed_frame = analytics.process_data(frame, frame_number=1) """ - Update the line graph with new data. - - Args: - frame_number (int): The current frame number. - total_counts (int): The total counts to plot. - """ - # Update line graph data - x_data = self.line.get_xdata() - y_data = self.line.get_ydata() - x_data = np.append(x_data, float(frame_number)) - y_data = np.append(y_data, float(total_counts)) - self.line.set_data(x_data, y_data) - self.ax.relim() - self.ax.autoscale_view() - self.canvas.draw() - im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) - - def update_multiple_lines(self, counts_dict, labels_list, frame_number): + self.extract_tracks(im0) # Extract tracks + + if self.type == "line": + for _ in self.boxes: + self.total_counts += 1 + im0 = self.update_graph(frame_number=frame_number) + self.total_counts = 0 + elif self.type in {"pie", "bar", "area"}: + self.clswise_count = {} + for box, cls in zip(self.boxes, self.clss): + if self.names[int(cls)] in self.clswise_count: + self.clswise_count[self.names[int(cls)]] += 1 + else: + self.clswise_count[self.names[int(cls)]] = 1 + im0 = self.update_graph(frame_number=frame_number, count_dict=self.clswise_count, plot=self.type) + else: + raise ModuleNotFoundError(f"{self.type} chart is not supported โŒ") + return im0 + + def update_graph(self, frame_number, count_dict=None, plot="line"): """ - Update the line graph with multiple classes. + Updates the graph with new data for single or multiple classes. 
Args: - counts_dict (int): Dictionary include each class counts. - labels_list (int): list include each classes names. frame_number (int): The current frame number. + count_dict (Dict[str, int] | None): Dictionary with class names as keys and counts as values for multiple + classes. If None, updates a single line graph. + plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'. + + Returns: + (np.ndarray): Updated image containing the graph. + + Examples: + >>> analytics = Analytics() + >>> frame_number = 10 + >>> count_dict = {"person": 5, "car": 3} + >>> updated_image = analytics.update_graph(frame_number, count_dict, plot="bar") """ - warnings.warn("Display is not supported for multiple lines, output will be stored normally!") - for obj in labels_list: - if obj not in self.lines: - (line,) = self.ax.plot([], [], label=obj, marker="o", markersize=self.points_width) - self.lines[obj] = line - - x_data = self.lines[obj].get_xdata() - y_data = self.lines[obj].get_ydata() - - # Remove the initial point if the number of points exceeds max_points - if len(x_data) >= self.max_points: - x_data = np.delete(x_data, 0) - y_data = np.delete(y_data, 0) + if count_dict is None: + # Single line update + x_data = np.append(self.line.get_xdata(), float(frame_number)) + y_data = np.append(self.line.get_ydata(), float(self.total_counts)) + + if len(x_data) > self.max_points: + x_data, y_data = x_data[-self.max_points :], y_data[-self.max_points :] + + self.line.set_data(x_data, y_data) + self.line.set_label("Counts") + self.line.set_color("#7b0068") # Pink color + self.line.set_marker("*") + self.line.set_markersize(self.line_width * 5) + else: + labels = list(count_dict.keys()) + counts = list(count_dict.values()) + if plot == "area": + color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"]) + # Multiple lines or area update + x_data = self.ax.lines[0].get_xdata() if self.ax.lines else np.array([]) + y_data_dict = {key: np.array([]) for key 
in count_dict.keys()} + if self.ax.lines: + for line, key in zip(self.ax.lines, count_dict.keys()): + y_data_dict[key] = line.get_ydata() + + x_data = np.append(x_data, float(frame_number)) + max_length = len(x_data) + for key in count_dict.keys(): + y_data_dict[key] = np.append(y_data_dict[key], float(count_dict[key])) + if len(y_data_dict[key]) < max_length: + y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key]))) + if len(x_data) > self.max_points: + x_data = x_data[1:] + for key in count_dict.keys(): + y_data_dict[key] = y_data_dict[key][1:] + + self.ax.clear() + for key, y_data in y_data_dict.items(): + color = next(color_cycle) + self.ax.fill_between(x_data, y_data, color=color, alpha=0.7) + self.ax.plot( + x_data, + y_data, + color=color, + linewidth=self.line_width, + marker="o", + markersize=self.line_width * 5, + label=f"{key} Data Points", + ) + if plot == "bar": + self.ax.clear() # clear bar data + for label in labels: # Map labels to colors + if label not in self.color_mapping: + self.color_mapping[label] = next(self.color_cycle) + colors = [self.color_mapping[label] for label in labels] + bars = self.ax.bar(labels, counts, color=colors) + for bar, count in zip(bars, counts): + self.ax.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height(), + str(count), + ha="center", + va="bottom", + color=self.fg_color, + ) + # Create the legend using labels from the bars + for bar, label in zip(bars, labels): + bar.set_label(label) # Assign label to each bar + self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color) + if plot == "pie": + total = sum(counts) + percentages = [size / total * 100 for size in counts] + start_angle = 90 + self.ax.clear() + + # Create pie chart and create legend labels with percentages + wedges, autotexts = self.ax.pie( + counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None + ) + legend_labels = [f"{label} 
({percentage:.1f}%)" for label, percentage in zip(labels, percentages)] + + # Assign the legend using the wedges and manually created labels + self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) + self.fig.subplots_adjust(left=0.1, right=0.75) # Adjust layout to fit the legend + + # Common plot settings + self.ax.set_facecolor("#f0f0f0") # Set to light gray or any other color you like + self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize) + self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3) + self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3) - x_data = np.append(x_data, float(frame_number)) # Ensure frame_number is converted to float - y_data = np.append(y_data, float(counts_dict.get(obj, 0))) # Ensure total_count is converted to float - self.lines[obj].set_data(x_data, y_data) + # Add and format legend + legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.bg_color) + for text in legend.get_texts(): + text.set_color(self.fg_color) + # Redraw graph, update view, capture, and display the updated plot self.ax.relim() self.ax.autoscale_view() - self.ax.legend() self.canvas.draw() - im0 = np.array(self.canvas.renderer.buffer_rgba()) - self.view_img = False # for multiple line view_img not supported yet, coming soon! - self.write_and_display(im0) - - def write_and_display(self, im0): - """ - Write and display the line graph - Args: - im0 (ndarray): Image for processing. - """ im0 = cv2.cvtColor(im0[:, :, :3], cv2.COLOR_RGBA2BGR) - cv2.imshow(self.title, im0) if self.view_img else None - self.writer.write(im0) if self.save_img else None - - def update_bar(self, count_dict): - """ - Update the bar graph with new data. - - Args: - count_dict (dict): Dictionary containing the count data to plot. 
- """ - # Update bar graph data - self.ax.clear() - self.ax.set_facecolor(self.bg_color) - labels = list(count_dict.keys()) - counts = list(count_dict.values()) - - # Map labels to colors - for label in labels: - if label not in self.color_mapping: - self.color_mapping[label] = next(self.color_cycle) - - colors = [self.color_mapping[label] for label in labels] - - bars = self.ax.bar(labels, counts, color=colors) - for bar, count in zip(bars, counts): - self.ax.text( - bar.get_x() + bar.get_width() / 2, - bar.get_height(), - str(count), - ha="center", - va="bottom", - color=self.fg_color, - ) - - # Display and save the updated graph - canvas = FigureCanvas(self.fig) - canvas.draw() - buf = canvas.buffer_rgba() - im0 = np.asarray(buf) - self.write_and_display(im0) - - def update_pie(self, classes_dict): - """ - Update the pie chart with new data. - - Args: - classes_dict (dict): Dictionary containing the class data to plot. - """ - # Update pie chart data - labels = list(classes_dict.keys()) - sizes = list(classes_dict.values()) - total = sum(sizes) - percentages = [size / total * 100 for size in sizes] - start_angle = 90 - self.ax.clear() - - # Create pie chart without labels inside the slices - wedges, autotexts = self.ax.pie(sizes, autopct=None, startangle=start_angle, textprops={"color": self.fg_color}) - - # Construct legend labels with percentages - legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)] - self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1)) - - # Adjust layout to fit the legend - self.fig.tight_layout() - self.fig.subplots_adjust(left=0.1, right=0.75) - - # Display and save the updated chart - im0 = self.fig.canvas.draw() - im0 = np.array(self.fig.canvas.renderer.buffer_rgba()) - self.write_and_display(im0) - + self.display_output(im0) -if __name__ == "__main__": - Analytics("line", writer=None, im0_shape=None) + return im0 # Return the image diff 
--git a/ultralytics/solutions/distance_calculation.py b/ultralytics/solutions/distance_calculation.py index dccd1687c60..c0d8e77b371 100644 --- a/ultralytics/solutions/distance_calculation.py +++ b/ultralytics/solutions/distance_calculation.py @@ -1,73 +1,71 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import math import cv2 -from ultralytics.utils.checks import check_imshow +from ultralytics.solutions.solutions import BaseSolution from ultralytics.utils.plotting import Annotator, colors -class DistanceCalculation: - """A class to calculate distance between two objects in a real-time video stream based on their tracks.""" +class DistanceCalculation(BaseSolution): + """ + A class to calculate distance between two objects in a real-time video stream based on their tracks. - def __init__( - self, - names, - view_img=False, - line_thickness=2, - line_color=(255, 0, 255), - centroid_color=(104, 31, 17), - ): - """ - Initializes the DistanceCalculation class with the given parameters. + This class extends BaseSolution to provide functionality for selecting objects and calculating the distance + between them in a video stream using YOLO object detection and tracking. - Args: - names (dict): Dictionary of classes names. - view_img (bool, optional): Flag to indicate if the video stream should be displayed. Defaults to False. - line_thickness (int, optional): Thickness of the lines drawn on the image. Defaults to 2. - line_color (tuple, optional): Color of the lines drawn on the image (BGR format). Defaults to (255, 255, 0). - centroid_color (tuple, optional): Color of the centroids drawn (BGR format). Defaults to (255, 0, 255). 
- """ - # Visual & image information - self.im0 = None - self.annotator = None - self.view_img = view_img - self.line_color = line_color - self.centroid_color = centroid_color - - # Prediction & tracking information - self.names = names - self.boxes = None - self.line_thickness = line_thickness - self.trk_ids = None - - # Distance calculation information - self.centroids = [] + Attributes: + left_mouse_count (int): Counter for left mouse button clicks. + selected_boxes (Dict[int, List[float]]): Dictionary to store selected bounding boxes and their track IDs. + annotator (Annotator): An instance of the Annotator class for drawing on the image. + boxes (List[List[float]]): List of bounding boxes for detected objects. + track_ids (List[int]): List of track IDs for detected objects. + clss (List[int]): List of class indices for detected objects. + names (List[str]): List of class names that the model can detect. + centroids (List[List[int]]): List to store centroids of selected bounding boxes. + + Methods: + mouse_event_for_distance: Handles mouse events for selecting objects in the video stream. + calculate: Processes video frames and calculates the distance between selected objects. 
+ + Examples: + >>> distance_calc = DistanceCalculation() + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = distance_calc.calculate(frame) + >>> cv2.imshow("Distance Calculation", processed_frame) + >>> cv2.waitKey(0) + """ + + def __init__(self, **kwargs): + """Initializes the DistanceCalculation class for measuring object distances in video streams.""" + super().__init__(**kwargs) # Mouse event information self.left_mouse_count = 0 self.selected_boxes = {} - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - self.window_name = "Ultralytics Solutions" + self.centroids = [] # Initialize empty list to store centroids def mouse_event_for_distance(self, event, x, y, flags, param): """ - Handles mouse events to select regions in a real-time video stream. + Handles mouse events to select regions in a real-time video stream for distance calculation. Args: - event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.). + event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN). x (int): X-coordinate of the mouse pointer. y (int): Y-coordinate of the mouse pointer. - flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.). - param (dict): Additional parameters passed to the function. + flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY). + param (Dict): Additional parameters passed to the function. 
+ + Examples: + >>> # Assuming 'dc' is an instance of DistanceCalculation + >>> cv2.setMouseCallback("window_name", dc.mouse_event_for_distance) """ if event == cv2.EVENT_LBUTTONDOWN: self.left_mouse_count += 1 if self.left_mouse_count <= 2: - for box, track_id in zip(self.boxes, self.trk_ids): + for box, track_id in zip(self.boxes, self.track_ids): if box[0] < x < box[2] and box[1] < y < box[3] and track_id not in self.selected_boxes: self.selected_boxes[track_id] = box @@ -75,30 +73,31 @@ def mouse_event_for_distance(self, event, x, y, flags, param): self.selected_boxes = {} self.left_mouse_count = 0 - def start_process(self, im0, tracks): + def calculate(self, im0): """ - Processes the video frame and calculates the distance between two bounding boxes. + Processes a video frame and calculates the distance between two selected bounding boxes. + + This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance + between two user-selected objects if they have been chosen. Args: - im0 (ndarray): The image frame. - tracks (list): List of tracks obtained from the object tracking process. + im0 (numpy.ndarray): The input image frame to process. Returns: - (ndarray): The processed image frame. + (numpy.ndarray): The processed image frame with annotations and distance calculations. 
+ + Examples: + >>> import numpy as np + >>> from ultralytics.solutions import DistanceCalculation + >>> dc = DistanceCalculation() + >>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) + >>> processed_frame = dc.calculate(frame) """ - self.im0 = im0 - if tracks[0].boxes.id is None: - if self.view_img: - self.display_frames() - return im0 - - self.boxes = tracks[0].boxes.xyxy.cpu() - clss = tracks[0].boxes.cls.cpu().tolist() - self.trk_ids = tracks[0].boxes.id.int().cpu().tolist() + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks - self.annotator = Annotator(self.im0, line_width=self.line_thickness) - - for box, cls, track_id in zip(self.boxes, clss, self.trk_ids): + # Iterate over bounding boxes, track ids and classes index + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): self.annotator.box_label(box, color=colors(int(cls), True), label=self.names[int(cls)]) if len(self.selected_boxes) == 2: @@ -115,25 +114,11 @@ def start_process(self, im0, tracks): pixels_distance = math.sqrt( (self.centroids[0][0] - self.centroids[1][0]) ** 2 + (self.centroids[0][1] - self.centroids[1][1]) ** 2 ) - self.annotator.plot_distance_and_line(pixels_distance, self.centroids, self.line_color, self.centroid_color) + self.annotator.plot_distance_and_line(pixels_distance, self.centroids) self.centroids = [] - if self.view_img and self.env_check: - self.display_frames() - - return im0 - - def display_frames(self): - """Displays the current frame with annotations.""" - cv2.namedWindow(self.window_name) - cv2.setMouseCallback(self.window_name, self.mouse_event_for_distance) - cv2.imshow(self.window_name, self.im0) - - if cv2.waitKey(1) & 0xFF == ord("q"): - return - + self.display_output(im0) # display output with base class function + cv2.setMouseCallback("Ultralytics Solutions", self.mouse_event_for_distance) -if __name__ == "__main__": - names = {0: "person", 1: "car"} # 
example class names - distance_calculation = DistanceCalculation(names) + return im0 # return output image for more usage diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py index 728b167bc86..c0ee1494959 100644 --- a/ultralytics/solutions/heatmap.py +++ b/ultralytics/solutions/heatmap.py @@ -1,259 +1,127 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -from collections import defaultdict +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import cv2 import numpy as np -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.object_counter import ObjectCounter from ultralytics.utils.plotting import Annotator -check_requirements("shapely>=2.0.0") -from shapely.geometry import LineString, Point, Polygon +class Heatmap(ObjectCounter): + """ + A class to draw heatmaps in real-time video streams based on object tracks. + + This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video + streams. It uses tracked object positions to create a cumulative heatmap effect over time. + Attributes: + initialized (bool): Flag indicating whether the heatmap has been initialized. + colormap (int): OpenCV colormap used for heatmap visualization. + heatmap (np.ndarray): Array storing the cumulative heatmap data. + annotator (Annotator): Object for drawing annotations on the image. -class Heatmap: - """A class to draw heatmaps in real-time video stream based on their tracks.""" + Methods: + heatmap_effect: Calculates and updates the heatmap effect for a given bounding box. + generate_heatmap: Generates and applies the heatmap effect to each frame. 
- def __init__( - self, - names, - imw=0, - imh=0, - colormap=cv2.COLORMAP_JET, - heatmap_alpha=0.5, - view_img=False, - view_in_counts=True, - view_out_counts=True, - count_reg_pts=None, - count_txt_color=(0, 0, 0), - count_bg_color=(255, 255, 255), - count_reg_color=(255, 0, 255), - region_thickness=5, - line_dist_thresh=15, - line_thickness=2, - decay_factor=0.99, - shape="circle", - ): - """Initializes the heatmap class with default values for Visual, Image, track, count and heatmap parameters.""" - # Visual information - self.annotator = None - self.view_img = view_img - self.shape = shape + Examples: + >>> from ultralytics.solutions import Heatmap + >>> heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_JET) + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = heatmap.generate_heatmap(frame) + """ - self.initialized = False - self.names = names # Classes names + def __init__(self, **kwargs): + """Initializes the Heatmap class for real-time video stream heatmap generation based on object tracks.""" + super().__init__(**kwargs) - # Image information - self.imw = imw - self.imh = imh - self.im0 = None - self.tf = line_thickness - self.view_in_counts = view_in_counts - self.view_out_counts = view_out_counts + self.initialized = False # bool variable for heatmap initialization + if self.region is not None: # check if user provided the region coordinates + self.initialize_region() - # Heatmap colormap and heatmap np array - self.colormap = colormap + # store colormap + self.colormap = cv2.COLORMAP_PARULA if self.CFG["colormap"] is None else self.CFG["colormap"] self.heatmap = None - self.heatmap_alpha = heatmap_alpha - # Predict/track information - self.boxes = [] - self.track_ids = [] - self.clss = [] - self.track_history = defaultdict(list) + def heatmap_effect(self, box): + """ + Efficiently calculates heatmap area and effect location for applying colormap. 
- # Region & Line Information - self.counting_region = None - self.line_dist_thresh = line_dist_thresh - self.region_thickness = region_thickness - self.region_color = count_reg_color + Args: + box (List[float]): Bounding box coordinates [x0, y0, x1, y1]. - # Object Counting Information - self.in_counts = 0 - self.out_counts = 0 - self.count_ids = [] - self.class_wise_count = {} - self.count_txt_color = count_txt_color - self.count_bg_color = count_bg_color - self.cls_txtdisplay_gap = 50 + Examples: + >>> heatmap = Heatmap() + >>> box = [100, 100, 200, 200] + >>> heatmap.heatmap_effect(box) + """ + x0, y0, x1, y1 = map(int, box) + radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2 - # Decay factor - self.decay_factor = decay_factor + # Create a meshgrid with region of interest (ROI) for vectorized distance calculations + xv, yv = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1)) - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) + # Calculate squared distances from the center + dist_squared = (xv - ((x0 + x1) // 2)) ** 2 + (yv - ((y0 + y1) // 2)) ** 2 - # Region and line selection - self.count_reg_pts = count_reg_pts - print(self.count_reg_pts) - if self.count_reg_pts is not None: - if len(self.count_reg_pts) == 2: - print("Line Counter Initiated.") - self.counting_region = LineString(self.count_reg_pts) - elif len(self.count_reg_pts) >= 3: - print("Polygon Counter Initiated.") - self.counting_region = Polygon(self.count_reg_pts) - else: - print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.") - print("Using Line Counter Now") - self.counting_region = LineString(self.count_reg_pts) + # Create a mask of points within the radius + within_radius = dist_squared <= radius_squared - # Shape of heatmap, if not selected - if self.shape not in {"circle", "rect"}: - print("Unknown shape value provided, 'circle' & 'rect' supported") - print("Using Circular shape now") - self.shape = "circle" + # Update 
only the values within the bounding box in a single vectorized operation + self.heatmap[y0:y1, x0:x1][within_radius] += 2 - def extract_results(self, tracks): + def generate_heatmap(self, im0): """ - Extracts results from the provided data. + Generate heatmap for each frame using Ultralytics. Args: - tracks (list): List of tracks obtained from the object tracking process. - """ - if tracks[0].boxes.id is not None: - self.boxes = tracks[0].boxes.xyxy.cpu() - self.clss = tracks[0].boxes.cls.tolist() - self.track_ids = tracks[0].boxes.id.int().tolist() + im0 (np.ndarray): Input image array for processing. - def generate_heatmap(self, im0, tracks): - """ - Generate heatmap based on tracking data. + Returns: + (np.ndarray): Processed image with heatmap overlay and object counts (if region is specified). - Args: - im0 (nd array): Image - tracks (list): List of tracks obtained from the object tracking process. + Examples: + >>> heatmap = Heatmap() + >>> im0 = cv2.imread("image.jpg") + >>> result = heatmap.generate_heatmap(im0) """ - self.im0 = im0 - - # Initialize heatmap only once if not self.initialized: - self.heatmap = np.zeros((int(self.im0.shape[0]), int(self.im0.shape[1])), dtype=np.float32) - self.initialized = True - - self.heatmap *= self.decay_factor # decay factor - - self.extract_results(tracks) - self.annotator = Annotator(self.im0, self.tf, None) - - if self.track_ids: - # Draw counting region - if self.count_reg_pts is not None: - self.annotator.draw_region( - reg_pts=self.count_reg_pts, color=self.region_color, thickness=self.region_thickness - ) - - for box, cls, track_id in zip(self.boxes, self.clss, self.track_ids): - # Store class info - if self.names[cls] not in self.class_wise_count: - self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0} - - if self.shape == "circle": - center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)) - radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2 - - y, x = np.ogrid[0 : 
self.heatmap.shape[0], 0 : self.heatmap.shape[1]] - mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2 - - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += ( - 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] - ) - - else: - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2 - - # Store tracking hist - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) - - prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None - - if self.count_reg_pts is not None: - # Count objects in any polygon - if len(self.count_reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - - if prev_position is not None and is_inside and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - # Count objects using line - elif len(self.count_reg_pts) == 2: - if prev_position is not None and track_id not in self.count_ids: - distance = Point(track_line[-1]).distance(self.counting_region) - if distance < self.line_dist_thresh and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * ( - self.counting_region.centroid.x - prev_position[0] - ) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - else: - for box, cls in zip(self.boxes, self.clss): - if self.shape == "circle": - center = (int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)) - radius = min(int(box[2]) - int(box[0]), int(box[3]) - int(box[1])) // 2 - - y, x = 
np.ogrid[0 : self.heatmap.shape[0], 0 : self.heatmap.shape[1]] - mask = (x - center[0]) ** 2 + (y - center[1]) ** 2 <= radius**2 - - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += ( - 2 * mask[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] - ) - - else: - self.heatmap[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] += 2 - - if self.count_reg_pts is not None: - labels_dict = {} - - for key, value in self.class_wise_count.items(): - if value["IN"] != 0 or value["OUT"] != 0: - if not self.view_in_counts and not self.view_out_counts: - continue - elif not self.view_in_counts: - labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}" - elif not self.view_out_counts: - labels_dict[str.capitalize(key)] = f"IN {value['IN']}" - else: - labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}" - - if labels_dict is not None: - self.annotator.display_analytics(self.im0, labels_dict, self.count_txt_color, self.count_bg_color, 10) + self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99 + self.initialized = True # Initialize heatmap only once + + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks + + # Iterate over bounding boxes, track ids and classes index + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.heatmap_effect(box) + + if self.region is not None: + self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2) + self.store_tracking_history(track_id, box) # Store track history + self.store_classwise_counts(cls) # store classwise counts in dict + current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) + # Store tracking previous position and perform object counting + prev_position = None + if len(self.track_history[track_id]) > 1: + prev_position = self.track_history[track_id][-2] + self.count_objects(current_centroid, 
track_id, prev_position, cls) # Perform object counting + + if self.region is not None: + self.display_counts(im0) # Display the counts on the frame # Normalize, apply colormap to heatmap and combine with original image - heatmap_normalized = cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX) - heatmap_colored = cv2.applyColorMap(heatmap_normalized.astype(np.uint8), self.colormap) - self.im0 = cv2.addWeighted(self.im0, 1 - self.heatmap_alpha, heatmap_colored, self.heatmap_alpha, 0) - - if self.env_check and self.view_img: - self.display_frames() - - return self.im0 - - def display_frames(self): - """Display frame.""" - cv2.imshow("Ultralytics Heatmap", self.im0) - - if cv2.waitKey(1) & 0xFF == ord("q"): - return - - -if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - heatmap = Heatmap(classes_names) + if self.track_data.id is not None: + im0 = cv2.addWeighted( + im0, + 0.5, + cv2.applyColorMap( + cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8), self.colormap + ), + 0.5, + 0, + ) + + self.display_output(im0) # display output with base class function + return im0 # return output image for more usage diff --git a/ultralytics/solutions/object_counter.py b/ultralytics/solutions/object_counter.py index cc7fe459462..d202ca51f51 100644 --- a/ultralytics/solutions/object_counter.py +++ b/ultralytics/solutions/object_counter.py @@ -1,243 +1,203 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -from collections import defaultdict - -import cv2 - -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.solutions import BaseSolution from ultralytics.utils.plotting import Annotator, colors -check_requirements("shapely>=2.0.0") - -from shapely.geometry import LineString, Point, Polygon +class ObjectCounter(BaseSolution): + """ + A class to manage the counting of objects in a real-time video 
stream based on their tracks. + + This class extends the BaseSolution class and provides functionality for counting objects moving in and out of a + specified region in a video stream. It supports both polygonal and linear regions for counting. + + Attributes: + in_count (int): Counter for objects moving inward. + out_count (int): Counter for objects moving outward. + counted_ids (List[int]): List of IDs of objects that have been counted. + classwise_counts (Dict[str, Dict[str, int]]): Dictionary for counts, categorized by object class. + region_initialized (bool): Flag indicating whether the counting region has been initialized. + show_in (bool): Flag to control display of inward count. + show_out (bool): Flag to control display of outward count. + + Methods: + count_objects: Counts objects within a polygonal or linear region. + store_classwise_counts: Initializes class-wise counts if not already present. + display_counts: Displays object counts on the frame. + count: Processes input data (frames or object tracks) and updates counts. 
+ + Examples: + >>> counter = ObjectCounter() + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = counter.count(frame) + >>> print(f"Inward count: {counter.in_count}, Outward count: {counter.out_count}") + """ + + def __init__(self, **kwargs): + """Initializes the ObjectCounter class for real-time object counting in video streams.""" + super().__init__(**kwargs) + + self.in_count = 0 # Counter for objects moving inward + self.out_count = 0 # Counter for objects moving outward + self.counted_ids = [] # List of IDs of objects that have been counted + self.classwise_counts = {} # Dictionary for counts, categorized by object class + self.region_initialized = False # Bool variable for region initialization + + self.show_in = self.CFG["show_in"] + self.show_out = self.CFG["show_out"] + + def count_objects(self, current_centroid, track_id, prev_position, cls): + """ + Counts objects within a polygonal or linear region based on their tracks. -class ObjectCounter: - """A class to manage the counting of objects in a real-time video stream based on their tracks.""" + Args: + current_centroid (Tuple[float, float]): Current centroid values in the current frame. + track_id (int): Unique identifier for the tracked object. + prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track. + cls (int): Class index for classwise count updates. 
+ + Examples: + >>> counter = ObjectCounter() + >>> track_line = {1: [100, 200], 2: [110, 210], 3: [120, 220]} + >>> box = [130, 230, 150, 250] + >>> track_id = 1 + >>> prev_position = (120, 220) + >>> cls = 0 + >>> counter.count_objects(current_centroid, track_id, prev_position, cls) + """ + if prev_position is None or track_id in self.counted_ids: + return + + if len(self.region) == 2: # Linear region (defined as a line segment) + line = self.LineString(self.region) # Check if the line intersects the trajectory of the object + if line.intersects(self.LineString([prev_position, current_centroid])): + # Determine orientation of the region (vertical or horizontal) + if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]): + # Vertical region: Compare x-coordinates to determine direction + if current_centroid[0] > prev_position[0]: # Moving right + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving left + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + # Horizontal region: Compare y-coordinates to determine direction + elif current_centroid[1] > prev_position[1]: # Moving downward + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving upward + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + self.counted_ids.append(track_id) + + elif len(self.region) > 2: # Polygonal region + polygon = self.Polygon(self.region) + if polygon.contains(self.Point(current_centroid)): + # Determine motion direction for vertical or horizontal polygons + region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region) + region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region) - def __init__( - self, - names, - reg_pts=None, - line_thickness=2, - view_img=False, - view_in_counts=True, - view_out_counts=True, - draw_tracks=False, - ): + if ( + region_width < region_height + and current_centroid[0] > 
prev_position[0] + or region_width >= region_height + and current_centroid[1] > prev_position[1] + ): # Moving right + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving left + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + self.counted_ids.append(track_id) + + def store_classwise_counts(self, cls): """ - Initializes the ObjectCounter with various tracking and counting parameters. + Initialize class-wise counts for a specific object class if not already present. Args: - names (dict): Dictionary of class names. - reg_pts (list): List of points defining the counting region. - line_thickness (int): Line thickness for bounding boxes. - view_img (bool): Flag to control whether to display the video stream. - view_in_counts (bool): Flag to control whether to display the in counts on the video stream. - view_out_counts (bool): Flag to control whether to display the out counts on the video stream. - draw_tracks (bool): Flag to control whether to draw the object tracks. + cls (int): Class index for classwise count updates. + + This method ensures that the 'classwise_counts' dictionary contains an entry for the specified class, + initializing 'IN' and 'OUT' counts to zero if the class is not already present. 
+ + Examples: + >>> counter = ObjectCounter() + >>> counter.store_classwise_counts(0) # Initialize counts for class index 0 + >>> print(counter.classwise_counts) + {'person': {'IN': 0, 'OUT': 0}} """ - # Mouse events - self.is_drawing = False - self.selected_point = None - - # Region & Line Information - self.reg_pts = [(20, 400), (1260, 400)] if reg_pts is None else reg_pts - self.counting_region = None - - # Image and annotation Information - self.im0 = None - self.tf = line_thickness - self.view_img = view_img - self.view_in_counts = view_in_counts - self.view_out_counts = view_out_counts - - self.names = names # Classes names - self.window_name = "Ultralytics YOLOv8 Object Counter" - - # Object counting Information - self.in_counts = 0 - self.out_counts = 0 - self.count_ids = [] - self.class_wise_count = {} - - # Tracks info - self.track_history = defaultdict(list) - self.draw_tracks = draw_tracks - - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) - - # Initialize counting region - if len(self.reg_pts) == 2: - print("Line Counter Initiated.") - self.counting_region = LineString(self.reg_pts) - elif len(self.reg_pts) >= 3: - print("Polygon Counter Initiated.") - self.counting_region = Polygon(self.reg_pts) - else: - print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.") - print("Using Line Counter Now") - self.counting_region = LineString(self.reg_pts) - - # Define the counting line segment - self.counting_line_segment = LineString( - [ - (self.reg_pts[0][0], self.reg_pts[0][1]), - (self.reg_pts[1][0], self.reg_pts[1][1]), - ] - ) - - def mouse_event_for_region(self, event, x, y, flags, params): + if self.names[cls] not in self.classwise_counts: + self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0} + + def display_counts(self, im0): """ - Handles mouse events for defining and moving the counting region in a real-time video stream. 
+ Displays object counts on the input image or frame. Args: - event (int): The type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.). - x (int): The x-coordinate of the mouse pointer. - y (int): The y-coordinate of the mouse pointer. - flags (int): Any associated event flags (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.). - params (dict): Additional parameters for the function. + im0 (numpy.ndarray): The input image or frame to display counts on. + + Examples: + >>> counter = ObjectCounter() + >>> frame = cv2.imread("image.jpg") + >>> counter.display_counts(frame) """ - if event == cv2.EVENT_LBUTTONDOWN: - for i, point in enumerate(self.reg_pts): - if ( - isinstance(point, (tuple, list)) - and len(point) >= 2 - and (abs(x - point[0]) < 10 and abs(y - point[1]) < 10) - ): - self.selected_point = i - self.is_drawing = True - break - - elif event == cv2.EVENT_MOUSEMOVE: - if self.is_drawing and self.selected_point is not None: - self.reg_pts[self.selected_point] = (x, y) - self.counting_region = Polygon(self.reg_pts) - - elif event == cv2.EVENT_LBUTTONUP: - self.is_drawing = False - self.selected_point = None - - def extract_and_process_tracks(self, tracks): - """Extracts and processes tracks for object counting in a video stream.""" - # Annotator Init and region drawing - annotator = Annotator(self.im0, self.tf, self.names) - - # Draw region or line - annotator.draw_region(reg_pts=self.reg_pts, color=(104, 0, 123), thickness=self.tf * 2) - - # Extract tracks for OBB or object detection - track_data = tracks[0].obb or tracks[0].boxes - - if track_data and track_data.id is not None: - boxes = track_data.xyxy.cpu() - clss = track_data.cls.cpu().tolist() - track_ids = track_data.id.int().cpu().tolist() - - # Extract tracks - for box, track_id, cls in zip(boxes, track_ids, clss): - # Draw bounding box - annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True)) - - # Store class info - if self.names[cls] not in 
self.class_wise_count: - self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0} - - # Draw Tracks - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) - - # Draw track trails - if self.draw_tracks: - annotator.draw_centroid_and_tracks( - track_line, - color=colors(int(track_id), True), - track_thickness=self.tf, - ) - - prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None - - # Count objects in any polygon - if len(self.reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - - if prev_position is not None and is_inside and track_id not in self.count_ids: - self.count_ids.append(track_id) - - if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - # Count objects using line - elif len(self.reg_pts) == 2: - if ( - prev_position is not None - and track_id not in self.count_ids - and LineString([(prev_position[0], prev_position[1]), (box[0], box[1])]).intersects( - self.counting_line_segment - ) - ): - self.count_ids.append(track_id) - - # Determine the direction of movement (IN or OUT) - dx = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) - dy = (box[1] - prev_position[1]) * (self.counting_region.centroid.y - prev_position[1]) - if dx > 0 and dy > 0: - self.in_counts += 1 - self.class_wise_count[self.names[cls]]["IN"] += 1 - else: - self.out_counts += 1 - self.class_wise_count[self.names[cls]]["OUT"] += 1 - - labels_dict = {} - - for key, value in self.class_wise_count.items(): - if value["IN"] != 0 or value["OUT"] != 0: - if not self.view_in_counts and not self.view_out_counts: - continue - elif not self.view_in_counts: - 
labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}" - elif not self.view_out_counts: - labels_dict[str.capitalize(key)] = f"IN {value['IN']}" - else: - labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}" + labels_dict = { + str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} " + f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip() + for key, value in self.classwise_counts.items() + if value["IN"] != 0 or value["OUT"] != 0 + } if labels_dict: - annotator.display_analytics(self.im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) - - def display_frames(self): - """Displays the current frame with annotations and regions in a window.""" - if self.env_check: - cv2.namedWindow(self.window_name) - if len(self.reg_pts) == 4: # only add mouse event If user drawn region - cv2.setMouseCallback(self.window_name, self.mouse_event_for_region, {"region_points": self.reg_pts}) - cv2.imshow(self.window_name, self.im0) - # Break Window - if cv2.waitKey(1) & 0xFF == ord("q"): - return - - def start_counting(self, im0, tracks): - """ - Main function to start the object counting process. + self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) - Args: - im0 (ndarray): Current frame from the video stream. - tracks (list): List of tracks obtained from the object tracking process. + def count(self, im0): """ - self.im0 = im0 # store image - self.extract_and_process_tracks(tracks) # draw region even if no objects + Processes input data (frames or object tracks) and updates object counts. + + This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates + object counts, and displays the results on the input image. - if self.view_img: - self.display_frames() - return self.im0 + Args: + im0 (numpy.ndarray): The input image or frame to be processed. + Returns: + (numpy.ndarray): The processed image with annotations and count information. 
-if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - ObjectCounter(classes_names) + Examples: + >>> counter = ObjectCounter() + >>> frame = cv2.imread("path/to/image.jpg") + >>> processed_frame = counter.count(frame) + """ + if not self.region_initialized: + self.initialize_region() + self.region_initialized = True + + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks + + self.annotator.draw_region( + reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2 + ) # Draw region + + # Iterate over bounding boxes, track ids and classes index + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True)) + self.store_tracking_history(track_id, box) # Store track history + self.store_classwise_counts(cls) # store classwise counts in dict + + # Draw tracks of objects + self.annotator.draw_centroid_and_tracks( + self.track_line, color=colors(int(cls), True), track_thickness=self.line_width + ) + current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) + # store previous position of track for object counting + prev_position = None + if len(self.track_history[track_id]) > 1: + prev_position = self.track_history[track_id][-2] + self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting + + self.display_counts(im0) # Display the counts on the frame + self.display_output(im0) # display output with base class function + + return im0 # return output image for more usage diff --git a/ultralytics/solutions/parking_management.py b/ultralytics/solutions/parking_management.py index ef58ad62744..be1c8c9b3f4 100644 --- a/ultralytics/solutions/parking_management.py +++ b/ultralytics/solutions/parking_management.py @@ -1,241 +1,246 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# 
Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import json import cv2 import numpy as np -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.solutions import BaseSolution +from ultralytics.utils import LOGGER +from ultralytics.utils.checks import check_requirements from ultralytics.utils.plotting import Annotator class ParkingPtsSelection: - """Class for selecting and managing parking zone points on images using a Tkinter-based UI.""" + """ + A class for selecting and managing parking zone points on images using a Tkinter-based UI. + + This class provides functionality to upload an image, select points to define parking zones, and save the + selected points to a JSON file. It uses Tkinter for the graphical user interface. + + Attributes: + tk (module): The Tkinter module for GUI operations. + filedialog (module): Tkinter's filedialog module for file selection operations. + messagebox (module): Tkinter's messagebox module for displaying message boxes. + master (tk.Tk): The main Tkinter window. + canvas (tk.Canvas): The canvas widget for displaying the image and drawing bounding boxes. + image (PIL.Image.Image): The uploaded image. + canvas_image (ImageTk.PhotoImage): The image displayed on the canvas. + rg_data (List[List[Tuple[int, int]]]): List of bounding boxes, each defined by 4 points. + current_box (List[Tuple[int, int]]): Temporary storage for the points of the current bounding box. + imgw (int): Original width of the uploaded image. + imgh (int): Original height of the uploaded image. + canvas_max_width (int): Maximum width of the canvas. + canvas_max_height (int): Maximum height of the canvas. + + Methods: + initialize_properties: Initializes the necessary properties. + upload_image: Uploads an image, resizes it to fit the canvas, and displays it. + on_canvas_click: Handles mouse clicks to add points for bounding boxes. + draw_box: Draws a bounding box on the canvas. 
+ remove_last_bounding_box: Removes the last bounding box and redraws the canvas. + redraw_canvas: Redraws the canvas with the image and all bounding boxes. + save_to_json: Saves the bounding boxes to a JSON file. + + Examples: + >>> parking_selector = ParkingPtsSelection() + >>> # Use the GUI to upload an image, select parking zones, and save the data + """ def __init__(self): - """Initializes the UI for selecting parking zone points in a tkinter window.""" + """Initializes the ParkingPtsSelection class, setting up UI and properties for parking zone point selection.""" check_requirements("tkinter") + import tkinter as tk + from tkinter import filedialog, messagebox - import tkinter as tk # scope for multi-environment compatibility - - self.tk = tk - self.master = tk.Tk() + self.tk, self.filedialog, self.messagebox = tk, filedialog, messagebox + self.master = self.tk.Tk() # Reference to the main application window or parent widget self.master.title("Ultralytics Parking Zones Points Selector") - - # Disable window resizing self.master.resizable(False, False) - # Setup canvas for image display - self.canvas = self.tk.Canvas(self.master, bg="white") + self.canvas = self.tk.Canvas(self.master, bg="white") # Canvas widget for displaying images or graphics + self.canvas.pack(side=self.tk.BOTTOM) + + self.image = None # Variable to store the loaded image + self.canvas_image = None # Reference to the image displayed on the canvas + self.canvas_max_width = None # Maximum allowed width for the canvas + self.canvas_max_height = None # Maximum allowed height for the canvas + self.rg_data = None # Data related to region or annotation management + self.current_box = None # Stores the currently selected or active bounding box + self.imgh = None # Height of the current image + self.imgw = None # Width of the current image - # Setup buttons + # Button frame with buttons button_frame = self.tk.Frame(self.master) button_frame.pack(side=self.tk.TOP) - self.tk.Button(button_frame, 
text="Upload Image", command=self.upload_image).grid(row=0, column=0) - self.tk.Button(button_frame, text="Remove Last BBox", command=self.remove_last_bounding_box).grid( - row=0, column=1 - ) - self.tk.Button(button_frame, text="Save", command=self.save_to_json).grid(row=0, column=2) - - # Initialize properties - self.image_path = None - self.image = None - self.canvas_image = None - self.rg_data = [] # region coordinates - self.current_box = [] - self.imgw = 0 # image width - self.imgh = 0 # image height - - # Constants - self.canvas_max_width = 1280 - self.canvas_max_height = 720 + for text, cmd in [ + ("Upload Image", self.upload_image), + ("Remove Last BBox", self.remove_last_bounding_box), + ("Save", self.save_to_json), + ]: + self.tk.Button(button_frame, text=text, command=cmd).pack(side=self.tk.LEFT) + self.initialize_properties() self.master.mainloop() - def upload_image(self): - """Upload an image and resize it to fit canvas.""" - from tkinter import filedialog + def initialize_properties(self): + """Initialize properties for image, canvas, bounding boxes, and dimensions.""" + self.image = self.canvas_image = None + self.rg_data, self.current_box = [], [] + self.imgw = self.imgh = 0 + self.canvas_max_width, self.canvas_max_height = 1280, 720 + def upload_image(self): + """Uploads and displays an image on the canvas, resizing it to fit within specified dimensions.""" from PIL import Image, ImageTk # scope because ImageTk requires tkinter package - self.image_path = filedialog.askopenfilename(filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")]) - if not self.image_path: + self.image = Image.open(self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")])) + if not self.image: return - self.image = Image.open(self.image_path) self.imgw, self.imgh = self.image.size - - # Calculate the aspect ratio and resize image aspect_ratio = self.imgw / self.imgh - if aspect_ratio > 1: - # Landscape orientation - canvas_width = 
min(self.canvas_max_width, self.imgw) - canvas_height = int(canvas_width / aspect_ratio) - else: - # Portrait orientation - canvas_height = min(self.canvas_max_height, self.imgh) - canvas_width = int(canvas_height * aspect_ratio) - - # Check if canvas is already initialized - if self.canvas: - self.canvas.destroy() # Destroy previous canvas - - self.canvas = self.tk.Canvas(self.master, bg="white", width=canvas_width, height=canvas_height) - resized_image = self.image.resize((canvas_width, canvas_height), Image.LANCZOS) - self.canvas_image = ImageTk.PhotoImage(resized_image) - self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image) + canvas_width = ( + min(self.canvas_max_width, self.imgw) if aspect_ratio > 1 else int(self.canvas_max_height * aspect_ratio) + ) + canvas_height = ( + min(self.canvas_max_height, self.imgh) if aspect_ratio <= 1 else int(canvas_width / aspect_ratio) + ) - self.canvas.pack(side=self.tk.BOTTOM) + self.canvas.config(width=canvas_width, height=canvas_height) + self.canvas_image = ImageTk.PhotoImage(self.image.resize((canvas_width, canvas_height))) + self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image) self.canvas.bind("", self.on_canvas_click) - # Reset bounding boxes and current box - self.rg_data = [] - self.current_box = [] + self.rg_data.clear(), self.current_box.clear() def on_canvas_click(self, event): - """Handle mouse clicks on canvas to create points for bounding boxes.""" + """Handles mouse clicks to add points for bounding boxes on the canvas.""" self.current_box.append((event.x, event.y)) self.canvas.create_oval(event.x - 3, event.y - 3, event.x + 3, event.y + 3, fill="red") - if len(self.current_box) == 4: - self.rg_data.append(self.current_box) - [ - self.canvas.create_line(self.current_box[i], self.current_box[(i + 1) % 4], fill="blue", width=2) - for i in range(4) - ] - self.current_box = [] + self.rg_data.append(self.current_box.copy()) + self.draw_box(self.current_box) + 
self.current_box.clear() - def remove_last_bounding_box(self): - """Remove the last drawn bounding box from canvas.""" - from tkinter import messagebox # scope for multi-environment compatibility + def draw_box(self, box): + """Draws a bounding box on the canvas using the provided coordinates.""" + for i in range(4): + self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2) - if self.rg_data: - self.rg_data.pop() # Remove the last bounding box - self.canvas.delete("all") # Clear the canvas - self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image) # Redraw the image + def remove_last_bounding_box(self): + """Removes the last bounding box from the list and redraws the canvas.""" + if not self.rg_data: + self.messagebox.showwarning("Warning", "No bounding boxes to remove.") + return + self.rg_data.pop() + self.redraw_canvas() - # Redraw all bounding boxes - for box in self.rg_data: - [self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2) for i in range(4)] - messagebox.showinfo("Success", "Last bounding box removed.") - else: - messagebox.showwarning("Warning", "No bounding boxes to remove.") + def redraw_canvas(self): + """Redraws the canvas with the image and all bounding boxes.""" + self.canvas.delete("all") + self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image) + for box in self.rg_data: + self.draw_box(box) def save_to_json(self): - """Saves rescaled bounding boxes to 'bounding_boxes.json' based on image-to-canvas size ratio.""" - from tkinter import messagebox # scope for multi-environment compatibility + """Saves the selected parking zone points to a JSON file with scaled coordinates.""" + scale_w, scale_h = self.imgw / self.canvas.winfo_width(), self.imgh / self.canvas.winfo_height() + data = [{"points": [(int(x * scale_w), int(y * scale_h)) for x, y in box]} for box in self.rg_data] - rg_data = [] # regions data - for box in self.rg_data: - rs_box = [ - ( - int(x * self.imgw / 
self.canvas.winfo_width()), # width scaling - int(y * self.imgh / self.canvas.winfo_height()), # height scaling - ) - for x, y in box - ] - rg_data.append({"points": rs_box}) - with open("bounding_boxes.json", "w") as f: - json.dump(rg_data, f, indent=4) - - messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json") - - -class ParkingManagement: - """Manages parking occupancy and availability using YOLOv8 for real-time monitoring and visualization.""" - - def __init__( - self, - model, # Ultralytics YOLO model file path - json_file, # Parking management annotation file created from Parking Annotator - occupied_region_color=(0, 0, 255), # occupied region color - available_region_color=(0, 255, 0), # available region color - ): - """ - Initializes the parking management system with a YOLOv8 model and visualization settings. + from io import StringIO # Function level import, as it's only required to store coordinates, not every frame - Args: - model (str): Path to the YOLOv8 model. - json_file (str): file that have all parking slot points data - occupied_region_color (tuple): RGB color tuple for occupied regions. - available_region_color (tuple): RGB color tuple for available regions. - """ - # Model initialization - from ultralytics import YOLO + write_buffer = StringIO() + json.dump(data, write_buffer, indent=4) + with open("bounding_boxes.json", "w", encoding="utf-8") as f: + f.write(write_buffer.getvalue()) + self.messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json") - self.model = YOLO(model) - # Load JSON data - with open(json_file) as f: - self.json_data = json.load(f) +class ParkingManagement(BaseSolution): + """ + Manages parking occupancy and availability using YOLO model for real-time monitoring and visualization. 
- self.pr_info = {"Occupancy": 0, "Available": 0} # dictionary for parking information + This class extends BaseSolution to provide functionality for parking lot management, including detection of + occupied spaces, visualization of parking regions, and display of occupancy statistics. - self.occ = occupied_region_color - self.arc = available_region_color + Attributes: + json_file (str): Path to the JSON file containing parking region details. + json (List[Dict]): Loaded JSON data containing parking region information. + pr_info (Dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces). + arc (Tuple[int, int, int]): RGB color tuple for available region visualization. + occ (Tuple[int, int, int]): RGB color tuple for occupied region visualization. + dc (Tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects. - self.env_check = check_imshow(warn=True) # check if environment supports imshow + Methods: + process_data: Processes model data for parking lot management and visualization. - def process_data(self, im0): - """ - Process the model data for parking lot management. + Examples: + >>> from ultralytics.solutions import ParkingManagement + >>> parking_manager = ParkingManagement(model="yolo11n.pt", json_file="parking_regions.json") + >>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}") + >>> print(f"Available spaces: {parking_manager.pr_info['Available']}") + """ - Args: - im0 (ndarray): inference image + def __init__(self, **kwargs): + """Initializes the parking management system with a YOLO model and visualization settings.""" + super().__init__(**kwargs) + + self.json_file = self.CFG["json_file"] # Load JSON data + if self.json_file is None: + LOGGER.warning("โŒ json_file argument missing. 
Parking region details required.") + raise ValueError("โŒ Json file path can not be empty") + + with open(self.json_file) as f: + self.json = json.load(f) + + self.pr_info = {"Occupancy": 0, "Available": 0} # dictionary for parking information + + self.arc = (0, 0, 255) # available region color + self.occ = (0, 255, 0) # occupied region color + self.dc = (255, 0, 189) # centroid color for each box + + def process_data(self, im0): """ - results = self.model.track(im0, persist=True, show=False) # object tracking + Processes the model data for parking lot management. - es, fs = len(self.json_data), 0 # empty slots, filled slots - annotator = Annotator(im0) # init annotator + This function analyzes the input image, extracts tracks, and determines the occupancy status of parking + regions defined in the JSON file. It annotates the image with occupied and available parking spots, + and updates the parking information. - # extract tracks data - if results[0].boxes.id is None: - self.display_frames(im0) - return im0 + Args: + im0 (np.ndarray): The input inference image. 
- boxes = results[0].boxes.xyxy.cpu().tolist() - clss = results[0].boxes.cls.cpu().tolist() + Examples: + >>> parking_manager = ParkingManagement(json_file="parking_regions.json") + >>> image = cv2.imread("parking_lot.jpg") + >>> parking_manager.process_data(image) + """ + self.extract_tracks(im0) # extract tracks from im0 + es, fs = len(self.json), 0 # empty slots, filled slots + annotator = Annotator(im0, self.line_width) # init annotator - for region in self.json_data: + for region in self.json: # Convert points to a NumPy array with the correct dtype and reshape properly pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2)) rg_occupied = False # occupied region initialization - for box, cls in zip(boxes, clss): - xc = int((box[0] + box[2]) / 2) - yc = int((box[1] + box[3]) / 2) - annotator.display_objects_labels( - im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10 - ) + for box, cls in zip(self.boxes, self.clss): + xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2) dist = cv2.pointPolygonTest(pts_array, (xc, yc), False) if dist >= 0: + # cv2.circle(im0, (xc, yc), radius=self.line_width * 4, color=self.dc, thickness=-1) + annotator.display_objects_labels( + im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10 + ) rg_occupied = True break - if rg_occupied: - fs += 1 - es -= 1 - + fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es) # Plotting regions - color = self.occ if rg_occupied else self.arc - cv2.polylines(im0, [pts_array], isClosed=True, color=color, thickness=2) + cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2) - self.pr_info["Occupancy"] = fs - self.pr_info["Available"] = es + self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10) - - self.display_frames(im0) - return im0 - - def display_frames(self, im0): - """ - Display 
frame. - - Args: - im0 (ndarray): inference image - """ - if self.env_check: - cv2.imshow("Ultralytics Parking Manager", im0) - # Break Window - if cv2.waitKey(1) & 0xFF == ord("q"): - return + self.display_output(im0) # display output with base class function + return im0 # return output image for more usage diff --git a/ultralytics/solutions/queue_management.py b/ultralytics/solutions/queue_management.py index ef601503950..4fcf8fa7103 100644 --- a/ultralytics/solutions/queue_management.py +++ b/ultralytics/solutions/queue_management.py @@ -1,127 +1,112 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -from collections import defaultdict - -import cv2 - -from ultralytics.utils.checks import check_imshow, check_requirements +from ultralytics.solutions.solutions import BaseSolution from ultralytics.utils.plotting import Annotator, colors -check_requirements("shapely>=2.0.0") - -from shapely.geometry import Point, Polygon - - -class QueueManager: - """A class to manage the queue in a real-time video stream based on object tracks.""" - def __init__( - self, - names, - reg_pts=None, - line_thickness=2, - view_img=False, - draw_tracks=False, - ): +class QueueManager(BaseSolution): + """ + Manages queue counting in real-time video streams based on object tracks. + + This class extends BaseSolution to provide functionality for tracking and counting objects within a specified + region in video frames. + + Attributes: + counts (int): The current count of objects in the queue. + rect_color (Tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle. + region_length (int): The number of points defining the queue region. + annotator (Annotator): An instance of the Annotator class for drawing on frames. + track_line (List[Tuple[int, int]]): List of track line coordinates. + track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object. 
+ + Methods: + initialize_region: Initializes the queue region. + process_queue: Processes a single frame for queue management. + extract_tracks: Extracts object tracks from the current frame. + store_tracking_history: Stores the tracking history for an object. + display_output: Displays the processed output. + + Examples: + >>> cap = cv2.VideoCapture("Path/to/video/file.mp4") + >>> queue_manager = QueueManager(region=[100, 100, 200, 200, 300, 300]) + >>> while cap.isOpened(): + >>> success, im0 = cap.read() + >>> if not success: + >>> break + >>> out = queue.process_queue(im0) + """ + + def __init__(self, **kwargs): + """Initializes the QueueManager with parameters for tracking and counting objects in a video stream.""" + super().__init__(**kwargs) + self.initialize_region() + self.counts = 0 # Queue counts Information + self.rect_color = (255, 255, 255) # Rectangle color + self.region_length = len(self.region) # Store region length for further usage + + def process_queue(self, im0): """ - Initializes the QueueManager with specified parameters for tracking and counting objects. + Processes the queue management for a single frame of video. Args: - names (dict): A dictionary mapping class IDs to class names. - reg_pts (list of tuples, optional): Points defining the counting region polygon. Defaults to a predefined - rectangle. - line_thickness (int, optional): Thickness of the annotation lines. Defaults to 2. - view_img (bool, optional): Whether to display the image frames. Defaults to False. - draw_tracks (bool, optional): Whether to draw tracks of the objects. Defaults to False. + im0 (numpy.ndarray): Input image for processing, typically a frame from a video stream. + + Returns: + (numpy.ndarray): Processed image with annotations, bounding boxes, and queue counts. + + This method performs the following steps: + 1. Resets the queue count for the current frame. + 2. Initializes an Annotator object for drawing on the image. + 3. Extracts tracks from the image. + 4. 
Draws the counting region on the image. + 5. For each detected object: + - Draws bounding boxes and labels. + - Stores tracking history. + - Draws centroids and tracks. + - Checks if the object is inside the counting region and updates the count. + 6. Displays the queue count on the image. + 7. Displays the processed output. + + Examples: + >>> queue_manager = QueueManager() + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = queue_manager.process_queue(frame) """ - # Region & Line Information - self.reg_pts = reg_pts if reg_pts is not None else [(20, 60), (20, 680), (1120, 680), (1120, 60)] - self.counting_region = ( - Polygon(self.reg_pts) if len(self.reg_pts) >= 3 else Polygon([(20, 60), (20, 680), (1120, 680), (1120, 60)]) - ) - - # annotation Information - self.tf = line_thickness - self.view_img = view_img - - self.names = names # Class names - - # Object counting Information - self.counts = 0 - - # Tracks info - self.track_history = defaultdict(list) - self.draw_tracks = draw_tracks + self.counts = 0 # Reset counts every frame + self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator + self.extract_tracks(im0) # Extract tracks - # Check if environment supports imshow - self.env_check = check_imshow(warn=True) + self.annotator.draw_region( + reg_pts=self.region, color=self.rect_color, thickness=self.line_width * 2 + ) # Draw region - def extract_and_process_tracks(self, tracks, im0): - """Extracts and processes tracks for queue management in a video stream.""" - # Initialize annotator and draw the queue region - annotator = Annotator(im0, self.tf, self.names) - self.counts = 0 # Reset counts every frame - if tracks[0].boxes.id is not None: - boxes = tracks[0].boxes.xyxy.cpu() - clss = tracks[0].boxes.cls.cpu().tolist() - track_ids = tracks[0].boxes.id.int().cpu().tolist() - - # Extract tracks - for box, track_id, cls in zip(boxes, track_ids, clss): - # Draw bounding box - annotator.box_label(box, label=self.names[cls], 
color=colors(int(track_id), True)) - - # Update track history - track_line = self.track_history[track_id] - track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) - if len(track_line) > 30: - track_line.pop(0) - - # Draw track trails if enabled - if self.draw_tracks: - annotator.draw_centroid_and_tracks( - track_line, - color=colors(int(track_id), True), - track_thickness=self.line_thickness, - ) - - prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None - - # Check if the object is inside the counting region - if len(self.reg_pts) >= 3: - is_inside = self.counting_region.contains(Point(track_line[-1])) - if prev_position is not None and is_inside: - self.counts += 1 + for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): + # Draw bounding box and counting region + self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True)) + self.store_tracking_history(track_id, box) # Store track history - # Display queue counts - label = f"Queue Counts : {str(self.counts)}" - if label is not None: - annotator.queue_counts_display( - label, - points=self.reg_pts, - region_color=(255, 0, 255), - txt_color=(104, 31, 17), + # Draw tracks of objects + self.annotator.draw_centroid_and_tracks( + self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width ) - if self.env_check and self.view_img: - annotator.draw_region(reg_pts=self.reg_pts, thickness=self.tf * 2, color=(255, 0, 255)) - cv2.imshow("Ultralytics YOLOv8 Queue Manager", im0) - # Close window on 'q' key press - if cv2.waitKey(1) & 0xFF == ord("q"): - return + # Cache frequently accessed attributes + track_history = self.track_history.get(track_id, []) - def process_queue(self, im0, tracks): - """ - Main function to start the queue management process. - - Args: - im0 (ndarray): Current frame from the video stream. - tracks (list): List of tracks obtained from the object tracking process. 
- """ - self.extract_and_process_tracks(tracks, im0) # Extract and process tracks - return im0 + # store previous position of track and check if the object is inside the counting region + prev_position = None + if len(track_history) > 1: + prev_position = track_history[-2] + if self.region_length >= 3 and prev_position and self.r_s.contains(self.Point(self.track_line[-1])): + self.counts += 1 + # Display queue counts + self.annotator.queue_counts_display( + f"Queue Counts : {str(self.counts)}", + points=self.region, + region_color=self.rect_color, + txt_color=(104, 31, 17), + ) + self.display_output(im0) # display output with base class function -if __name__ == "__main__": - classes_names = {0: "person", 1: "car"} # example class names - queue_manager = QueueManager(classes_names) + return im0 # return output image for more usage diff --git a/ultralytics/solutions/region_counter.py b/ultralytics/solutions/region_counter.py new file mode 100644 index 00000000000..5a2953f3c61 --- /dev/null +++ b/ultralytics/solutions/region_counter.py @@ -0,0 +1,116 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license + +from ultralytics.solutions.solutions import BaseSolution +from ultralytics.utils import LOGGER +from ultralytics.utils.plotting import Annotator, colors + + +class RegionCounter(BaseSolution): + """ + A class designed for real-time counting of objects within user-defined regions in a video stream. + + This class inherits from `BaseSolution` and offers functionalities to define polygonal regions in a video + frame, track objects, and count those objects that pass through each defined region. This makes it useful + for applications that require counting in specified areas, such as monitoring zones or segmented sections. + + Attributes: + region_template (dict): A template for creating new counting regions with default attributes including + the name, polygon coordinates, and display colors. 
+ counting_regions (list): A list storing all defined regions, where each entry is based on `region_template` + and includes specific region settings like name, coordinates, and color. + + Methods: + add_region: Adds a new counting region with specified attributes, such as the region's name, polygon points, + region color, and text color. + count: Processes video frames to count objects in each region, drawing regions and displaying counts + on the frame. Handles object detection, region definition, and containment checks. + """ + + def __init__(self, **kwargs): + """Initializes the RegionCounter class for real-time counting in different regions of the video streams.""" + super().__init__(**kwargs) + self.region_template = { + "name": "Default Region", + "polygon": None, + "counts": 0, + "dragging": False, + "region_color": (255, 255, 255), + "text_color": (0, 0, 0), + } + self.counting_regions = [] + + def add_region(self, name, polygon_points, region_color, text_color): + """ + Adds a new region to the counting list based on the provided template with specific attributes. + + Args: + name (str): Name assigned to the new region. + polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon. + region_color (tuple): BGR color for region visualization. + text_color (tuple): BGR color for the text within the region. + """ + region = self.region_template.copy() + region.update( + { + "name": name, + "polygon": self.Polygon(polygon_points), + "region_color": region_color, + "text_color": text_color, + } + ) + self.counting_regions.append(region) + + def count(self, im0): + """ + Processes the input frame to detect and count objects within each defined region. + + Args: + im0 (numpy.ndarray): Input image frame where objects and regions are annotated. + + Returns: + im0 (numpy.ndarray): Processed image frame with annotated counting information. 
+ """ + self.annotator = Annotator(im0, line_width=self.line_width) + self.extract_tracks(im0) + + # Region initialization and conversion + if self.region is None: + self.initialize_region() + regions = {"Region#01": self.region} + else: + regions = self.region if isinstance(self.region, dict) else {"Region#01": self.region} + + # Draw regions and process counts for each defined area + for idx, (region_name, reg_pts) in enumerate(regions.items(), start=1): + if not isinstance(reg_pts, list) or not all(isinstance(pt, tuple) for pt in reg_pts): + LOGGER.warning(f"Invalid region points for {region_name}: {reg_pts}") + continue # Skip invalid entries + color = colors(idx, True) + self.annotator.draw_region(reg_pts=reg_pts, color=color, thickness=self.line_width * 2) + self.add_region(region_name, reg_pts, color, self.annotator.get_txt_color()) + + # Prepare regions for containment check + for region in self.counting_regions: + region["prepared_polygon"] = self.prep(region["polygon"]) + + # Process bounding boxes and count objects within each region + for box, cls in zip(self.boxes, self.clss): + self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True)) + bbox_center = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) + + for region in self.counting_regions: + if region["prepared_polygon"].contains(self.Point(bbox_center)): + region["counts"] += 1 + + # Display counts in each region + for region in self.counting_regions: + self.annotator.text_label( + region["polygon"].bounds, + label=str(region["counts"]), + color=region["region_color"], + txt_color=region["text_color"], + ) + region["counts"] = 0 # Reset count for next frame + + self.display_output(im0) + return im0 diff --git a/ultralytics/solutions/security_alarm.py b/ultralytics/solutions/security_alarm.py new file mode 100644 index 00000000000..e07119bc5bd --- /dev/null +++ b/ultralytics/solutions/security_alarm.py @@ -0,0 +1,144 @@ +# Ultralytics ๐Ÿš€ AGPL-3.0 License - 
https://ultralytics.com/license + +from ultralytics.solutions.solutions import BaseSolution +from ultralytics.utils import LOGGER +from ultralytics.utils.plotting import Annotator, colors + + +class SecurityAlarm(BaseSolution): + """ + A class to manage security alarm functionalities for real-time monitoring. + + This class extends the BaseSolution class and provides features to monitor + objects in a frame, send email notifications when specific thresholds are + exceeded for total detections, and annotate the output frame for visualization. + + Attributes: + email_sent (bool): Flag to track if an email has already been sent for the current event. + records (int): Threshold for the number of detected objects to trigger an alert. + + Methods: + authenticate: Sets up email server authentication for sending alerts. + send_email: Sends an email notification with details and an image attachment. + monitor: Monitors the frame, processes detections, and triggers alerts if thresholds are crossed. + + Examples: + >>> security = SecurityAlarm() + >>> security.authenticate("abc@gmail.com", "1111222233334444", "xyz@gmail.com") + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = security.monitor(frame) + """ + + def __init__(self, **kwargs): + """Initializes the SecurityAlarm class with parameters for real-time object monitoring.""" + super().__init__(**kwargs) + self.email_sent = False + self.records = self.CFG["records"] + self.server = None + self.to_email = "" + self.from_email = "" + + def authenticate(self, from_email, password, to_email): + """ + Authenticates the email server for sending alert notifications. + + Args: + from_email (str): Sender's email address. + password (str): Password for the sender's email account. + to_email (str): Recipient's email address. + + This method initializes a secure connection with the SMTP server + and logs in using the provided credentials. 
+ + Examples: + >>> alarm = SecurityAlarm() + >>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com") + """ + import smtplib + + self.server = smtplib.SMTP("smtp.gmail.com: 587") + self.server.starttls() + self.server.login(from_email, password) + self.to_email = to_email + self.from_email = from_email + + def send_email(self, im0, records=5): + """ + Sends an email notification with an image attachment indicating the number of objects detected. + + Args: + im0 (numpy.ndarray): The input image or frame to be attached to the email. + records (int): The number of detected objects to be included in the email message. + + This method encodes the input image, composes the email message with + details about the detection, and sends it to the specified recipient. + + Examples: + >>> alarm = SecurityAlarm() + >>> frame = cv2.imread("path/to/image.jpg") + >>> alarm.send_email(frame, records=10) + """ + from email.mime.image import MIMEImage + from email.mime.multipart import MIMEMultipart + from email.mime.text import MIMEText + + import cv2 + + img_bytes = cv2.imencode(".jpg", im0)[1].tobytes() # Encode the image as JPEG + + # Create the email + message = MIMEMultipart() + message["From"] = self.from_email + message["To"] = self.to_email + message["Subject"] = "Security Alert" + + # Add the text message body + message_body = f"Ultralytics ALERT!!! {records} objects have been detected!!" + message.attach(MIMEText(message_body)) + + # Attach the image + image_attachment = MIMEImage(img_bytes, name="ultralytics.jpg") + message.attach(image_attachment) + + # Send the email + try: + self.server.send_message(message) + LOGGER.info("โœ… Email sent successfully!") + except Exception as e: + print(f"โŒ Failed to send email: {e}") + + def monitor(self, im0): + """ + Monitors the frame, processes object detections, and triggers alerts if thresholds are exceeded. + + Args: + im0 (numpy.ndarray): The input image or frame to be processed and annotated. 
class BaseSolution:
    """
    A base class for managing Ultralytics Solutions.

    This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
    and region initialization.

    Attributes:
        LineString (shapely.geometry.LineString): Class for creating line string geometries.
        Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
        Point (shapely.geometry.Point): Class for creating point geometries.
        prep (Callable): `shapely.prepared.prep`, stored for use by subclasses.
        CFG (Dict): Configuration dictionary built from the solution and model defaults and updated with kwargs.
        region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
        line_width (int): Width of lines used in visualizations.
        model (ultralytics.YOLO): Loaded YOLO model instance.
        names (Dict[int, str]): Dictionary mapping class indices to class names.
        env_check (bool): Flag indicating whether the environment supports image display.
        track_history (collections.defaultdict): Dictionary to store tracking history for each object.

    Methods:
        extract_tracks: Apply object tracking and extract tracks from an input image.
        store_tracking_history: Store object tracking history for a given track ID and bounding box.
        initialize_region: Initialize the counting region and line segment based on configuration.
        display_output: Display the results of processing, including showing frames or saving results.

    Examples:
        >>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
        >>> solution.initialize_region()
        >>> image = cv2.imread("image.jpg")
        >>> solution.extract_tracks(image)
        >>> solution.display_output(image)
    """

    def __init__(self, IS_CLI=False, **kwargs):
        """
        Initializes the `BaseSolution` class with configuration settings and the YOLO model for Ultralytics solutions.

        Args:
            IS_CLI (bool): Enables CLI mode if set; in CLI mode a demo video is downloaded when no source is given.
            **kwargs (Any): Configuration overrides merged on top of the solution and model defaults.
        """
        check_requirements("shapely>=2.0.0")
        from shapely.geometry import LineString, Point, Polygon
        from shapely.prepared import prep

        self.LineString = LineString
        self.Polygon = Polygon
        self.Point = Point
        self.prep = prep
        self.annotator = None  # Initialize annotator
        self.tracks = None
        self.track_data = None
        self.boxes = []
        self.clss = []
        self.track_ids = []
        self.track_line = None
        self.r_s = None

        # Merge defaults with the caller's overrides into fresh dictionaries. The module-level default
        # dictionaries are shared by every solution instance, so they must not be updated in place:
        # doing so would leak one instance's kwargs into all instances created afterwards.
        sol_cfg = {**DEFAULT_SOL_DICT, **kwargs}
        self.CFG = {**sol_cfg, **DEFAULT_CFG_DICT, **kwargs}
        LOGGER.info(f"Ultralytics Solutions: ✅ {sol_cfg}")

        self.region = self.CFG["region"]  # Store region data for other classes usage
        self.line_width = (
            self.CFG["line_width"] if self.CFG["line_width"] is not None else 2
        )  # Store line_width for usage

        # Load Model and store classes names
        if self.CFG["model"] is None:
            self.CFG["model"] = "yolo11n.pt"
        self.model = YOLO(self.CFG["model"])
        self.names = self.model.names

        self.track_add_args = {  # Tracker additional arguments for advance configuration
            k: self.CFG[k] for k in ["verbose", "iou", "conf", "device", "max_det", "half", "tracker"]
        }

        if IS_CLI and self.CFG["source"] is None:
            d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
            LOGGER.warning(f"⚠️ WARNING: source not provided. using default source {ASSETS_URL}/{d_s}")
            from ultralytics.utils.downloads import safe_download

            safe_download(f"{ASSETS_URL}/{d_s}")  # download source from ultralytics assets
            self.CFG["source"] = d_s  # set default source

        # Initialize environment and region setup
        self.env_check = check_imshow(warn=True)
        self.track_history = defaultdict(list)

    def extract_tracks(self, im0):
        """
        Applies object tracking and extracts tracks from an input image or frame.

        On return `self.boxes`, `self.clss` and `self.track_ids` hold the tracked detections of this frame,
        or are empty lists when the tracker assigned no ids.

        Args:
            im0 (ndarray): The input image or frame.

        Examples:
            >>> solution = BaseSolution()
            >>> frame = cv2.imread("path/to/image.jpg")
            >>> solution.extract_tracks(frame)
        """
        self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)

        # Extract tracks for OBB or object detection
        self.track_data = self.tracks[0].obb or self.tracks[0].boxes

        if self.track_data and self.track_data.id is not None:
            self.boxes = self.track_data.xyxy.cpu()
            self.clss = self.track_data.cls.cpu().tolist()
            self.track_ids = self.track_data.id.int().cpu().tolist()
        else:
            LOGGER.warning("WARNING ⚠️ no tracks found!")
            self.boxes, self.clss, self.track_ids = [], [], []

    def store_tracking_history(self, track_id, box):
        """
        Stores the tracking history of an object.

        This method updates the tracking history for a given object by appending the center point of its
        bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
        `self.track_line` is left pointing at the history list of `track_id`.

        Args:
            track_id (int): The unique identifier for the tracked object.
            box (List[float]): The bounding box coordinates of the object in the format [x1, y1, x2, y2].

        Examples:
            >>> solution = BaseSolution()
            >>> solution.store_tracking_history(1, [100, 200, 300, 400])
        """
        # Store tracking history
        self.track_line = self.track_history[track_id]
        self.track_line.append(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
        if len(self.track_line) > 30:
            self.track_line.pop(0)  # drop the oldest point to keep the window at 30

    def initialize_region(self):
        """Initialize the counting region and line segment based on configuration settings."""
        if self.region is None:
            self.region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
        self.r_s = (
            self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
        )  # three or more points form a polygon region, fewer form a line

    def display_output(self, im0):
        """
        Display the processed frame in a window when display is enabled and supported.

        Args:
            im0 (numpy.ndarray): The input image or frame that has been processed and annotated.

        Examples:
            >>> solution = BaseSolution()
            >>> frame = cv2.imread("path/to/image.jpg")
            >>> solution.display_output(frame)

        Notes:
            - This method will only display output if the 'show' configuration is set to True and the environment
              supports image display.
            - Pressing the 'q' key closes the display window. A later call shows the window again, so the caller
              decides whether to keep processing frames.
        """
        if self.CFG.get("show") and self.env_check:
            cv2.imshow("Ultralytics Solutions", im0)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                cv2.destroyAllWindows()  # close the window as documented instead of silently ignoring the key
                return
- Args: - names (dict): Dictionary of class names. - reg_pts (list, optional): List of region points for speed estimation. Defaults to [(20, 400), (1260, 400)]. - view_img (bool, optional): Whether to display the image with annotations. Defaults to False. - line_thickness (int, optional): Thickness of the lines for drawing boxes and tracks. Defaults to 2. - spdl_dist_thresh (int, optional): Distance threshold for speed calculation. Defaults to 10. - """ - # Region information - self.reg_pts = reg_pts if reg_pts is not None else [(20, 400), (1260, 400)] + Attributes: + spd (Dict[int, float]): Dictionary storing speed data for tracked objects. + trkd_ids (List[int]): List of tracked object IDs that have already been speed-estimated. + trk_pt (Dict[int, float]): Dictionary storing previous timestamps for tracked objects. + trk_pp (Dict[int, Tuple[float, float]]): Dictionary storing previous positions for tracked objects. + annotator (Annotator): Annotator object for drawing on images. + region (List[Tuple[int, int]]): List of points defining the speed estimation region. + track_line (List[Tuple[float, float]]): List of points representing the object's track. + r_s (LineString): LineString object representing the speed estimation region. + + Methods: + initialize_region: Initializes the speed estimation region. + estimate_speed: Estimates the speed of objects based on tracking data. + store_tracking_history: Stores the tracking history for an object. + extract_tracks: Extracts tracks from the current frame. + display_output: Displays the output with annotations. 
def estimate_speed(self, im0):
    """
    Estimates the speed of objects based on tracking data.

    For every tracked object the method stores the current box centre, draws the box and its track, and
    checks whether the segment from the previously stored centre to the current one intersects the region.
    The first time that happens for a track that already has an earlier sample, a speed value is stored
    in `self.spd` and shown in the label on later frames.

    Args:
        im0 (np.ndarray): Input image for processing. Shape is typically (H, W, C) for RGB images.

    Returns:
        (np.ndarray): Processed image with speed estimations and annotations.

    Examples:
        >>> estimator = SpeedEstimator()
        >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
        >>> processed_image = estimator.estimate_speed(image)
    """
    self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
    self.extract_tracks(im0)  # Extract tracks

    self.annotator.draw_region(
        reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
    )  # Draw region

    for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
        self.store_tracking_history(track_id, box)  # Store track history
        current_point = self.track_line[-1]

        # First sighting of a track: 0 marks "no previous timestamp yet", previous point is the current one
        if track_id not in self.trk_pt:
            self.trk_pt[track_id] = 0
        if track_id not in self.trk_pp:
            self.trk_pp[track_id] = current_point

        speed_label = f"{int(self.spd[track_id])} km/h" if track_id in self.spd else self.names[int(cls)]
        track_color = colors(int(track_id), True)
        self.annotator.box_label(box, label=speed_label, color=track_color)  # Draw bounding box

        # Draw tracks of objects
        self.annotator.draw_centroid_and_tracks(self.track_line, color=track_color, track_thickness=self.line_width)

        # The object is considered to have reached the region when its last movement segment touches it
        crossed = self.LineString([self.trk_pp[track_id], current_point]).intersects(self.r_s)

        # Without an earlier sample the elapsed time would be measured from the 0 sentinel (i.e. from the
        # epoch), yielding a meaningless near-zero speed that is then never re-estimated. Skip such tracks
        # until a real previous timestamp exists.
        has_previous_sample = self.trk_pt[track_id] != 0

        if crossed and has_previous_sample and track_id not in self.trkd_ids:
            self.trkd_ids.append(track_id)
            time_difference = time() - self.trk_pt[track_id]
            if time_difference > 0:
                # NOTE(review): this is vertical pixel displacement per elapsed second; the label prints it
                # as km/h but no unit conversion is visible here - confirm the intended calibration.
                self.spd[track_id] = np.abs(current_point[1] - self.trk_pp[track_id][1]) / time_difference

        # Remember this frame's sample for the next call
        self.trk_pt[track_id] = time()
        self.trk_pp[track_id] = current_point

    self.display_output(im0)  # display output with base class function

    return im0  # return output image for more usage
b/ultralytics/solutions/streamlit_inference.py @@ -1,149 +1,190 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import io -import time +from typing import Any import cv2 -import torch +from ultralytics import YOLO +from ultralytics.utils import LOGGER from ultralytics.utils.checks import check_requirements from ultralytics.utils.downloads import GITHUB_ASSETS_STEMS -def inference(model=None): - """Runs real-time object detection on video input using Ultralytics YOLOv8 in a Streamlit application.""" - check_requirements("streamlit>=1.29.0") # scope imports for faster ultralytics package load speeds - import streamlit as st - - from ultralytics import YOLO - - # Hide main menu style - menu_style_cfg = """""" - - # Main title of streamlit application - main_title_cfg = """

- Ultralytics YOLOv8 Streamlit Application -

""" - - # Subtitle of streamlit application - sub_title_cfg = """

- Experience real-time object detection on your webcam with the power of Ultralytics YOLOv8! ๐Ÿš€

-
""" - - # Set html page configuration - st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide", initial_sidebar_state="auto") - - # Append the custom HTML - st.markdown(menu_style_cfg, unsafe_allow_html=True) - st.markdown(main_title_cfg, unsafe_allow_html=True) - st.markdown(sub_title_cfg, unsafe_allow_html=True) - - # Add ultralytics logo in sidebar - with st.sidebar: - logo = "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg" - st.image(logo, width=250) +class Inference: + """ + A class to perform object detection, image classification, image segmentation and pose estimation inference using + Streamlit and Ultralytics YOLO models. It provides the functionalities such as loading models, configuring settings, + uploading video files, and performing real-time inference. + + Attributes: + st (module): Streamlit module for UI creation. + temp_dict (dict): Temporary dictionary to store the model path. + model_path (str): Path to the loaded model. + model (YOLO): The YOLO model instance. + source (str): Selected video source. + enable_trk (str): Enable tracking option. + conf (float): Confidence threshold. + iou (float): IoU threshold for non-max suppression. + vid_file_name (str): Name of the uploaded video file. + selected_ind (list): List of selected class indices. + + Methods: + web_ui: Sets up the Streamlit web interface with custom HTML elements. + sidebar: Configures the Streamlit sidebar for model and inference settings. + source_upload: Handles video file uploads through the Streamlit interface. + configure: Configures the model and loads selected classes for inference. + inference: Performs real-time object detection inference. + + Examples: + >>> inf = solutions.Inference(model="path/to/model.pt") # Model is not necessary argument. 
+ >>> inf.inference() + """ + + def __init__(self, **kwargs: Any): + """ + Initializes the Inference class, checking Streamlit requirements and setting up the model path. + + Args: + **kwargs (Any): Additional keyword arguments for model configuration. + """ + check_requirements("streamlit>=1.29.0") # scope imports for faster ultralytics package load speeds + import streamlit as st + + self.st = st # Reference to the Streamlit class instance + self.source = None # Placeholder for video or webcam source details + self.enable_trk = False # Flag to toggle object tracking + self.conf = 0.25 # Confidence threshold for detection + self.iou = 0.45 # Intersection-over-Union (IoU) threshold for non-maximum suppression + self.org_frame = None # Container for the original frame to be displayed + self.ann_frame = None # Container for the annotated frame to be displayed + self.vid_file_name = None # Holds the name of the video file + self.selected_ind = [] # List of selected classes for detection or tracking + self.model = None # Container for the loaded model instance + + self.temp_dict = {"model": None, **kwargs} + self.model_path = None # Store model file name with path + if self.temp_dict["model"] is not None: + self.model_path = self.temp_dict["model"] + + LOGGER.info(f"Ultralytics Solutions: โœ… {self.temp_dict}") + + def web_ui(self): + """Sets up the Streamlit web interface with custom HTML elements.""" + menu_style_cfg = """""" # Hide main menu style + + # Main title of streamlit application + main_title_cfg = """

Ultralytics YOLO Streamlit Application

""" + + # Subtitle of streamlit application + sub_title_cfg = """

Experience real-time object detection on your webcam with the power + of Ultralytics YOLO! ๐Ÿš€

def configure(self):
    """
    Configures the model and loads selected classes for inference.

    Builds the model dropdown from the official `yolo11*` asset names (shown with an upper-case "YOLO"
    prefix) plus the user-supplied model, if any, loads the selected model into `self.model`, and stores
    the indices of the classes chosen in the sidebar in `self.selected_ind`.
    """
    # Official models are displayed as "YOLO11n" etc.; their weight files use the lower-case name
    official_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolo11")]
    available_models = list(official_models)
    if self.model_path:  # If user provided the custom model, insert model without suffix as *.pt is added later
        available_models.insert(0, self.model_path.split(".pt")[0])
    selected_model = self.st.sidebar.selectbox("Model", available_models)

    # Only undo the display-name capitalisation for official models. A user-supplied path must be passed
    # through unchanged, otherwise mixed-case paths cannot be found on case-sensitive filesystems.
    if selected_model in official_models:
        weights = f"{selected_model.lower()}.pt"
    else:
        weights = f"{selected_model}.pt"

    with self.st.spinner("Model is downloading..."):
        self.model = YOLO(weights)  # Load the YOLO model
        class_names = list(self.model.names.values())  # Convert dictionary to list of class names
    self.st.success("Model loaded successfully!")

    # Multiselect box with class names and get indices of selected classes
    selected_classes = self.st.sidebar.multiselect("Classes", class_names, default=class_names[:3])
    self.selected_ind = [class_names.index(option) for option in selected_classes]
Please verify the webcam is connected properly.") + break + + # Store model predictions + if self.enable_trk == "Yes": + results = self.model.track( + frame, conf=self.conf, iou=self.iou, classes=self.selected_ind, persist=True + ) + else: + results = self.model(frame, conf=self.conf, iou=self.iou, classes=self.selected_ind) + annotated_frame = results[0].plot() # Add annotations on frame + + if stop_button: + cap.release() # Release the capture + self.st.stop() # Stop streamlit app + + self.org_frame.image(frame, channels="BGR") # Display original frame + self.ann_frame.image(annotated_frame, channels="BGR") # Display processed frame + + cap.release() # Release the capture + cv2.destroyAllWindows() # Destroy window - # Add elements to vertical setting menu - st.sidebar.title("User Configuration") - # Add video source selection dropdown - source = st.sidebar.selectbox( - "Video", - ("webcam", "video"), - ) - - vid_file_name = "" - if source == "video": - vid_file = st.sidebar.file_uploader("Upload Video File", type=["mp4", "mov", "avi", "mkv"]) - if vid_file is not None: - g = io.BytesIO(vid_file.read()) # BytesIO Object - vid_location = "ultralytics.mp4" - with open(vid_location, "wb") as out: # Open temporary file as bytes - out.write(g.read()) # Read bytes into file - vid_file_name = "ultralytics.mp4" - elif source == "webcam": - vid_file_name = 0 - - # Add dropdown menu for model selection - available_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolov8")] - if model: - available_models.insert(0, model.split(".pt")[0]) # insert model without suffix as *.pt is added later - - selected_model = st.sidebar.selectbox("Model", available_models) - with st.spinner("Model is downloading..."): - model = YOLO(f"{selected_model.lower()}.pt") # Load the YOLO model - class_names = list(model.names.values()) # Convert dictionary to list of class names - st.success("Model loaded successfully!") - - # Multiselect box with class names and get 
indices of selected classes - selected_classes = st.sidebar.multiselect("Classes", class_names, default=class_names[:3]) - selected_ind = [class_names.index(option) for option in selected_classes] - - if not isinstance(selected_ind, list): # Ensure selected_options is a list - selected_ind = list(selected_ind) - - enable_trk = st.sidebar.radio("Enable Tracking", ("Yes", "No")) - conf = float(st.sidebar.slider("Confidence Threshold", 0.0, 1.0, 0.25, 0.01)) - iou = float(st.sidebar.slider("IoU Threshold", 0.0, 1.0, 0.45, 0.01)) - - col1, col2 = st.columns(2) - org_frame = col1.empty() - ann_frame = col2.empty() - - fps_display = st.sidebar.empty() # Placeholder for FPS display - - if st.sidebar.button("Start"): - videocapture = cv2.VideoCapture(vid_file_name) # Capture the video - - if not videocapture.isOpened(): - st.error("Could not open webcam.") - - stop_button = st.button("Stop") # Button to stop the inference - - while videocapture.isOpened(): - success, frame = videocapture.read() - if not success: - st.warning("Failed to read frame from webcam. 
class TrackZone(BaseSolution):
    """
    Region-restricted object tracking for video streams.

    Built on BaseSolution, this class tracks only the objects that fall inside a polygonal zone; everything
    outside the zone is masked out before tracking. The zone is either supplied by the user through the
    `region` setting or falls back to a built-in rectangle.

    Attributes:
        region (ndarray): The polygonal region for tracking, represented as a convex hull.

    Methods:
        trackzone: Processes each frame of the video, applying region-based tracking.

    Examples:
        >>> tracker = TrackZone()
        >>> frame = cv2.imread("frame.jpg")
        >>> processed_frame = tracker.trackzone(frame)
        >>> cv2.imshow("Tracked Frame", processed_frame)
    """

    def __init__(self, **kwargs):
        """Set up the base solution and convert the configured (or default) region into a convex hull."""
        super().__init__(**kwargs)
        fallback_zone = [(150, 150), (1130, 150), (1130, 570), (150, 570)]
        zone_points = np.array(self.region or fallback_zone, dtype=np.int32)
        self.region = cv2.convexHull(zone_points)

    def trackzone(self, im0):
        """
        Track objects inside the zone and annotate the frame.

        A single-channel mask of the zone is applied to the frame, tracks are extracted from the masked
        frame only, and the zone outline plus a labelled box for every track are drawn on the input frame.

        Args:
            im0 (numpy.ndarray): The input image or frame to be processed.

        Returns:
            (numpy.ndarray): The processed image with tracking id and bounding boxes annotations.

        Examples:
            >>> tracker = TrackZone()
            >>> frame = cv2.imread("path/to/image.jpg")
            >>> tracker.trackzone(frame)
        """
        self.annotator = Annotator(im0, line_width=self.line_width)

        # Paint the zone white on a black single-channel canvas, then keep only the pixels under it
        zone_mask = cv2.fillPoly(np.zeros_like(im0[:, :, 0]), [self.region], 255)
        zone_only = cv2.bitwise_and(im0, im0, mask=zone_mask)
        self.extract_tracks(zone_only)

        # Outline the zone on the frame that is returned to the caller
        outline_thickness = self.line_width * 2
        cv2.polylines(im0, [self.region], isClosed=True, color=(255, 255, 255), thickness=outline_thickness)

        # One labelled box per tracked object: "<class name>:<track id>"
        for bbox, tid, class_idx in zip(self.boxes, self.track_ids, self.clss):
            caption = f"{self.names[class_idx]}:{tid}"
            self.annotator.box_label(bbox, label=caption, color=colors(tid, True))

        self.display_output(im0)  # shown through the base class helper

        return im0
+To run the tracker on video streams, use a trained Detect, Segment or Pose model such as YOLO11n, YOLO11n-seg and YOLO11n-pose. #### Python @@ -42,9 +42,9 @@ To run the tracker on video streams, use a trained Detect, Segment or Pose model from ultralytics import YOLO # Load an official or custom model -model = YOLO("yolov8n.pt") # Load an official Detect model -model = YOLO("yolov8n-seg.pt") # Load an official Segment model -model = YOLO("yolov8n-pose.pt") # Load an official Pose model +model = YOLO("yolo11n.pt") # Load an official Detect model +model = YOLO("yolo11n-seg.pt") # Load an official Segment model +model = YOLO("yolo11n-pose.pt") # Load an official Pose model model = YOLO("path/to/best.pt") # Load a custom trained model # Perform tracking with the model @@ -58,9 +58,9 @@ results = model.track( ```bash # Perform tracking with various models using the command line interface -yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model -yolo track model=yolov8n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model -yolo track model=yolov8n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model +yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" # Official Detect model +yolo track model=yolo11n-seg.pt source="https://youtu.be/LNwODJXcvt4" # Official Segment model +yolo track model=yolo11n-pose.pt source="https://youtu.be/LNwODJXcvt4" # Official Pose model yolo track model=path/to/best.pt source="https://youtu.be/LNwODJXcvt4" # Custom trained model # Track using ByteTrack tracker @@ -81,7 +81,7 @@ Tracking configuration shares properties with Predict mode, such as `conf`, `iou from ultralytics import YOLO # Configure the tracking parameters and run the tracker -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True) ``` @@ -89,7 +89,7 @@ results = model.track(source="https://youtu.be/LNwODJXcvt4", 
conf=0.3, iou=0.5, ```bash # Configure tracking parameters and run the tracker using the command line interface -yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show +yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show ``` ### Tracker Selection @@ -102,7 +102,7 @@ Ultralytics also allows you to use a modified tracker configuration file. To do from ultralytics import YOLO # Load the model and run the tracker with a custom configuration file -model = YOLO("yolov8n.pt") +model = YOLO("yolo11n.pt") results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tracker.yaml") ``` @@ -110,7 +110,7 @@ results = model.track(source="https://youtu.be/LNwODJXcvt4", tracker="custom_tra ```bash # Load the model and run the tracker with a custom configuration file using the command line interface -yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' +yolo track model=yolo11n.pt source="https://youtu.be/LNwODJXcvt4" tracker='custom_tracker.yaml' ``` For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/trackers](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/trackers) page. @@ -119,7 +119,7 @@ For a comprehensive list of tracking arguments, refer to the [ultralytics/cfg/tr ### Persisting Tracks Loop -Here is a Python script using OpenCV (`cv2`) and YOLOv8 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). The `persist=True` argument tells the tracker than the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. +Here is a Python script using OpenCV (`cv2`) and YOLO11 to run object tracking on video frames. This script still assumes you have already installed the necessary packages (`opencv-python` and `ultralytics`). 
The `persist=True` argument tells the tracker than the current image or frame is the next in a sequence and to expect tracks from the previous image in the current image. #### Python @@ -128,8 +128,8 @@ import cv2 from ultralytics import YOLO -# Load the YOLOv8 model -model = YOLO("yolov8n.pt") +# Load the YOLO11 model +model = YOLO("yolo11n.pt") # Open the video file video_path = "path/to/video.mp4" @@ -141,14 +141,14 @@ while cap.isOpened(): success, frame = cap.read() if success: - # Run YOLOv8 tracking on the frame, persisting tracks between frames + # Run YOLO11 tracking on the frame, persisting tracks between frames results = model.track(frame, persist=True) # Visualize the results on the frame annotated_frame = results[0].plot() # Display the annotated frame - cv2.imshow("YOLOv8 Tracking", annotated_frame) + cv2.imshow("YOLO11 Tracking", annotated_frame) # Break the loop if 'q' is pressed if cv2.waitKey(1) & 0xFF == ord("q"): @@ -166,9 +166,9 @@ Please note the change from `model(frame)` to `model.track(frame)`, which enable ### Plotting Tracks Over Time -Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLOv8, plotting these tracks is a seamless and efficient process. +Visualizing object tracks over consecutive frames can provide valuable insights into the movement patterns and behavior of detected objects within a video. With Ultralytics YOLO11, plotting these tracks is a seamless and efficient process. -In the following example, we demonstrate how to utilize YOLOv8's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. 
By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. +In the following example, we demonstrate how to utilize YOLO11's tracking capabilities to plot the movement of detected objects across multiple video frames. This script involves opening a video file, reading it frame by frame, and utilizing the YOLO model to identify and track various objects. By retaining the center points of the detected bounding boxes and connecting them, we can draw lines that represent the paths followed by the tracked objects. #### Python @@ -180,8 +180,8 @@ import numpy as np from ultralytics import YOLO -# Load the YOLOv8 model -model = YOLO("yolov8n.pt") +# Load the YOLO11 model +model = YOLO("yolo11n.pt") # Open the video file video_path = "path/to/video.mp4" @@ -196,7 +196,7 @@ while cap.isOpened(): success, frame = cap.read() if success: - # Run YOLOv8 tracking on the frame, persisting tracks between frames + # Run YOLO11 tracking on the frame, persisting tracks between frames results = model.track(frame, persist=True) # Get the boxes and track IDs @@ -225,7 +225,7 @@ while cap.isOpened(): ) # Display the annotated frame - cv2.imshow("YOLOv8 Tracking", annotated_frame) + cv2.imshow("YOLO11 Tracking", annotated_frame) # Break the loop if 'q' is pressed if cv2.waitKey(1) & 0xFF == ord("q"): @@ -247,7 +247,7 @@ In the provided Python script, we make use of Python's `threading` module to run To ensure that each thread receives the correct parameters (the video file and the model to use), we define a function `run_tracker_in_thread` that accepts these parameters and contains the main tracking loop. This function reads the video frame by frame, runs the tracker, and displays the results. -Two different models are used in this example: `yolov8n.pt` and `yolov8n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. 
+Two different models are used in this example: `yolo11n.pt` and `yolo11n-seg.pt`, each tracking objects in a different video file. The video files are specified in `video_file1` and `video_file2`. The `daemon=True` parameter in `threading.Thread` means that these threads will be closed as soon as the main program finishes. We then start the threads with `start()` and use `join()` to make the main thread wait until both tracker threads have finished. @@ -278,8 +278,8 @@ def run_tracker_in_thread(filename, model): # Load the models -model1 = YOLO("yolov8n.pt") -model2 = YOLO("yolov8n-seg.pt") +model1 = YOLO("yolo11n.pt") +model2 = YOLO("yolo11n-seg.pt") # Define the video files for the trackers video_file1 = "path/to/video1.mp4" diff --git a/ultralytics/trackers/__init__.py b/ultralytics/trackers/__init__.py index bf51b8df699..2919511ba50 100644 --- a/ultralytics/trackers/__init__.py +++ b/ultralytics/trackers/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .bot_sort import BOTSORT from .byte_tracker import BYTETracker diff --git a/ultralytics/trackers/basetrack.py b/ultralytics/trackers/basetrack.py index f3baaf4e4be..47b27269e2a 100644 --- a/ultralytics/trackers/basetrack.py +++ b/ultralytics/trackers/basetrack.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Module defines the base classes and structures for object tracking in YOLO.""" from collections import OrderedDict @@ -44,7 +44,7 @@ class BaseTrack: start_frame (int): The frame number where tracking started. frame_id (int): The most recent frame ID processed by the track. time_since_update (int): Frames passed since the last update. - location (Tuple): The location of the object in the context of multi-camera tracking. + location (tuple): The location of the object in the context of multi-camera tracking. 
Methods: end_frame: Returns the ID of the last frame where the object was tracked. diff --git a/ultralytics/trackers/bot_sort.py b/ultralytics/trackers/bot_sort.py index 1f10dc7f59e..8a2d02e0e14 100644 --- a/ultralytics/trackers/bot_sort.py +++ b/ultralytics/trackers/bot_sort.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from collections import deque diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py index 31637de920c..807f4ad6678 100644 --- a/ultralytics/trackers/byte_tracker.py +++ b/ultralytics/trackers/byte_tracker.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import numpy as np diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py index b0103cf98e2..6e422f0db8a 100644 --- a/ultralytics/trackers/track.py +++ b/ultralytics/trackers/track.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from functools import partial from pathlib import Path @@ -31,6 +31,9 @@ def on_predict_start(predictor: object, persist: bool = False) -> None: >>> predictor = SomePredictorClass() >>> on_predict_start(predictor, persist=True) """ + if predictor.args.task == "classify": + raise ValueError("โŒ Classification doesn't support 'mode=track'") + if hasattr(predictor, "trackers") and persist: return diff --git a/ultralytics/trackers/utils/__init__.py b/ultralytics/trackers/utils/__init__.py index 9e68dc12245..77a19dcf0f8 100644 --- a/ultralytics/trackers/utils/__init__.py +++ b/ultralytics/trackers/utils/__init__.py @@ -1 +1 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py index 3619057fa72..e3cd2dc88ca 100644 --- 
a/ultralytics/trackers/utils/gmc.py +++ b/ultralytics/trackers/utils/gmc.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import copy @@ -26,9 +26,9 @@ class GMC: Methods: __init__: Initializes a GMC object with the specified method and downscale factor. apply: Applies the chosen method to a raw frame and optionally uses provided detections. - applyEcc: Applies the ECC algorithm to a raw frame. - applyFeatures: Applies feature-based methods like ORB or SIFT to a raw frame. - applySparseOptFlow: Applies the Sparse Optical Flow method to a raw frame. + apply_ecc: Applies the ECC algorithm to a raw frame. + apply_features: Applies feature-based methods like ORB or SIFT to a raw frame. + apply_sparseoptflow: Applies the Sparse Optical Flow method to a raw frame. reset_params: Resets the internal parameters of the GMC object. Examples: @@ -108,15 +108,15 @@ def apply(self, raw_frame: np.array, detections: list = None) -> np.array: (480, 640, 3) """ if self.method in {"orb", "sift"}: - return self.applyFeatures(raw_frame, detections) + return self.apply_features(raw_frame, detections) elif self.method == "ecc": - return self.applyEcc(raw_frame) + return self.apply_ecc(raw_frame) elif self.method == "sparseOptFlow": - return self.applySparseOptFlow(raw_frame) + return self.apply_sparseoptflow(raw_frame) else: return np.eye(2, 3) - def applyEcc(self, raw_frame: np.array) -> np.array: + def apply_ecc(self, raw_frame: np.array) -> np.array: """ Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation. @@ -128,7 +128,7 @@ def applyEcc(self, raw_frame: np.array) -> np.array: Examples: >>> gmc = GMC(method="ecc") - >>> processed_frame = gmc.applyEcc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])) + >>> processed_frame = gmc.apply_ecc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])) >>> print(processed_frame) [[1. 0. 0.] [0. 1. 
0.]] @@ -161,7 +161,7 @@ def applyEcc(self, raw_frame: np.array) -> np.array: return H - def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.array: + def apply_features(self, raw_frame: np.array, detections: list = None) -> np.array: """ Apply feature-based methods like ORB or SIFT to a raw frame. @@ -175,7 +175,7 @@ def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.arra Examples: >>> gmc = GMC(method="orb") >>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) - >>> processed_frame = gmc.applyFeatures(raw_frame) + >>> processed_frame = gmc.apply_features(raw_frame) >>> print(processed_frame.shape) (2, 3) """ @@ -304,7 +304,7 @@ def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.arra return H - def applySparseOptFlow(self, raw_frame: np.array) -> np.array: + def apply_sparseoptflow(self, raw_frame: np.array) -> np.array: """ Apply Sparse Optical Flow method to a raw frame. @@ -316,7 +316,7 @@ def applySparseOptFlow(self, raw_frame: np.array) -> np.array: Examples: >>> gmc = GMC() - >>> result = gmc.applySparseOptFlow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])) + >>> result = gmc.apply_sparseoptflow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])) >>> print(result) [[1. 0. 0.] [0. 1. 
0.]] diff --git a/ultralytics/trackers/utils/kalman_filter.py b/ultralytics/trackers/utils/kalman_filter.py index d103d0bb112..8a212ba63a9 100644 --- a/ultralytics/trackers/utils/kalman_filter.py +++ b/ultralytics/trackers/utils/kalman_filter.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import numpy as np import scipy.linalg diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py index f969f1126af..f15f64df185 100644 --- a/ultralytics/trackers/utils/matching.py +++ b/ultralytics/trackers/utils/matching.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import numpy as np import scipy @@ -13,7 +13,7 @@ except (ImportError, AssertionError, AttributeError): from ultralytics.utils.checks import check_requirements - check_requirements("lapx>=0.5.2") # update to lap package from https://github.com/rathaROG/lapx + check_requirements("lap>=0.5.12") # https://github.com/gatagat/lap import lap @@ -27,10 +27,9 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr use_lap (bool): Use lap.lapjv for the assignment. If False, scipy.optimize.linear_sum_assignment is used. Returns: - (tuple): A tuple containing: - - matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches. - - unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,). - - unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,). + matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches. + unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,). + unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,). 
Examples: >>> cost_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -56,8 +55,8 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr unmatched_a = list(np.arange(cost_matrix.shape[0])) unmatched_b = list(np.arange(cost_matrix.shape[1])) else: - unmatched_a = list(set(np.arange(cost_matrix.shape[0])) - set(matches[:, 0])) - unmatched_b = list(set(np.arange(cost_matrix.shape[1])) - set(matches[:, 1])) + unmatched_a = list(frozenset(np.arange(cost_matrix.shape[0])) - frozenset(matches[:, 0])) + unmatched_b = list(frozenset(np.arange(cost_matrix.shape[1])) - frozenset(matches[:, 1])) return matches, unmatched_a, unmatched_b diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index 02610b88764..3afa07a973d 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import contextlib import importlib.metadata @@ -12,19 +12,20 @@ import sys import threading import time -import urllib import uuid +import warnings from pathlib import Path from threading import Lock from types import SimpleNamespace from typing import Union +from urllib.parse import unquote import cv2 import matplotlib.pyplot as plt import numpy as np import torch +import tqdm import yaml -from tqdm import tqdm as tqdm_original from ultralytics import __version__ @@ -37,7 +38,9 @@ FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLO ASSETS = ROOT / "assets" # default images +ASSETS_URL = "https://github.com/ultralytics/assets/releases/download/v0.0.0" # assets GitHub URL DEFAULT_CFG_PATH = ROOT / "cfg/default.yaml" +DEFAULT_SOL_CFG_PATH = ROOT / "cfg/solutions/default.yaml" # Ultralytics solutions yaml path NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLO multiprocessing threads AUTOINSTALL = str(os.getenv("YOLO_AUTOINSTALL", True)).lower() == "true" # global auto-install mode 
VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true" # global verbose mode @@ -49,6 +52,20 @@ TORCH_VERSION = torch.__version__ TORCHVISION_VERSION = importlib.metadata.version("torchvision") # faster than importing torchvision IS_VSCODE = os.environ.get("TERM_PROGRAM", False) == "vscode" +RKNN_CHIPS = frozenset( + { + "rk3588", + "rk3576", + "rk3566", + "rk3568", + "rk3562", + "rv1103", + "rv1106", + "rv1103b", + "rv1106b", + "rk2118", + } +) # Rockchip processors available for export HELP_MSG = """ Examples for running Ultralytics: @@ -61,8 +78,8 @@ from ultralytics import YOLO # Load a model - model = YOLO("yolov8n.yaml") # build a new model from scratch - model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) + model = YOLO("yolo11n.yaml") # build a new model from scratch + model = YOLO("yolo11n.pt") # load a pretrained model (recommended for training) # Use the model results = model.train(data="coco8.yaml", epochs=3) # train the model @@ -77,21 +94,21 @@ yolo TASK MODE ARGS Where TASK (optional) is one of [detect, segment, classify, pose, obb] - MODE (required) is one of [train, val, predict, export, benchmark] + MODE (required) is one of [train, val, predict, export, track, benchmark] ARGS (optional) are any number of custom "arg=value" pairs like "imgsz=320" that override defaults. 
See all ARGS at https://docs.ultralytics.com/usage/cfg or with "yolo cfg" - Train a detection model for 10 epochs with an initial learning_rate of 0.01 - yolo detect train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 + yolo detect train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01 - Predict a YouTube video using a pretrained segmentation model at image size 320: - yolo segment predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 + yolo segment predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320 - Val a pretrained detection model at batch-size 1 and image size 640: - yolo detect val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640 + yolo detect val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640 - - Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required) - yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128 + - Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required) + yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128 - Run special commands: yolo help @@ -111,12 +128,16 @@ np.set_printoptions(linewidth=320, formatter={"float_kind": "{:11.5g}".format}) # format short g, %precision=5 cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) os.environ["NUMEXPR_MAX_THREADS"] = str(NUM_THREADS) # NumExpr max threads +os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # for deterministic training to avoid CUDA warning os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # suppress verbose TF compiler warnings in Colab os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR" # suppress "NNPACK.cpp could not initialize NNPACK" warnings os.environ["KINETO_LOG_LEVEL"] = "5" # suppress verbose PyTorch profiler output when computing FLOPs +if TQDM_RICH := str(os.getenv("YOLO_TQDM_RICH", False)).lower() == "true": + from tqdm import rich + -class TQDM(tqdm_original): +class TQDM(rich.tqdm if TQDM_RICH 
else tqdm.tqdm): """ A custom TQDM progress bar class that extends the original tqdm functionality. @@ -159,7 +180,8 @@ def __init__(self, *args, **kwargs): ... # Your code here ... pass """ - kwargs["disable"] = not VERBOSE or kwargs.get("disable", False) # logical 'and' with default value if passed + warnings.filterwarnings("ignore", category=tqdm.TqdmExperimentalWarning) # suppress tqdm.rich warning + kwargs["disable"] = not VERBOSE or kwargs.get("disable", False) kwargs.setdefault("bar_format", TQDM_BAR_FORMAT) # override default value if passed super().__init__(*args, **kwargs) @@ -507,6 +529,7 @@ def yaml_print(yaml_file: Union[str, Path, dict]) -> None: # Default configuration DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH) +DEFAULT_SOL_DICT = yaml_load(DEFAULT_SOL_CFG_PATH) # Ultralytics solutions configuration for k, v in DEFAULT_CFG_DICT.items(): if isinstance(v, str) and v.lower() == "none": DEFAULT_CFG_DICT[k] = None @@ -520,12 +543,9 @@ def read_device_model() -> str: is_raspberrypi(). Returns: - (str): Model file contents if read successfully or empty string otherwise. + (str): Kernel release information. """ - with contextlib.suppress(Exception): - with open("/proc/device-tree/model") as f: - return f.read() - return "" + return platform.release().lower() def is_ubuntu() -> bool: @@ -535,10 +555,11 @@ def is_ubuntu() -> bool: Returns: (bool): True if OS is Ubuntu, False otherwise. """ - with contextlib.suppress(FileNotFoundError): + try: with open("/etc/os-release") as f: return "ID=ubuntu" in f.read() - return False + except FileNotFoundError: + return False def is_colab(): @@ -563,16 +584,26 @@ def is_kaggle(): def is_jupyter(): """ - Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace. + Check if the current script is running inside a Jupyter Notebook. Returns: (bool): True if running inside a Jupyter Notebook, False otherwise. 
+ + Note: + - Only works on Colab and Kaggle, other environments like Jupyterlab and Paperspace are not reliably detectable. + - "get_ipython" in globals() method suffers false positives when IPython package installed manually. """ - with contextlib.suppress(Exception): - from IPython import get_ipython + return IS_COLAB or IS_KAGGLE - return get_ipython() is not None - return False + +def is_runpod(): + """ + Check if the current script is running inside a RunPod container. + + Returns: + (bool): True if running in RunPod, False otherwise. + """ + return "RUNPOD_POD_ID" in os.environ def is_docker() -> bool: @@ -582,10 +613,11 @@ def is_docker() -> bool: Returns: (bool): True if the script is running inside a Docker container, False otherwise. """ - with contextlib.suppress(Exception): + try: with open("/proc/self/cgroup") as f: return "docker" in f.read() - return False + except Exception: + return False def is_raspberrypi() -> bool: @@ -595,18 +627,17 @@ def is_raspberrypi() -> bool: Returns: (bool): True if running on a Raspberry Pi, False otherwise. """ - return "Raspberry Pi" in PROC_DEVICE_MODEL + return "rpi" in DEVICE_MODEL def is_jetson() -> bool: """ - Determines if the Python environment is running on a Jetson Nano or Jetson Orin device by checking the device model - information. + Determines if the Python environment is running on an NVIDIA Jetson device by checking the device model information. Returns: - (bool): True if running on a Jetson Nano or Jetson Orin, False otherwise. + (bool): True if running on an NVIDIA Jetson device, False otherwise. """ - return "NVIDIA" in PROC_DEVICE_MODEL # i.e. "NVIDIA Jetson Nano" or "NVIDIA Orin NX" + return "tegra" in DEVICE_MODEL def is_online() -> bool: @@ -616,14 +647,15 @@ def is_online() -> bool: Returns: (bool): True if connection is successful, False otherwise. 
""" - with contextlib.suppress(Exception): + try: assert str(os.getenv("YOLO_OFFLINE", "")).lower() != "true" # check if ENV var YOLO_OFFLINE="True" import socket for dns in ("1.1.1.1", "8.8.8.8"): # check Cloudflare and Google DNS socket.create_connection(address=(dns, 80), timeout=2.0).close() return True - return False + except Exception: + return False def is_pip_package(filepath: str = __name__) -> bool: @@ -710,9 +742,11 @@ def get_git_origin_url(): (str | None): The origin URL of the git repository or None if not git directory. """ if IS_GIT_DIR: - with contextlib.suppress(subprocess.CalledProcessError): + try: origin = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]) return origin.decode().strip() + except subprocess.CalledProcessError: + return None def get_git_branch(): @@ -723,9 +757,11 @@ def get_git_branch(): (str | None): The current git branch name or None if not a git directory. """ if IS_GIT_DIR: - with contextlib.suppress(subprocess.CalledProcessError): + try: origin = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]) return origin.decode().strip() + except subprocess.CalledProcessError: + return None def get_default_args(func): @@ -750,9 +786,11 @@ def get_ubuntu_version(): (str): Ubuntu version or None if not an Ubuntu OS. 
""" if is_ubuntu(): - with contextlib.suppress(FileNotFoundError, AttributeError): + try: with open("/etc/os-release") as f: return re.search(r'VERSION_ID="(\d+\.\d+)"', f.read())[1] + except (FileNotFoundError, AttributeError): + return None def get_user_config_dir(sub_dir="Ultralytics"): @@ -789,13 +827,13 @@ def get_user_config_dir(sub_dir="Ultralytics"): # Define constants (required below) -PROC_DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant +DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant ONLINE = is_online() IS_COLAB = is_colab() +IS_KAGGLE = is_kaggle() IS_DOCKER = is_docker() IS_JETSON = is_jetson() IS_JUPYTER = is_jupyter() -IS_KAGGLE = is_kaggle() IS_PIP_PACKAGE = is_pip_package() IS_RASPBERRYPI = is_raspberrypi() GIT_DIR = get_git_dir() @@ -970,7 +1008,7 @@ def wrapper(*args, **kwargs): def set_sentry(): """ Initialize the Sentry SDK for error tracking and reporting. Only used if sentry_sdk package is installed and - sync=True in settings. Run 'yolo settings' to see and update settings YAML file. + sync=True in settings. Run 'yolo settings' to see and update settings. Conditions required to send errors (ALL conditions must be met or no errors will be reported): - sentry_sdk package is installed @@ -982,11 +1020,26 @@ def set_sentry(): - online environment - CLI used to run package (checked with 'yolo' as the name of the main CLI command) - The function also configures Sentry SDK to ignore KeyboardInterrupt and FileNotFoundError - exceptions and to exclude events with 'out of memory' in their exception message. + The function also configures Sentry SDK to ignore KeyboardInterrupt and FileNotFoundError exceptions and to exclude + events with 'out of memory' in their exception message. Additionally, the function sets custom tags and user information for Sentry events. 
""" + if ( + not SETTINGS["sync"] + or RANK not in {-1, 0} + or Path(ARGV[0]).name != "yolo" + or TESTS_RUNNING + or not ONLINE + or not IS_PIP_PACKAGE + or IS_GIT_DIR + ): + return + # If sentry_sdk package is not installed then return and do not use Sentry + try: + import sentry_sdk # noqa + except ImportError: + return def before_send(event, hint): """ @@ -1000,7 +1053,7 @@ def before_send(event, hint): dict: The modified event or None if the event should not be sent to Sentry. """ if "exc_info" in hint: - exc_type, exc_value, tb = hint["exc_info"] + exc_type, exc_value, _ = hint["exc_info"] if exc_type in {KeyboardInterrupt, FileNotFoundError} or "out of memory" in str(exc_value): return None # do not send event @@ -1012,31 +1065,17 @@ def before_send(event, hint): } return event - if ( - SETTINGS["sync"] - and RANK in {-1, 0} - and Path(ARGV[0]).name == "yolo" - and not TESTS_RUNNING - and ONLINE - and IS_PIP_PACKAGE - and not IS_GIT_DIR - ): - # If sentry_sdk package is not installed then return and do not use Sentry - try: - import sentry_sdk # noqa - except ImportError: - return - - sentry_sdk.init( - dsn="https://5ff1556b71594bfea135ff0203a0d290@o4504521589325824.ingest.sentry.io/4504521592406016", - debug=False, - traces_sample_rate=1.0, - release=__version__, - environment="production", # 'dev' or 'production' - before_send=before_send, - ignore_errors=[KeyboardInterrupt, FileNotFoundError], - ) - sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash + sentry_sdk.init( + dsn="https://888e5a0778212e1d0314c37d4b9aae5d@o4504521589325824.ingest.us.sentry.io/4504521592406016", + debug=False, + auto_enabling_integrations=False, + traces_sample_rate=1.0, + release=__version__, + environment="runpod" if is_runpod() else "production", + before_send=before_send, + ignore_errors=[KeyboardInterrupt, FileNotFoundError], + ) + sentry_sdk.set_user({"id": SETTINGS["uuid"]}) # SHA-256 anonymized UUID hash class JSONDict(dict): @@ -1116,7 +1155,8 @@ 
def __delitem__(self, key): def __str__(self): """Return a pretty-printed JSON string representation of the dictionary.""" - return f'JSONDict("{self.file_path}"):\n{json.dumps(dict(self), indent=2, ensure_ascii=False, default=self._json_default)}' + contents = json.dumps(dict(self), indent=2, ensure_ascii=False, default=self._json_default) + return f'JSONDict("{self.file_path}"):\n{contents}' def update(self, *args, **kwargs): """Update the dictionary and persist changes.""" @@ -1169,25 +1209,26 @@ def __init__(self, file=SETTINGS_FILE, version="0.0.6"): self.file = Path(file) self.version = version self.defaults = { - "settings_version": version, - "datasets_dir": str(datasets_root / "datasets"), - "weights_dir": str(root / "weights"), - "runs_dir": str(root / "runs"), - "uuid": hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(), - "sync": True, - "api_key": "", - "openai_api_key": "", - "clearml": True, # integrations - "comet": True, - "dvc": True, - "hub": True, - "mlflow": True, - "neptune": True, - "raytune": True, - "tensorboard": True, - "wandb": True, - "vscode_msg": True, + "settings_version": version, # Settings schema version + "datasets_dir": str(datasets_root / "datasets"), # Datasets directory + "weights_dir": str(root / "weights"), # Model weights directory + "runs_dir": str(root / "runs"), # Experiment runs directory + "uuid": hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(), # SHA-256 anonymized UUID hash + "sync": True, # Enable synchronization + "api_key": "", # Ultralytics API Key + "openai_api_key": "", # OpenAI API Key + "clearml": True, # ClearML integration + "comet": True, # Comet integration + "dvc": True, # DVC integration + "hub": True, # Ultralytics HUB integration + "mlflow": True, # MLflow integration + "neptune": True, # Neptune integration + "raytune": True, # Ray Tune integration + "tensorboard": True, # TensorBoard logging + "wandb": False, # Weights & Biases logging + "vscode_msg": True, # VSCode messaging } + 
self.help_msg = ( f"\nView Ultralytics Settings with 'yolo settings' or at '{self.file}'" "\nUpdate Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. " @@ -1205,7 +1246,7 @@ def __init__(self, file=SETTINGS_FILE, version="0.0.6"): def _validate_settings(self): """Validate the current settings and reset if necessary.""" - correct_keys = set(self.keys()) == set(self.defaults.keys()) + correct_keys = frozenset(self.keys()) == frozenset(self.defaults.keys()) correct_types = all(isinstance(self.get(k), type(v)) for k, v in self.defaults.items()) correct_version = self.get("settings_version", "") == self.version @@ -1223,14 +1264,23 @@ def _validate_settings(self): f"Please change one to avoid possible issues during training. {self.help_msg}" ) + def __setitem__(self, key, value): + """Updates one key: value pair.""" + self.update({key: value}) + def update(self, *args, **kwargs): """Updates settings, validating keys and types.""" + for arg in args: + if isinstance(arg, dict): + kwargs.update(arg) for k, v in kwargs.items(): if k not in self.defaults: raise KeyError(f"No Ultralytics setting '{k}'. {self.help_msg}") t = type(self.defaults[k]) if not isinstance(v, t): - raise TypeError(f"Ultralytics setting '{k}' must be of type '{t}', not '{type(v)}'. {self.help_msg}") + raise TypeError( + f"Ultralytics setting '{k}' must be '{t.__name__}' type, not '{type(v).__name__}'. {self.help_msg}" + ) super().update(*args, **kwargs) def reset(self): @@ -1239,15 +1289,18 @@ def reset(self): self.update(self.defaults) -def deprecation_warn(arg, new_arg): +def deprecation_warn(arg, new_arg=None): """Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument.""" - LOGGER.warning(f"WARNING โš ๏ธ '{arg}' is deprecated and will be removed in in the future. Use '{new_arg}' instead.") + msg = f"WARNING โš ๏ธ '{arg}' is deprecated and will be removed in in the future." 
+ if new_arg is not None: + msg += f" Use '{new_arg}' instead." + LOGGER.warning(msg) def clean_url(url): """Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt.""" url = Path(url).as_posix().replace(":/", "://") # Pathlib turns :// -> :/, as_posix() for Windows - return urllib.parse.unquote(url).split("?")[0] # '%2F' to '/', split https://url.com/file.txt?auth + return unquote(url).split("?")[0] # '%2F' to '/', split https://url.com/file.txt?auth def url2file(url): diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py index 2d09c5d894e..085001a153c 100644 --- a/ultralytics/utils/autobatch.py +++ b/ultralytics/utils/autobatch.py @@ -1,6 +1,7 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" +import os from copy import deepcopy import numpy as np @@ -10,7 +11,7 @@ from ultralytics.utils.torch_utils import autocast, profile -def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): +def check_train_batch_size(model, imgsz=640, amp=True, batch=-1, max_num_obj=1): """ Compute optimal YOLO training batch size using the autobatch() function. @@ -19,6 +20,7 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): imgsz (int, optional): Image size used for training. amp (bool, optional): Use automatic mixed precision if True. batch (float, optional): Fraction of GPU memory to use. If -1, use default. + max_num_obj (int, optional): The maximum number of objects from dataset. Returns: (int): Optimal batch size computed using the autobatch() function. @@ -28,10 +30,12 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): Otherwise, a default fraction of 0.6 is used. 
""" with autocast(enabled=amp): - return autobatch(deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6) + return autobatch( + deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6, max_num_obj=max_num_obj + ) -def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): +def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch, max_num_obj=1): """ Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory. @@ -40,6 +44,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640. fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60. batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16. + max_num_obj (int, optional): The maximum number of objects from dataset. Returns: (int): The optimal batch size. 
@@ -57,7 +62,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): # Inspect CUDA memory gb = 1 << 30 # bytes to GiB (1024 ** 3) - d = str(device).upper() # 'CUDA:0' + d = f"CUDA:{os.getenv('CUDA_VISIBLE_DEVICES', '0').strip()[0]}" # 'CUDA:0' properties = torch.cuda.get_device_properties(device) # device properties t = properties.total_memory / gb # GiB total r = torch.cuda.memory_reserved(device) / gb # GiB reserved @@ -66,26 +71,36 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free") # Profile batch sizes - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16] if t < 16 else [1, 2, 4, 8, 16, 32, 64] try: img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] - results = profile(img, model, n=3, device=device) + results = profile(img, model, n=1, device=device, max_num_obj=max_num_obj) # Fit a solution - y = [x[2] for x in results if x] # memory [2] - p = np.polyfit(batch_sizes[: len(y)], y, deg=1) # first degree polynomial fit - b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) + xy = [ + [x, y[2]] + for i, (x, y) in enumerate(zip(batch_sizes, results)) + if y # valid result + and isinstance(y[2], (int, float)) # is numeric + and 0 < y[2] < t # between 0 and GPU limit + and (i == 0 or not results[i - 1] or y[2] > results[i - 1][2]) # first item or increasing memory + ] + fit_x, fit_y = zip(*xy) if xy else ([], []) + p = np.polyfit(np.log(fit_x), np.log(fit_y), deg=1) # first-degree polynomial fit in log space + b = int(round(np.exp((np.log(f * fraction) - p[1]) / p[0]))) # y intercept (optimal batch size) if None in results: # some sizes failed i = results.index(None) # first fail index if b >= batch_sizes[i]: # y intercept above failure point b = batch_sizes[max(i - 1, 0)] # select prior safe point if b < 1 or b > 1024: # b outside of safe range + 
LOGGER.info(f"{prefix}WARNING โš ๏ธ batch={b} outside safe range, using default batch-size {batch_size}.") b = batch_size - LOGGER.info(f"{prefix}WARNING โš ๏ธ CUDA anomaly detected, using default batch-size {batch_size}.") - fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted + fraction = (np.exp(np.polyval(p, np.log(b))) + r + a) / t # predicted fraction LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) โœ…") return b except Exception as e: LOGGER.warning(f"{prefix}WARNING โš ๏ธ error detected: {e}, using default batch-size {batch_size}.") return batch_size + finally: + torch.cuda.empty_cache() diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index d5850a181a1..280a31d54f5 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -1,27 +1,29 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ Benchmark a YOLO model formats for speed and accuracy. 
Usage: from ultralytics.utils.benchmarks import ProfileModels, benchmark - ProfileModels(['yolov8n.yaml', 'yolov8s.yaml']).profile() - benchmark(model='yolov8n.pt', imgsz=160) + ProfileModels(['yolo11n.yaml', 'yolov8s.yaml']).profile() + benchmark(model='yolo11n.pt', imgsz=160) Format | `format=argument` | Model --- | --- | --- -PyTorch | - | yolov8n.pt -TorchScript | `torchscript` | yolov8n.torchscript -ONNX | `onnx` | yolov8n.onnx -OpenVINO | `openvino` | yolov8n_openvino_model/ -TensorRT | `engine` | yolov8n.engine -CoreML | `coreml` | yolov8n.mlpackage -TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ -TensorFlow GraphDef | `pb` | yolov8n.pb -TensorFlow Lite | `tflite` | yolov8n.tflite -TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite -TensorFlow.js | `tfjs` | yolov8n_web_model/ -PaddlePaddle | `paddle` | yolov8n_paddle_model/ -NCNN | `ncnn` | yolov8n_ncnn_model/ +PyTorch | - | yolo11n.pt +TorchScript | `torchscript` | yolo11n.torchscript +ONNX | `onnx` | yolo11n.onnx +OpenVINO | `openvino` | yolo11n_openvino_model/ +TensorRT | `engine` | yolo11n.engine +CoreML | `coreml` | yolo11n.mlpackage +TensorFlow SavedModel | `saved_model` | yolo11n_saved_model/ +TensorFlow GraphDef | `pb` | yolo11n.pb +TensorFlow Lite | `tflite` | yolo11n.tflite +TensorFlow Edge TPU | `edgetpu` | yolo11n_edgetpu.tflite +TensorFlow.js | `tfjs` | yolo11n_web_model/ +PaddlePaddle | `paddle` | yolo11n_paddle_model/ +MNN | `mnn` | yolo11n.mnn +NCNN | `ncnn` | yolo11n_ncnn_model/ +RKNN | `rknn` | yolo11n_rknn_model/ """ import glob @@ -39,15 +41,15 @@ from ultralytics import YOLO, YOLOWorld from ultralytics.cfg import TASK2DATA, TASK2METRIC from ultralytics.engine.exporter import export_formats -from ultralytics.utils import ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR -from ultralytics.utils.checks import IS_PYTHON_3_12, check_requirements, check_yolo +from ultralytics.utils import ARM64, ASSETS, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR 
+from ultralytics.utils.checks import IS_PYTHON_3_12, check_imgsz, check_requirements, check_yolo, is_rockchip from ultralytics.utils.downloads import safe_download from ultralytics.utils.files import file_size from ultralytics.utils.torch_utils import get_cpu_info, select_device def benchmark( - model=WEIGHTS_DIR / "yolov8n.pt", + model=WEIGHTS_DIR / "yolo11n.pt", data=None, imgsz=160, half=False, @@ -57,6 +59,7 @@ def benchmark( separate_outputs=False, export_hw_optimized=False, eps=1e-3, + format="", ): """ Benchmark a YOLO model across different formats for speed and accuracy. @@ -70,6 +73,7 @@ def benchmark( device (str): Device to run the benchmark on, either 'cpu' or 'cuda'. verbose (bool | float): If True or a float, assert benchmarks pass with given metric. eps (float): Epsilon value for divide by zero prevention. + format (str): Export format for benchmarking. If not supplied all formats are benchmarked. Returns: (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size, metric, @@ -78,8 +82,11 @@ def benchmark( Examples: Benchmark a YOLO model with default settings: >>> from ultralytics.utils.benchmarks import benchmark - >>> benchmark(model="yolov8n.pt", imgsz=640) + >>> benchmark(model="yolo11n.pt", imgsz=640) """ + imgsz = check_imgsz(imgsz) + assert imgsz[0] == imgsz[1] if isinstance(imgsz, list) else True, "benchmark() only supports square imgsz." + import pandas as pd # scope for faster 'import ultralytics' pd.options.display.max_columns = 10 @@ -91,30 +98,50 @@ def benchmark( y = [] t0 = time.time() - for i, (name, format, suffix, cpu, gpu) in enumerate(zip(*export_formats().values())): + + format_arg = format.lower() + if format_arg: + formats = frozenset(export_formats()["Argument"]) + assert format in formats, f"Expected format to be one of {formats}, but got '{format_arg}'." 
+ for i, (name, format, suffix, cpu, gpu, _) in enumerate(zip(*export_formats().values())): emoji, filename = "โŒ", None # export defaults try: + if format_arg and format_arg != format: + continue + # Checks if i == 7: # TF GraphDef assert model.task != "obb", "TensorFlow GraphDef not supported for OBB task" elif i == 9: # Edge TPU assert LINUX and not ARM64, "Edge TPU export only supported on non-aarch64 Linux" elif i in {5, 10}: # CoreML and TF.js - assert MACOS or LINUX, "CoreML and TF.js export only supported on macOS and Linux" - assert not IS_RASPBERRYPI, "CoreML and TF.js export not supported on Raspberry Pi" - assert not IS_JETSON, "CoreML and TF.js export not supported on NVIDIA Jetson" + assert MACOS or (LINUX and not ARM64), ( + "CoreML and TF.js export only supported on macOS and non-aarch64 Linux" + ) if i in {5}: # CoreML assert not IS_PYTHON_3_12, "CoreML not supported on Python 3.12" if i in {6, 7, 8}: # TF SavedModel, TF GraphDef, and TFLite assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet" if i in {9, 10}: # TF EdgeTPU and TF.js assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet" - if i in {11}: # Paddle + if i == 11: # Paddle assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet" assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet" assert LINUX or MACOS, "Windows Paddle exports not supported yet" - if i in {12}: # NCNN + if i == 12: # MNN + assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet" + if i == 13: # NCNN assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet" + if i == 14: # IMX + assert not is_end2end + assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported" + assert model.task == "detect", "IMX only supported for detection task" + assert "C2f" in model.__str__(), "IMX only supported for 
YOLOv8" + if i == 15: # RKNN + assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet" + assert not is_end2end, "End-to-end models not supported by RKNN yet" + assert LINUX, "RKNN only supported on Linux" + assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices" if "cpu" in device.type: assert cpu, "inference not supported on CPU" if "cuda" in device.type: @@ -126,7 +153,7 @@ def benchmark( # Export if format == "-": - filename = model.ckpt_path or model.cfg + filename = model.pt_path or model.ckpt_path or model.model_name exported_model = model # PyTorch format else: filename = model.export(imgsz=imgsz, format=format, half=half, int8=int8, device=device, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized) @@ -138,16 +165,16 @@ def benchmark( assert model.task != "pose" or i != 7, "GraphDef Pose inference is not supported" assert i not in {9, 10}, "inference not supported" # Edge TPU and TF.js are unsupported assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13" # CoreML - if i in {12}: + if i in {13}: assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet" - exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized) + exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized) # Validate data = data or TASK2DATA[model.task] # task to dataset, i.e. coco8.yaml for task=detect - key = TASK2METRIC[model.task] # task to metric, i.e. 
metrics/mAP50-95(B) for task=detect results = exported_model.val( data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, int8=int8, verbose=False, separate_outputs=separate_outputs, export_hw_optimized=export_hw_optimized ) + key = TASK2METRIC[model.task] # task to metric, i.e. metrics/mAP50-95(B) for task=detect metric, speed = results.results_dict[key], results.speed["inference"] fps = round(1000 / (speed + eps), 2) # frames per second y.append([name, "โœ…", round(file_size(filename), 1), round(metric, 4), round(speed, 2), fps]) @@ -161,8 +188,10 @@ def benchmark( check_yolo(device=device) # print system info df = pd.DataFrame(y, columns=["Format", "Statusโ”", "Size (MB)", key, "Inference time (ms/im)", "FPS"]) - name = Path(model.ckpt_path).name - s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz}, separate_outputs={separate_outputs}, export_hw_optimized={export_hw_optimized} ({time.time() - t0:.2f}s)\n{df}\n" + name = model.model_name + dt = time.time() - t0 + legend = "Benchmarks legend: - โœ… Success - โŽ Export passed but validation failed - โŒ๏ธ Export failed" + s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz}, separate_outputs={separate_outputs}, export_hw_optimized={export_hw_optimized} ({dt:.2f}s)\n{legend}\n{df.fillna('-')}\n" LOGGER.info(s) with open("benchmarks.log", "a", errors="ignore", encoding="utf-8") as f: f.write(s) @@ -332,7 +361,7 @@ class ProfileModels: Examples: Profile models and print results >>> from ultralytics.utils.benchmarks import ProfileModels - >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640) + >>> profiler = ProfileModels(["yolo11n.yaml", "yolov8s.yaml"], imgsz=640) >>> profiler.profile() """ @@ -366,7 +395,7 @@ def __init__( Examples: Initialize and profile models >>> from ultralytics.utils.benchmarks import ProfileModels - >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640) + >>> profiler = ProfileModels(["yolo11n.yaml", 
"yolov8s.yaml"], imgsz=640) >>> profiler.profile() """ self.paths = paths @@ -438,7 +467,8 @@ def get_files(self): print(f"Profiling: {sorted(files)}") return [Path(file) for file in sorted(files)] - def get_onnx_model_info(self, onnx_file: str): + @staticmethod + def get_onnx_model_info(onnx_file: str): """Extracts metadata from an ONNX model file including parameters, GFLOPs, and input shape.""" return 0.0, 0.0, 0.0, 0.0 # return (num_layers, num_params, num_gradients, num_flops) @@ -461,7 +491,7 @@ def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3): # Model and input model = YOLO(engine_file) - input_data = np.random.rand(self.imgsz, self.imgsz, 3).astype(np.float32) # must be FP32 + input_data = np.zeros((self.imgsz, self.imgsz, 3), dtype=np.uint8) # use uint8 for Classify # Warmup runs elapsed = 0.0 @@ -542,8 +572,8 @@ def generate_table_row(self, model_name, t_onnx, t_engine, model_info): """Generates a table row string with model performance metrics including inference times and model details.""" layers, params, gradients, flops = model_info return ( - f"| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ยฑ {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ยฑ " - f"{t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |" + f"| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.1f}ยฑ{t_onnx[1]:.1f} ms | {t_engine[0]:.1f}ยฑ" + f"{t_engine[1]:.1f} ms | {params / 1e6:.1f} | {flops:.1f} |" ) @staticmethod diff --git a/ultralytics/utils/callbacks/__init__.py b/ultralytics/utils/callbacks/__init__.py index 116babe9b7f..920cc4fad9d 100644 --- a/ultralytics/utils/callbacks/__init__.py +++ b/ultralytics/utils/callbacks/__init__.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from .base import add_integration_callbacks, default_callbacks, get_default_callbacks diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py index 
98b20256e52..11e0a8979e0 100644 --- a/ultralytics/utils/callbacks/base.py +++ b/ultralytics/utils/callbacks/base.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Base callbacks.""" from collections import defaultdict diff --git a/ultralytics/utils/callbacks/clearml.py b/ultralytics/utils/callbacks/clearml.py index e076e55fa74..5afc7a3659f 100644 --- a/ultralytics/utils/callbacks/clearml.py +++ b/ultralytics/utils/callbacks/clearml.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING @@ -68,9 +68,9 @@ def on_pretrain_routine_start(trainer): PatchedMatplotlib.update_current_task(None) else: task = Task.init( - project_name=trainer.args.project or "YOLOv8", + project_name=trainer.args.project or "Ultralytics", task_name=trainer.args.name, - tags=["YOLOv8"], + tags=["Ultralytics"], output_uri=True, reuse_last_task_id=False, auto_connect_frameworks={"pytorch": False, "matplotlib": False}, diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py index 7e90a538638..910e3c424d2 100644 --- a/ultralytics/utils/callbacks/comet.py +++ b/ultralytics/utils/callbacks/comet.py @@ -1,6 +1,7 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops +from ultralytics.utils.metrics import ClassifyMetrics, DetMetrics, OBBMetrics, PoseMetrics, SegmentMetrics try: assert not TESTS_RUNNING # do not log pytest @@ -15,9 +16,12 @@ # Ensures certain logging functions only run for supported tasks COMET_SUPPORTED_TASKS = ["detect"] - # Names of plots created by YOLOv8 that are logged to Comet - EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve", "confusion_matrix" + # Names of plots created 
by Ultralytics that are logged to Comet + CONFUSION_MATRIX_PLOT_NAMES = "confusion_matrix", "confusion_matrix_normalized" + EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve" LABEL_PLOT_NAMES = "labels", "labels_correlogram" + SEGMENT_METRICS_PLOT_PREFIX = "Box", "Mask" + POSE_METRICS_PLOT_PREFIX = "Box", "Pose" _comet_image_prediction_count = 0 @@ -31,8 +35,8 @@ def _get_comet_mode(): def _get_comet_model_name(): - """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'.""" - return os.getenv("COMET_MODEL_NAME", "YOLOv8") + """Returns the model name for Comet from the environment variable COMET_MODEL_NAME or defaults to 'Ultralytics'.""" + return os.getenv("COMET_MODEL_NAME", "Ultralytics") def _get_eval_batch_logging_interval(): @@ -86,7 +90,7 @@ def _create_experiment(args): "max_image_predictions": _get_max_image_predictions_to_log(), } ) - experiment.log_other("Created from", "yolov8") + experiment.log_other("Created from", "ultralytics") except Exception as e: LOGGER.warning(f"WARNING โš ๏ธ Comet installed but not initialized correctly, not logging this run. {e}") @@ -110,7 +114,7 @@ def _fetch_trainer_metadata(trainer): def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad): """ - YOLOv8 resizes images during training and the label values are normalized based on this resized shape. + YOLO resizes images during training and the label values are normalized based on this resized shape. This function rescales the bounding box labels to the original image shape. 
""" @@ -274,11 +278,31 @@ def _log_image_predictions(experiment, validator, curr_step): def _log_plots(experiment, trainer): """Logs evaluation plots and label plots for the experiment.""" - plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES] - _log_images(experiment, plot_filenames, None) - - label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES] - _log_images(experiment, label_plot_filenames, None) + plot_filenames = None + if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment": + plot_filenames = [ + trainer.save_dir / f"{prefix}{plots}.png" + for plots in EVALUATION_PLOT_NAMES + for prefix in SEGMENT_METRICS_PLOT_PREFIX + ] + elif isinstance(trainer.validator.metrics, PoseMetrics): + plot_filenames = [ + trainer.save_dir / f"{prefix}{plots}.png" + for plots in EVALUATION_PLOT_NAMES + for prefix in POSE_METRICS_PLOT_PREFIX + ] + elif isinstance(trainer.validator.metrics, (DetMetrics, OBBMetrics)): + plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES] + + if plot_filenames is not None: + _log_images(experiment, plot_filenames, None) + + confusion_matrix_filenames = [trainer.save_dir / f"{plots}.png" for plots in CONFUSION_MATRIX_PLOT_NAMES] + _log_images(experiment, confusion_matrix_filenames, None) + + if not isinstance(trainer.validator.metrics, ClassifyMetrics): + label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES] + _log_images(experiment, label_plot_filenames, None) def _log_model(experiment, trainer): @@ -307,9 +331,6 @@ def on_train_epoch_end(trainer): experiment.log_metrics(trainer.label_loss_items(trainer.tloss, prefix="train"), step=curr_step, epoch=curr_epoch) - if curr_epoch == 1: - _log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step) - def on_fit_epoch_end(trainer): """Logs model assets at the end of each epoch.""" @@ -356,6 +377,8 
@@ def on_train_end(trainer): _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch) _log_image_predictions(experiment, trainer.validator, curr_step) + _log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step) + _log_images(experiment, trainer.save_dir.glob("val_batch*.jpg"), curr_step) experiment.end() global _comet_image_prediction_count diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py index ab51dc52946..1cc0c632ecb 100644 --- a/ultralytics/utils/callbacks/dvc.py +++ b/ultralytics/utils/callbacks/dvc.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, checks diff --git a/ultralytics/utils/callbacks/hub.py b/ultralytics/utils/callbacks/hub.py index fbcd1667efd..4709fbea8ba 100644 --- a/ultralytics/utils/callbacks/hub.py +++ b/ultralytics/utils/callbacks/hub.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import json from time import time @@ -15,16 +15,14 @@ def on_pretrain_routine_start(trainer): def on_pretrain_routine_end(trainer): """Logs info before starting timer for upload rate limit.""" - session = getattr(trainer, "hub_session", None) - if session: + if session := getattr(trainer, "hub_session", None): # Start timer for upload rate limit session.timers = {"metrics": time(), "ckpt": time()} # start timer on session.rate_limit def on_fit_epoch_end(trainer): """Uploads training progress metrics at the end of each epoch.""" - session = getattr(trainer, "hub_session", None) - if session: + if session := getattr(trainer, "hub_session", None): # Upload metrics after val end all_plots = { **trainer.label_loss_items(trainer.tloss, prefix="train"), @@ -49,8 +47,7 @@ def on_fit_epoch_end(trainer): def on_model_save(trainer): """Saves checkpoints to Ultralytics HUB with rate 
limiting.""" - session = getattr(trainer, "hub_session", None) - if session: + if session := getattr(trainer, "hub_session", None): # Upload checkpoints with rate limiting is_best = trainer.best_fitness == trainer.fitness if time() - session.timers["ckpt"] > session.rate_limits["ckpt"]: @@ -61,8 +58,7 @@ def on_model_save(trainer): def on_train_end(trainer): """Upload final model and metrics to Ultralytics HUB at the end of training.""" - session = getattr(trainer, "hub_session", None) - if session: + if session := getattr(trainer, "hub_session", None): # Upload final model and metrics with exponential standoff LOGGER.info(f"{PREFIX}Syncing final model...") session.upload_model( @@ -72,7 +68,7 @@ def on_train_end(trainer): final=True, ) session.alive = False # stop heartbeats - LOGGER.info(f"{PREFIX}Done โœ…\n" f"{PREFIX}View model at {session.model_url} ๐Ÿš€") + LOGGER.info(f"{PREFIX}Done โœ…\n{PREFIX}View model at {session.model_url} ๐Ÿš€") def on_train_start(trainer): diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py index bbae4cc0cca..9d5dc2f16f6 100644 --- a/ultralytics/utils/callbacks/mlflow.py +++ b/ultralytics/utils/callbacks/mlflow.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """ MLflow Logging for Ultralytics YOLO. 
@@ -69,7 +69,7 @@ def on_pretrain_routine_end(trainer): mlflow.set_tracking_uri(uri) # Set experiment and run names - experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8" + experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/Ultralytics" run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name mlflow.set_experiment(experiment_name) @@ -82,7 +82,7 @@ def on_pretrain_routine_end(trainer): LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'") mlflow.log_params(dict(trainer.args)) except Exception as e: - LOGGER.warning(f"{PREFIX}WARNING โš ๏ธ Failed to initialize: {e}\n" f"{PREFIX}WARNING โš ๏ธ Not tracking this run") + LOGGER.warning(f"{PREFIX}WARNING โš ๏ธ Failed to initialize: {e}\n{PREFIX}WARNING โš ๏ธ Not tracking this run") def on_train_epoch_end(trainer): diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py index 6be8a821f5d..7adfdad1fdb 100644 --- a/ultralytics/utils/callbacks/neptune.py +++ b/ultralytics/utils/callbacks/neptune.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING @@ -52,7 +52,11 @@ def on_pretrain_routine_start(trainer): """Callback function called before the training routine starts.""" try: global run - run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"]) + run = neptune.init_run( + project=trainer.args.project or "Ultralytics", + name=trainer.args.name, + tags=["Ultralytics"], + ) run["Configuration/Hyperparameters"] = {k: "" if v is None else v for k, v in vars(trainer.args).items()} except Exception as e: LOGGER.warning(f"WARNING โš ๏ธ NeptuneAI installed but not initialized correctly, not logging this run. 
{e}") diff --git a/ultralytics/utils/callbacks/raytune.py b/ultralytics/utils/callbacks/raytune.py index 1a368db6637..e7e01d0985f 100644 --- a/ultralytics/utils/callbacks/raytune.py +++ b/ultralytics/utils/callbacks/raytune.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import SETTINGS @@ -16,8 +16,7 @@ def on_fit_epoch_end(trainer): """Sends training metrics to Ray Tune at end of each epoch.""" if ray.train._internal.session._get_session(): # replacement for deprecated ray.tune.is_session_enabled() metrics = trainer.metrics - metrics["epoch"] = trainer.epoch - session.report(metrics) + session.report({**metrics, **{"epoch": trainer.epoch + 1}}) callbacks = ( diff --git a/ultralytics/utils/callbacks/tensorboard.py b/ultralytics/utils/callbacks/tensorboard.py index 2aa114b53b7..2920fa23bf1 100644 --- a/ultralytics/utils/callbacks/tensorboard.py +++ b/ultralytics/utils/callbacks/tensorboard.py @@ -1,6 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license - -import contextlib +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr @@ -45,26 +43,27 @@ def _log_tensorboard_graph(trainer): warnings.simplefilter("ignore", category=torch.jit.TracerWarning) # suppress jit trace warning # Try simple method first (YOLO) - with contextlib.suppress(Exception): + try: trainer.model.eval() # place in .eval() mode to avoid BatchNorm statistics changes WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), []) LOGGER.info(f"{PREFIX}model graph visualization added โœ…") return - # Fallback to TorchScript export steps (RTDETR) - try: - model = deepcopy(de_parallel(trainer.model)) - model.eval() - model = model.fuse(verbose=False) - for m in model.modules(): - if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class) - m.export = True - 
m.format = "torchscript" - model(im) # dry run - WRITER.add_graph(torch.jit.trace(model, im, strict=False), []) - LOGGER.info(f"{PREFIX}model graph visualization added โœ…") - except Exception as e: - LOGGER.warning(f"{PREFIX}WARNING โš ๏ธ TensorBoard graph visualization failure {e}") + except Exception: + # Fallback to TorchScript export steps (RTDETR) + try: + model = deepcopy(de_parallel(trainer.model)) + model.eval() + model = model.fuse(verbose=False) + for m in model.modules(): + if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class) + m.export = True + m.format = "torchscript" + model(im) # dry run + WRITER.add_graph(torch.jit.trace(model, im, strict=False), []) + LOGGER.info(f"{PREFIX}model graph visualization added โœ…") + except Exception as e: + LOGGER.warning(f"{PREFIX}WARNING โš ๏ธ TensorBoard graph visualization failure {e}") def on_pretrain_routine_start(trainer): diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index 7b69b7a45a8..7242d51e3d8 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import SETTINGS, TESTS_RUNNING from ultralytics.utils.torch_utils import model_info_for_loggers @@ -109,7 +109,12 @@ def _log_plots(plots, step): def on_pretrain_routine_start(trainer): """Initiate and start project if module is present.""" - wb.run or wb.init(project=trainer.args.project or "YOLOv8", name=trainer.args.name, config=vars(trainer.args)) + if not wb.run: + wb.init( + project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics", + name=str(trainer.args.name).replace("/", "-"), + config=vars(trainer.args), + ) def on_fit_epoch_end(trainer): @@ -137,17 +142,19 @@ def on_train_end(trainer): if trainer.best.exists(): art.add_file(trainer.best) 
wb.run.log_artifact(art, aliases=["best"]) - for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results): - x, y, x_title, y_title = curve_values - _plot_curve( - x, - y, - names=list(trainer.validator.metrics.names.values()), - id=f"curves/{curve_name}", - title=curve_name, - x_title=x_title, - y_title=y_title, - ) + # Check if we actually have plots to save + if trainer.args.plots and hasattr(trainer.validator.metrics, "curves_results"): + for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results): + x, y, x_title, y_title = curve_values + _plot_curve( + x, + y, + names=list(trainer.validator.metrics.names.values()), + id=f"curves/{curve_name}", + title=curve_name, + x_title=x_title, + y_title=y_title, + ) wb.run.finish() # required or run continues on dashboard diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 383c8562538..52f60f89c07 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import glob import inspect import math @@ -20,11 +19,11 @@ import torch from ultralytics.utils import ( + ARM64, ASSETS, AUTOINSTALL, IS_COLAB, IS_GIT_DIR, - IS_JUPYTER, IS_KAGGLE, IS_PIP_PACKAGE, LINUX, @@ -32,6 +31,7 @@ MACOS, ONLINE, PYTHON_VERSION, + RKNN_CHIPS, ROOT, TORCHVISION_VERSION, USER_CONFIG_DIR, @@ -77,8 +77,7 @@ def parse_requirements(file_path=ROOT.parent / "requirements.txt", package=""): line = line.strip() if line and not line.startswith("#"): line = line.split("#")[0].strip() # ignore inline comments - match = re.match(r"([a-zA-Z0-9-_]+)\s*([<>!=~]+.*)?", line) - if match: + if match := re.match(r"([a-zA-Z0-9-_]+)\s*([<>!=~]+.*)?", line): requirements.append(SimpleNamespace(name=match[1], specifier=match[2].strip() if match[2] else "")) return requirements @@ 
-239,12 +238,14 @@ def check_version( c = parse_version(current) # '1.2.3' -> (1, 2, 3) for r in required.strip(",").split(","): op, version = re.match(r"([^0-9]*)([\d.]+)", r).groups() # split '>=22.04' -> ('>=', '22.04') + if not op: + op = ">=" # assume >= if no op passed v = parse_version(version) # '1.2.3' -> (1, 2, 3) if op == "==" and c != v: result = False elif op == "!=" and c == v: result = False - elif op in {">=", ""} and not (c >= v): # if no constraint passed assume '>=required' + elif op == ">=" and not (c >= v): result = False elif op == "<=" and not (c <= v): result = False @@ -271,11 +272,13 @@ def check_latest_pypi_version(package_name="ultralytics"): Returns: (str): The latest version of the package. """ - with contextlib.suppress(Exception): + try: requests.packages.urllib3.disable_warnings() # Disable the InsecureRequestWarning response = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=3) if response.status_code == 200: return response.json()["info"]["version"] + except Exception: + return None def check_pip_update_available(): @@ -286,7 +289,7 @@ def check_pip_update_available(): (bool): True if an update is available, False otherwise. """ if ONLINE and IS_PIP_PACKAGE: - with contextlib.suppress(Exception): + try: from ultralytics import __version__ latest = check_latest_pypi_version() @@ -296,6 +299,8 @@ def check_pip_update_available(): f"Update with 'pip install -U ultralytics'" ) return True + except Exception: + pass return False @@ -330,18 +335,19 @@ def check_font(font="Arial.ttf"): return file -def check_python(minimum: str = "3.8.0", hard: bool = True) -> bool: +def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = False) -> bool: """ Check current python version against the required minimum version. Args: minimum (str): Required minimum version of python. hard (bool, optional): If True, raise an AssertionError if the requirement is not met. 
+ verbose (bool, optional): If True, print warning message if requirement is not met. Returns: (bool): Whether the installed Python version meets the minimum constraints. """ - return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard) + return check_version(PYTHON_VERSION, minimum, name="Python", hard=hard, verbose=verbose) @TryExcept() @@ -371,8 +377,6 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=() ``` """ prefix = colorstr("red", "bold", "requirements:") - check_python() # check python version - check_torchvision() # check torch-torchvision compatibility if isinstance(requirements, Path): # requirements.txt file file = requirements.resolve() assert file.exists(), f"{prefix} {file} not found, check failed." @@ -429,8 +433,9 @@ def check_torchvision(): The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible Torchvision versions. """ - # Compatibility table compatibility_table = { + "2.6": ["0.21"], + "2.5": ["0.20"], "2.4": ["0.19"], "2.3": ["0.18"], "2.2": ["0.17"], @@ -440,7 +445,7 @@ def check_torchvision(): "1.12": ["0.13"], } - # Extract only the major and minor versions + # Check major and minor versions v_torch = ".".join(torch.__version__.split("+")[0].split(".")[:2]) if v_torch in compatibility_table: compatible_versions = compatibility_table[v_torch] @@ -454,7 +459,7 @@ def check_torchvision(): ) -def check_suffix(file="yolov8n.pt", suffix=".pt", msg=""): +def check_suffix(file="yolo11n.pt", suffix=".pt", msg=""): """Check file(s) for acceptable suffix.""" if file and suffix: if isinstance(suffix, str): @@ -484,10 +489,10 @@ def check_yolov5u_filename(file: str, verbose: bool = True): return file -def check_model_file_from_stem(model="yolov8n"): +def check_model_file_from_stem(model="yolo11n"): """Return a model filename from a valid model stem.""" if model and not Path(model).suffix and Path(model).stem in downloads.GITHUB_ASSETS_STEMS: - return 
Path(model).with_suffix(".pt") # add suffix, i.e. yolov8n -> yolov8n.pt + return Path(model).with_suffix(".pt") # add suffix, i.e. yolo11n -> yolo11n.pt else: return model @@ -565,11 +570,8 @@ def check_yolo(verbose=True, device=""): from ultralytics.utils.torch_utils import select_device - if IS_JUPYTER: - if check_requirements("wandb", install=False): - os.system("pip uninstall -y wandb") # uninstall wandb: unwanted account creation prompt with infinite hang - if IS_COLAB: - shutil.rmtree("sample_data", ignore_errors=True) # remove colab /sample_data directory + if IS_COLAB: + shutil.rmtree("sample_data", ignore_errors=True) # remove colab /sample_data directory if verbose: # System info @@ -577,10 +579,12 @@ def check_yolo(verbose=True, device=""): ram = psutil.virtual_memory().total total, used, free = shutil.disk_usage("/") s = f"({os.cpu_count()} CPUs, {ram / gib:.1f} GB RAM, {(total - free) / gib:.1f}/{total / gib:.1f} GB disk)" - with contextlib.suppress(Exception): # clear display if ipython is installed + try: from IPython import display - display.clear_output() + display.clear_output() # clear display if notebook + except ImportError: + pass else: s = "" @@ -593,38 +597,54 @@ def collect_system_info(): import psutil from ultralytics.utils import ENVIRONMENT # scope to avoid circular import - from ultralytics.utils.torch_utils import get_cpu_info + from ultralytics.utils.torch_utils import get_cpu_info, get_gpu_info - ram_info = psutil.virtual_memory().total / (1024**3) # Convert bytes to GB + gib = 1 << 30 # bytes per GiB + cuda = torch and torch.cuda.is_available() check_yolo() - LOGGER.info( - f"\n{'OS':<20}{platform.platform()}\n" - f"{'Environment':<20}{ENVIRONMENT}\n" - f"{'Python':<20}{PYTHON_VERSION}\n" - f"{'Install':<20}{'git' if IS_GIT_DIR else 'pip' if IS_PIP_PACKAGE else 'other'}\n" - f"{'RAM':<20}{ram_info:.2f} GB\n" - f"{'CPU':<20}{get_cpu_info()}\n" - f"{'CUDA':<20}{torch.version.cuda if torch and torch.cuda.is_available() else None}\n" - 
) + total, used, free = shutil.disk_usage("/") + + info_dict = { + "OS": platform.platform(), + "Environment": ENVIRONMENT, + "Python": PYTHON_VERSION, + "Install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other", + "RAM": f"{psutil.virtual_memory().total / gib:.2f} GB", + "Disk": f"{(total - free) / gib:.1f}/{total / gib:.1f} GB", + "CPU": get_cpu_info(), + "CPU count": os.cpu_count(), + "GPU": get_gpu_info(index=0) if cuda else None, + "GPU count": torch.cuda.device_count() if cuda else None, + "CUDA": torch.version.cuda if cuda else None, + } + LOGGER.info("\n" + "\n".join(f"{k:<20}{v}" for k, v in info_dict.items()) + "\n") + package_info = {} for r in parse_requirements(package="ultralytics"): try: current = metadata.version(r.name) - is_met = "โœ… " if check_version(current, str(r.specifier), hard=True) else "โŒ " + is_met = "โœ… " if check_version(current, str(r.specifier), name=r.name, hard=True) else "โŒ " except metadata.PackageNotFoundError: current = "(not installed)" is_met = "โŒ " - LOGGER.info(f"{r.name:<20}{is_met}{current}{r.specifier}") + package_info[r.name] = f"{is_met}{current}{r.specifier}" + LOGGER.info(f"{r.name:<20}{package_info[r.name]}") + + info_dict["Package Info"] = package_info if is_github_action_running(): - LOGGER.info( - f"\nRUNNER_OS: {os.getenv('RUNNER_OS')}\n" - f"GITHUB_EVENT_NAME: {os.getenv('GITHUB_EVENT_NAME')}\n" - f"GITHUB_WORKFLOW: {os.getenv('GITHUB_WORKFLOW')}\n" - f"GITHUB_ACTOR: {os.getenv('GITHUB_ACTOR')}\n" - f"GITHUB_REPOSITORY: {os.getenv('GITHUB_REPOSITORY')}\n" - f"GITHUB_REPOSITORY_OWNER: {os.getenv('GITHUB_REPOSITORY_OWNER')}\n" - ) + github_info = { + "RUNNER_OS": os.getenv("RUNNER_OS"), + "GITHUB_EVENT_NAME": os.getenv("GITHUB_EVENT_NAME"), + "GITHUB_WORKFLOW": os.getenv("GITHUB_WORKFLOW"), + "GITHUB_ACTOR": os.getenv("GITHUB_ACTOR"), + "GITHUB_REPOSITORY": os.getenv("GITHUB_REPOSITORY"), + "GITHUB_REPOSITORY_OWNER": os.getenv("GITHUB_REPOSITORY_OWNER"), + } + LOGGER.info("\n" + 
"\n".join(f"{k}: {v}" for k, v in github_info.items())) + info_dict["GitHub Info"] = github_info + + return info_dict def check_amp(model): @@ -651,21 +671,35 @@ def check_amp(model): from ultralytics.utils.torch_utils import autocast device = next(model.parameters()).device # get model device + prefix = colorstr("AMP: ") if device.type in {"cpu", "mps"}: return False # AMP only used on CUDA devices + else: + # GPUs that have issues with AMP + pattern = re.compile( + r"(nvidia|geforce|quadro|tesla).*?(1660|1650|1630|t400|t550|t600|t1000|t1200|t2000|k40m)", re.IGNORECASE + ) + + gpu = torch.cuda.get_device_name(device) + if bool(pattern.search(gpu)): + LOGGER.warning( + f"{prefix}checks failed โŒ. AMP training on {gpu} GPU may cause " + f"NaN losses or zero-mAP results, so AMP will be disabled during training." + ) + return False def amp_allclose(m, im): """All close FP32 vs AMP results.""" batch = [im] * 8 - a = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # FP32 inference + imgsz = max(256, int(model.stride.max() * 4)) # max stride P5-32 and P6-64 + a = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data # FP32 inference with autocast(enabled=True): - b = m(batch, imgsz=128, device=device, verbose=False)[0].boxes.data # AMP inference + b = m(batch, imgsz=imgsz, device=device, verbose=False)[0].boxes.data # AMP inference del m return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance im = ASSETS / "bus.jpg" # image to check - prefix = colorstr("AMP: ") - LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks with YOLO11n...") + LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks...") warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False." 
try: from ultralytics import YOLO @@ -673,11 +707,13 @@ def amp_allclose(m, im): assert amp_allclose(YOLO("yolo11n.pt"), im) LOGGER.info(f"{prefix}checks passed โœ…") except ConnectionError: - LOGGER.warning(f"{prefix}checks skipped โš ๏ธ, offline and unable to download YOLO11n. {warning_msg}") + LOGGER.warning( + f"{prefix}checks skipped โš ๏ธ. Offline and unable to download YOLO11n for AMP checks. {warning_msg}" + ) except (AttributeError, ModuleNotFoundError): LOGGER.warning( f"{prefix}checks skipped โš ๏ธ. " - f"Unable to load YOLO11n due to possible Ultralytics package modifications. {warning_msg}" + f"Unable to load YOLO11n for AMP checks due to possible Ultralytics package modifications. {warning_msg}" ) except AssertionError: LOGGER.warning( @@ -690,9 +726,10 @@ def amp_allclose(m, im): def git_describe(path=ROOT): # path must be a directory """Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe.""" - with contextlib.suppress(Exception): + try: return subprocess.check_output(f"git -C {path} describe --tags --long --always", shell=True).decode()[:-1] - return "" + except Exception: + return "" def print_args(args: Optional[dict] = None, show_file=True, show_func=False): @@ -747,6 +784,38 @@ def cuda_is_available() -> bool: return cuda_device_count() > 0 +def is_rockchip(): + """Check if the current environment is running on a Rockchip SoC.""" + if LINUX and ARM64: + try: + with open("/proc/device-tree/compatible") as f: + dev_str = f.read() + *_, soc = dev_str.split(",") + if soc.replace("\x00", "") in RKNN_CHIPS: + return True + except OSError: + return False + else: + return False + + +def is_sudo_available() -> bool: + """ + Check if the sudo command is available in the environment. + + Returns: + (bool): True if the sudo command is available, False otherwise. 
+ """ + if WINDOWS: + return False + cmd = "sudo --version" + return subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).returncode == 0 + + +# Run checks and define constants +check_python("3.8", hard=False, verbose=True) # check python version +check_torchvision() # check torch-torchvision compatibility + # Define constants IS_PYTHON_MINIMUM_3_10 = check_python("3.10", hard=False) IS_PYTHON_3_12 = PYTHON_VERSION.startswith("3.12") diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py index ff980967fb9..8b7e5bbe4ce 100644 --- a/ultralytics/utils/dist.py +++ b/ultralytics/utils/dist.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import os import shutil @@ -37,7 +37,7 @@ def generate_ddp_file(trainer): cfg = DEFAULT_CFG_DICT.copy() cfg.update(save_dir='') # handle the extra key 'save_dir' trainer = {name}(cfg=cfg, overrides=overrides) - trainer.args.model = "{getattr(trainer.hub_session, 'model_url', trainer.args.model)}" + trainer.args.model = "{getattr(trainer.hub_session, "model_url", trainer.args.model)}" results = trainer.train() """ (USER_CONFIG_DIR / "DDP").mkdir(exist_ok=True) diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py index 5cbc868ab69..be33ae8a114 100644 --- a/ultralytics/utils/downloads.py +++ b/ultralytics/utils/downloads.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import re import shutil import subprocess @@ -53,7 +52,7 @@ def is_url(url, check=False): valid = is_url("https://www.example.com") ``` """ - with contextlib.suppress(Exception): + try: url = str(url) result = parse.urlparse(url) assert all([result.scheme, result.netloc]) # check if is url @@ -61,7 +60,8 @@ def is_url(url, check=False): with request.urlopen(url) as response: return response.getcode() == 200 # check if 
exists online return True - return False + except Exception: + return False def delete_dsstore(path, files_to_delete=(".DS_Store", "__MACOSX")): @@ -138,7 +138,7 @@ def unzip_file(file, path=None, exclude=(".DS_Store", "__MACOSX"), exist_ok=Fals If a path is not provided, the function will use the parent directory of the zipfile as the default path. Args: - file (str): The path to the zipfile to be extracted. + file (str | Path): The path to the zipfile to be extracted. path (str, optional): The path to extract the zipfile to. Defaults to None. exclude (tuple, optional): A tuple of filename strings to be excluded. Defaults to ('.DS_Store', '__MACOSX'). exist_ok (bool, optional): Whether to overwrite existing contents if they exist. Defaults to False. @@ -269,8 +269,7 @@ def get_google_drive_file_info(link): for k, v in response.cookies.items(): if k.startswith("download_warning"): drive_url += f"&confirm={v}" # v is token - cd = response.headers.get("content-disposition") - if cd: + if cd := response.headers.get("content-disposition"): filename = re.findall('filename="(.+)"', cd)[0] return drive_url, filename @@ -406,7 +405,7 @@ def get_github_assets(repo="ultralytics/assets", version="latest", retry=False): LOGGER.warning(f"โš ๏ธ GitHub assets check failure for {url}: {r.status_code} {r.reason}") return "", [] data = r.json() - return data["tag_name"], [x["name"] for x in data["assets"]] # tag, assets i.e. ['yolov8n.pt', 'yolov8s.pt', ...] + return data["tag_name"], [x["name"] for x in data["assets"]] # tag, assets i.e. ['yolo11n.pt', 'yolov8s.pt', ...] 
def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", **kwargs): @@ -425,7 +424,7 @@ def attempt_download_asset(file, repo="ultralytics/assets", release="v8.3.0", ** Example: ```python - file_path = attempt_download_asset("yolov8n.pt", repo="ultralytics/assets", release="latest") + file_path = attempt_download_asset("yolo11n.pt", repo="ultralytics/assets", release="latest") ``` """ from ultralytics.utils import SETTINGS # scoped for circular import diff --git a/ultralytics/utils/errors.py b/ultralytics/utils/errors.py index 86aee1d90aa..8cb7aae13f1 100644 --- a/ultralytics/utils/errors.py +++ b/ultralytics/utils/errors.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from ultralytics.utils import emojis diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py index d0953c748e2..0af6b0c2332 100644 --- a/ultralytics/utils/files.py +++ b/ultralytics/utils/files.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import contextlib import glob @@ -183,7 +183,7 @@ def get_latest_run(search_dir="."): return max(last_list, key=os.path.getctime) if last_list else "" -def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_names=False): +def update_models(model_names=("yolo11n.pt",), source_dir=Path("."), update_names=False): """ Updates and re-saves specified YOLO models in an 'updated_models' subdirectory. 
@@ -195,7 +195,7 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name Examples: Update specified YOLO models and save them in 'updated_models' subdirectory: >>> from ultralytics.utils.files import update_models - >>> model_names = ("yolov8n.pt", "yolov8s.pt") + >>> model_names = ("yolo11n.pt", "yolov8s.pt") >>> update_models(model_names, source_dir=Path("/models"), update_names=True) """ from ultralytics import YOLO @@ -219,4 +219,4 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name # Save model using model.save() print(f"Re-saving {model_name} model to {save_path}") - model.save(save_path, use_dill=False) + model.save(save_path) diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py index f8838957198..71ce36269f8 100644 --- a/ultralytics/utils/instance.py +++ b/ultralytics/utils/instance.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from collections import abc from itertools import repeat @@ -7,7 +7,7 @@ import numpy as np -from .ops import ltwh2xywh, ltwh2xyxy, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh +from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh def _ntuple(n): @@ -28,7 +28,7 @@ def parse(x): # `ltwh` means left top and width, height(COCO format) _formats = ["xyxy", "xywh", "ltwh"] -__all__ = ("Bboxes",) # tuple or list +__all__ = ("Bboxes", "Instances") # tuple or list class Bboxes: @@ -176,7 +176,7 @@ def __getitem__(self, index) -> "Bboxes": length as the number of bounding boxes. """ if isinstance(index, int): - return Bboxes(self.bboxes[index].view(1, -1)) + return Bboxes(self.bboxes[index].reshape(1, -1)) b = self.bboxes[index] assert b.ndim == 2, f"Indexing on Bboxes with {index} failed to return a matrix!" 
return Bboxes(b) @@ -406,7 +406,20 @@ def concatenate(cls, instances_list: List["Instances"], axis=0) -> "Instances": normalized = instances_list[0].normalized cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis) - cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) + seg_len = [b.segments.shape[1] for b in instances_list] + if len(frozenset(seg_len)) > 1: # resample segments if there's different length + max_len = max(seg_len) + cat_segments = np.concatenate( + [ + resample_segments(list(b.segments), max_len) + if len(b.segments) + else np.zeros((0, max_len, 2), dtype=np.float32) # re-generating empty segments + for b in instances_list + ], + axis=axis, + ) + else: + cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized) diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index aa1c02d6188..311b3071e81 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch import torch.nn as nn @@ -189,8 +189,7 @@ def preprocess(self, targets, batch_size, scale_tensor): out = torch.zeros(batch_size, counts.max(), ne - 1, device=self.device) for j in range(batch_size): matches = i == j - n = matches.sum() - if n: + if n := matches.sum(): out[j, :n] = targets[matches, 1:] out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) return out @@ -298,7 +297,7 @@ def __call__(self, preds, batch): raise TypeError( "ERROR โŒ segment dataset incorrectly formatted or not a segment dataset.\n" "This error can occur when incorrectly training a 'segment' model on a 'detect' dataset, " - "i.e. 
'yolo train model=yolov8n-seg.pt data=coco8.yaml'.\nVerify your dataset is a " + "i.e. 'yolo train model=yolo11n-seg.pt data=coco8.yaml'.\nVerify your dataset is a " "correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' " "as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help." ) from e @@ -552,9 +551,8 @@ def calculate_keypoints_loss( pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim). Returns: - (tuple): Returns a tuple containing: - - kpts_loss (torch.Tensor): The keypoints loss. - - kpts_obj_loss (torch.Tensor): The keypoints object loss. + kpts_loss (torch.Tensor): The keypoints loss. + kpts_obj_loss (torch.Tensor): The keypoints object loss. """ batch_idx = batch_idx.flatten() batch_size = len(masks) @@ -605,6 +603,7 @@ class v8ClassificationLoss: def __call__(self, preds, batch): """Compute the classification loss between predictions and true labels.""" + preds = preds[1] if isinstance(preds, (list, tuple)) else preds loss = F.cross_entropy(preds, batch["cls"], reduction="mean") loss_items = loss.detach() return loss, loss_items @@ -630,8 +629,7 @@ def preprocess(self, targets, batch_size, scale_tensor): out = torch.zeros(batch_size, counts.max(), 6, device=self.device) for j in range(batch_size): matches = i == j - n = matches.sum() - if n: + if n := matches.sum(): bboxes = targets[matches, 2:] bboxes[..., :4].mul_(scale_tensor) out[j, :n] = torch.cat([targets[matches, 1:2], bboxes], dim=-1) @@ -668,7 +666,7 @@ def __call__(self, preds, batch): raise TypeError( "ERROR โŒ OBB dataset incorrectly formatted or not a OBB dataset.\n" "This error can occur when incorrectly training a 'OBB' model on a 'detect' dataset, " - "i.e. 'yolo train model=yolov8n-obb.pt data=dota8.yaml'.\nVerify your dataset is a " + "i.e. 
'yolo train model=yolo11n-obb.pt data=dota8.yaml'.\nVerify your dataset is a " "correctly formatted 'OBB' dataset using 'data=dota8.yaml' " "as an example.\nSee https://docs.ultralytics.com/datasets/obb/ for help." ) from e diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index 37a06b43a12..4c9755a4af0 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Model validation metrics.""" import math @@ -74,11 +74,16 @@ def box_iou(box1, box2, eps=1e-7): def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): """ - Calculate Intersection over Union (IoU) of box1(1, 4) to box2(n, 4). + Calculates the Intersection over Union (IoU) between bounding boxes. + + This function supports various shapes for `box1` and `box2` as long as the last dimension is 4. + For instance, you may pass tensors shaped like (4,), (N, 4), (B, N, 4), or (B, N, 1, 4). + Internally, the code will split the last dimension into (x, y, w, h) if `xywh=True`, + or (x1, y1, x2, y2) if `xywh=False`. Args: - box1 (torch.Tensor): A tensor representing a single bounding box with shape (1, 4). - box2 (torch.Tensor): A tensor representing n bounding boxes with shape (n, 4). + box1 (torch.Tensor): A tensor representing one or more bounding boxes, with the last dimension being 4. + box2 (torch.Tensor): A tensor representing one or more bounding boxes, with the last dimension being 4. xywh (bool, optional): If True, input boxes are in (x, y, w, h) format. If False, input boxes are in (x1, y1, x2, y2) format. Defaults to True. GIoU (bool, optional): If True, calculate Generalized IoU. Defaults to False. @@ -271,7 +276,7 @@ def batch_probiou(obb1, obb2, eps=1e-7): return 1 - hd -def smooth_BCE(eps=0.1): +def smooth_bce(eps=0.1): """ Computes smoothed positive and negative Binary Cross-Entropy targets. 
@@ -373,10 +378,9 @@ def process_batch(self, detections, gt_bboxes, gt_cls): else: self.matrix[self.nc, gc] += 1 # true background - if n: - for i, dc in enumerate(detection_classes): - if not any(m1 == i): - self.matrix[dc, self.nc] += 1 # predicted background + for i, dc in enumerate(detection_classes): + if not any(m1 == i): + self.matrix[dc, self.nc] += 1 # predicted background def matrix(self): """Returns the confusion matrix.""" @@ -429,7 +433,7 @@ def plot(self, normalize=True, save_dir="", names=(), on_plot=None): ax.set_xlabel("True") ax.set_ylabel("Predicted") ax.set_title(title) - plot_fname = Path(save_dir) / f'{title.lower().replace(" ", "_")}.png' + plot_fname = Path(save_dir) / f"{title.lower().replace(' ', '_')}.png" fig.savefig(plot_fname, dpi=250) plt.close(fig) if on_plot: @@ -550,19 +554,18 @@ def ap_per_class( prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string. Returns: - (tuple): A tuple of six arrays and one array of unique classes, where: - tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). - fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). - p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). - r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). - f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). - ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). - unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). - p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). - r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). - f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). - x (np.ndarray): X-axis values for the curves. 
Shape: (1000,). - prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). + fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + x (np.ndarray): X-axis values for the curves. Shape: (1000,). + prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000). """ # Sort by objectness i = np.argsort(-conf) @@ -599,7 +602,7 @@ def ap_per_class( # AP from recall-precision curve for j in range(tp.shape[1]): ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) - if plot and j == 0: + if j == 0: prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 prec_values = np.array(prec_values) # (nc, 1000) diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index b76168f95e5..af41ffee3d0 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import contextlib import math @@ -75,6 +75,10 @@ def segment2box(segment, width=640, height=640): (np.ndarray): the minimum and maximum x and y values of the segment. 
""" x, y = segment.T # segment xy + # any 3 out of 4 sides are outside the image, clip coordinates first, https://github.com/ultralytics/ultralytics/pull/18294 + if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3: + x = x.clip(0, width) + y = y.clip(0, height) inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) x = x[inside] y = y[inside] @@ -139,7 +143,7 @@ def make_divisible(x, divisor): return math.ceil(x / divisor) * divisor -def nms_rotated(boxes, scores, threshold=0.45): +def nms_rotated(boxes, scores, threshold=0.45, use_triu=True): """ NMS for oriented bounding boxes using probiou and fast-nms. @@ -147,16 +151,30 @@ def nms_rotated(boxes, scores, threshold=0.45): boxes (torch.Tensor): Rotated bounding boxes, shape (N, 5), format xywhr. scores (torch.Tensor): Confidence scores, shape (N,). threshold (float, optional): IoU threshold. Defaults to 0.45. + use_triu (bool, optional): Whether to use `torch.triu` operator. It'd be useful for disable it + when exporting obb models to some formats that do not support `torch.triu`. Returns: (torch.Tensor): Indices of boxes to keep after NMS. 
""" - if len(boxes) == 0: - return np.empty((0,), dtype=np.int8) sorted_idx = torch.argsort(scores, descending=True) boxes = boxes[sorted_idx] - ious = batch_probiou(boxes, boxes).triu_(diagonal=1) - pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1) + ious = batch_probiou(boxes, boxes) + if use_triu: + ious = ious.triu_(diagonal=1) + # pick = torch.nonzero(ious.max(dim=0)[0] < threshold).squeeze_(-1) + # NOTE: handle the case when len(boxes) hence exportable by eliminating if-else condition + pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1) + else: + n = boxes.shape[0] + row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n) + col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1) + upper_mask = row_idx < col_idx + ious = ious * upper_mask + # Zeroing these scores ensures the additional indices would not affect the final results + scores[~((ious >= threshold).sum(0) <= 0)] = 0 + # NOTE: return indices with fixed length to avoid TFLite reshape error + pick = torch.topk(scores, scores.shape[0]).indices return sorted_idx[pick] @@ -175,6 +193,7 @@ def non_max_suppression( max_wh=7680, in_place=True, rotated=False, + end2end=False, ): """ Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. @@ -201,6 +220,7 @@ def non_max_suppression( max_wh (int): The maximum box width and height in pixels. in_place (bool): If True, the input prediction tensor will be modified in place. rotated (bool): If Oriented Bounding Boxes (OBB) are being passed for NMS. + end2end (bool): If the model doesn't require NMS. Returns: (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of @@ -217,7 +237,7 @@ def non_max_suppression( if classes is not None: classes = torch.tensor(classes, device=prediction.device) - if prediction.shape[-1] == 6: # end-to-end model (BNC, i.e. 
1,300,6) + if prediction.shape[-1] == 6 or end2end: # end-to-end model (BNC, i.e. 1,300,6) output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction] if classes is not None: output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output] @@ -317,11 +337,11 @@ def clip_boxes(boxes, shape): Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape. Args: - boxes (torch.Tensor): the bounding boxes to clip - shape (tuple): the shape of the image + boxes (torch.Tensor): The bounding boxes to clip. + shape (tuple): The shape of the image. Returns: - (torch.Tensor | numpy.ndarray): Clipped boxes + (torch.Tensor | numpy.ndarray): The clipped boxes. """ if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug) boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1 @@ -359,9 +379,9 @@ def scale_image(masks, im0_shape, ratio_pad=None): Takes a mask, and resizes it to the original image size. Args: - masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. - im0_shape (tuple): the original image shape - ratio_pad (tuple): the ratio of the padding to the original image. + masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3]. + im0_shape (tuple): The original image shape. + ratio_pad (tuple): The ratio of the padding to the original image. Returns: masks (np.ndarray): The masks that are being returned with shape [h, w, num]. @@ -401,7 +421,7 @@ def xyxy2xywh(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. 
""" assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center y[..., 2] = x[..., 2] - x[..., 0] # width @@ -421,7 +441,7 @@ def xywh2xyxy(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. """ assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy xy = x[..., :2] # centers wh = x[..., 2:] / 2 # half width-height y[..., :2] = xy - wh # top left xy @@ -444,7 +464,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. 
""" assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x @@ -470,7 +490,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): if clip: x = clip_boxes(x, (h - eps, w - eps)) assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center y[..., 2] = (x[..., 2] - x[..., 0]) / w # width @@ -625,9 +645,12 @@ def resample_segments(segments, n=1000): segments (list): the resampled segments. """ for i, s in enumerate(segments): + if len(s) == n: + continue s = np.concatenate((s, s[0:1, :]), axis=0) - x = np.linspace(0, len(s) - 1, n) + x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n) xp = np.arange(len(s)) + x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x segments[i] = ( np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T ) # segment xy @@ -692,12 +715,12 @@ def process_mask_native(protos, masks_in, bboxes, shape): Args: protos (torch.Tensor): [mask_dim, mask_h, mask_w] - masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms - bboxes (torch.Tensor): [n, 4], n is number of masks after nms - shape (tuple): the size of the input image (h,w) + masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms. + bboxes (torch.Tensor): [n, 4], n is number of masks after nms. 
+ shape (tuple): The size of the input image (h,w). Returns: - masks (torch.Tensor): The returned masks with dimensions [h, w, n] + masks (torch.Tensor): The returned masks with dimensions [h, w, n]. """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) @@ -783,23 +806,29 @@ def regularize_rboxes(rboxes): return torch.stack([x, y, w_, h_, t], dim=-1) # regularized boxes -def masks2segments(masks, strategy="largest"): +def masks2segments(masks, strategy="all"): """ It takes a list of masks(n,h,w) and returns a list of segments(n,xy). Args: masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160) - strategy (str): 'concat' or 'largest'. Defaults to largest + strategy (str): 'all' or 'largest'. Defaults to all Returns: segments (List): list of segment masks """ + from ultralytics.data.converter import merge_multi_segment + segments = [] for x in masks.int().cpu().numpy().astype("uint8"): c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] if c: - if strategy == "concat": # concatenate all segments - c = np.concatenate([x.reshape(-1, 2) for x in c]) + if strategy == "all": # merge and concatenate all segments + c = ( + np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c])) + if len(c) > 1 + else c[0].reshape(-1, 2) + ) elif strategy == "largest": # select largest segment c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) else: @@ -832,3 +861,10 @@ def clean_str(s): (str): a string with special characters replaced by an underscore _ """ return re.sub(pattern="[|@#!ยกยท$โ‚ฌ%&()=?ยฟ^*;:,ยจยด><+]", repl="_", string=s) + + +def empty_like(x): + """Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype.""" + return ( + torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32) + ) diff --git a/ultralytics/utils/patches.py b/ultralytics/utils/patches.py index 
d918e0efeac..1531cd7f8f6 100644 --- a/ultralytics/utils/patches.py +++ b/ultralytics/utils/patches.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license """Monkey patches to update/extend functionality of existing functions.""" import time @@ -86,25 +86,15 @@ def torch_load(*args, **kwargs): return _torch_load(*args, **kwargs) -def torch_save(*args, use_dill=True, **kwargs): +def torch_save(*args, **kwargs): """ Optionally use dill to serialize lambda functions where pickle does not, adding robustness with 3 retries and exponential standoff in case of save failure. Args: *args (tuple): Positional arguments to pass to torch.save. - use_dill (bool): Whether to try using dill for serialization if available. Defaults to True. **kwargs (Any): Keyword arguments to pass to torch.save. """ - try: - assert use_dill - import dill as pickle - except (AssertionError, ImportError): - import pickle - - if "pickle_module" not in kwargs: - kwargs["pickle_module"] = pickle - for i in range(4): # 3 retries try: return _torch_save(*args, **kwargs) diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index a70d8c259c6..2c211df4880 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import math import warnings from pathlib import Path @@ -13,7 +12,7 @@ from PIL import Image, ImageDraw, ImageFont from PIL import __version__ as pil_version -from ultralytics.utils import LOGGER, TryExcept, ops, plt_settings, threaded +from ultralytics.utils import IS_COLAB, IS_KAGGLE, LOGGER, TryExcept, ops, plt_settings, threaded from ultralytics.utils.checks import check_font, check_version, is_ascii from ultralytics.utils.files import increment_path @@ -215,7 +214,16 @@ def __init__(self, im, line_width=None, font_size=None, 
font="Arial.ttf", pil=Fa self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] def get_txt_color(self, color=(128, 128, 128), txt_color=(255, 255, 255)): - """Assign text color based on background color.""" + """ + Assign text color based on background color. + + Args: + color (tuple, optional): The background color of the rectangle for text (B, G, R). + txt_color (tuple, optional): The color of the text (R, G, B). + + Returns: + txt_color (tuple): Text color for label + """ if color in self.dark_colors: return 104, 31, 17 elif color in self.light_colors: @@ -500,13 +508,21 @@ def result(self): def show(self, title=None): """Show the annotated image.""" - Image.fromarray(np.asarray(self.im)[..., ::-1]).show(title) + im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert numpy array to PIL Image with RGB to BGR + if IS_COLAB or IS_KAGGLE: # can not use IS_JUPYTER as will run for all ipython environments + try: + display(im) # noqa - display() function only available in ipython environments + except ImportError as e: + LOGGER.warning(f"Unable to display image in Jupyter notebooks: {e}") + else: + im.show(title=title) def save(self, filename="image.jpg"): """Save the annotated image to 'filename'.""" cv2.imwrite(filename, np.asarray(self.im)) - def get_bbox_dimension(self, bbox=None): + @staticmethod + def get_bbox_dimension(bbox=None): """ Calculate the area of a bounding box. @@ -514,7 +530,9 @@ def get_bbox_dimension(self, bbox=None): bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max). Returns: - angle (degree): Degree value of angle between three points + width (float): Width of the bounding box. + height (float): Height of the bounding box. + area (float): Area enclosed by the bounding box. 
""" x_min, y_min, x_max, y_max = bbox width = x_max - x_min @@ -554,10 +572,10 @@ def queue_counts_display(self, label, points=None, region_color=(255, 255, 255), Displays queue counts on an image centered at the points with customizable font size and colors. Args: - label (str): queue counts label - points (tuple): region points for center point calculation to display text - region_color (RGB): queue region color - txt_color (RGB): text display color + label (str): Queue counts label. + points (tuple): Region points for center point calculation to display text. + region_color (tuple): RGB queue region color. + txt_color (tuple): RGB text display color. """ x_values = [point[0] for point in points] y_values = [point[1] for point in points] @@ -594,13 +612,13 @@ def display_objects_labels(self, im0, text, txt_color, bg_color, x_center, y_cen Display the bounding boxes labels in parking management app. Args: - im0 (ndarray): inference image - text (str): object/class name - txt_color (bgr color): display color for text foreground - bg_color (bgr color): display color for text background - x_center (float): x position center point for bounding box - y_center (float): y position center point for bounding box - margin (int): gap between text and rectangle for better display + im0 (ndarray): Inference image. + text (str): Object/class name. + txt_color (tuple): Display color for text foreground. + bg_color (tuple): Display color for text background. + x_center (float): The x position center point for bounding box. + y_center (float): The y position center point for bounding box. + margin (int): The gap between text and rectangle for better display. """ text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0] text_x = x_center - text_size[0] // 2 @@ -618,11 +636,11 @@ def display_analytics(self, im0, text, txt_color, bg_color, margin): Display the overall statistics for parking lots. 
Args: - im0 (ndarray): inference image - text (dict): labels dictionary - txt_color (bgr color): display color for text foreground - bg_color (bgr color): display color for text background - margin (int): gap between text and rectangle for better display + im0 (ndarray): Inference image. + text (dict): Labels dictionary. + txt_color (tuple): Display color for text foreground. + bg_color (tuple): Display color for text background. + margin (int): Gap between text and rectangle for better display. """ horizontal_gap = int(im0.shape[1] * 0.02) vertical_gap = int(im0.shape[0] * 0.01) @@ -662,14 +680,13 @@ def estimate_pose_angle(a, b, c): angle = 360 - angle return angle - def draw_specific_points(self, keypoints, indices=None, shape=(640, 640), radius=2, conf_thres=0.25): + def draw_specific_points(self, keypoints, indices=None, radius=2, conf_thres=0.25): """ Draw specific keypoints for gym steps counting. Args: keypoints (list): Keypoints data to be plotted. indices (list, optional): Keypoint indices to be plotted. Defaults to [2, 5, 7]. - shape (tuple, optional): Image size for model inference. Defaults to (640, 640). radius (int, optional): Keypoint radius. Defaults to 2. conf_thres (float, optional): Confidence threshold for keypoints. Defaults to 0.25. @@ -680,142 +697,157 @@ def draw_specific_points(self, keypoints, indices=None, shape=(640, 640), radius Keypoint format: [x, y] or [x, y, confidence]. Modifies self.im in-place. 
""" - if indices is None: - indices = [2, 5, 7] - for i, k in enumerate(keypoints): - if i in indices: - x_coord, y_coord = k[0], k[1] - if x_coord % shape[1] != 0 and y_coord % shape[0] != 0: - if len(k) == 3: - conf = k[2] - if conf < conf_thres: - continue - cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, (0, 255, 0), -1, lineType=cv2.LINE_AA) + indices = indices or [2, 5, 7] + points = [(int(k[0]), int(k[1])) for i, k in enumerate(keypoints) if i in indices and k[2] >= conf_thres] + + # Draw lines between consecutive points + for start, end in zip(points[:-1], points[1:]): + cv2.line(self.im, start, end, (0, 255, 0), 2, lineType=cv2.LINE_AA) + + # Draw circles for keypoints + for pt in points: + cv2.circle(self.im, pt, radius, (0, 0, 255), -1, lineType=cv2.LINE_AA) + return self.im - def plot_angle_and_count_and_stage( - self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255) - ): + def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)): """ - Plot the pose angle, count value and step stage. + Draw text with a background on the image. Args: - angle_text (str): angle value for workout monitoring - count_text (str): counts value for workout monitoring - stage_text (str): stage decision for workout monitoring - center_kpt (list): centroid pose index for workout monitoring - color (tuple): text background color for workout monitoring - txt_color (tuple): text foreground color for workout monitoring + display_text (str): The text to be displayed. + position (tuple): Coordinates (x, y) on the image where the text will be placed. 
+ color (tuple, optional): Text background color + txt_color (tuple, optional): Text foreground color """ - angle_text, count_text, stage_text = (f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}") + (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf) - # Draw angle - (angle_text_width, angle_text_height), _ = cv2.getTextSize(angle_text, 0, self.sf, self.tf) - angle_text_position = (int(center_kpt[0]), int(center_kpt[1])) - angle_background_position = (angle_text_position[0], angle_text_position[1] - angle_text_height - 5) - angle_background_size = (angle_text_width + 2 * 5, angle_text_height + 2 * 5 + (self.tf * 2)) + # Draw background rectangle cv2.rectangle( self.im, - angle_background_position, - ( - angle_background_position[0] + angle_background_size[0], - angle_background_position[1] + angle_background_size[1], - ), + (position[0], position[1] - text_height - 5), + (position[0] + text_width + 10, position[1] - text_height - 5 + text_height + 10 + self.tf), color, -1, ) - cv2.putText(self.im, angle_text, angle_text_position, 0, self.sf, txt_color, self.tf) - - # Draw Counts - (count_text_width, count_text_height), _ = cv2.getTextSize(count_text, 0, self.sf, self.tf) - count_text_position = (angle_text_position[0], angle_text_position[1] + angle_text_height + 20) - count_background_position = ( - angle_background_position[0], - angle_background_position[1] + angle_background_size[1] + 5, - ) - count_background_size = (count_text_width + 10, count_text_height + 10 + self.tf) + # Draw text + cv2.putText(self.im, display_text, position, 0, self.sf, txt_color, self.tf) - cv2.rectangle( - self.im, - count_background_position, - ( - count_background_position[0] + count_background_size[0], - count_background_position[1] + count_background_size[1], - ), - color, - -1, - ) - cv2.putText(self.im, count_text, count_text_position, 0, self.sf, txt_color, self.tf) + return text_height - # Draw Stage - (stage_text_width, 
stage_text_height), _ = cv2.getTextSize(stage_text, 0, self.sf, self.tf) - stage_text_position = (int(center_kpt[0]), int(center_kpt[1]) + angle_text_height + count_text_height + 40) - stage_background_position = (stage_text_position[0], stage_text_position[1] - stage_text_height - 5) - stage_background_size = (stage_text_width + 10, stage_text_height + 10) + def plot_angle_and_count_and_stage( + self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255) + ): + """ + Plot the pose angle, count value, and step stage. - cv2.rectangle( - self.im, - stage_background_position, - ( - stage_background_position[0] + stage_background_size[0], - stage_background_position[1] + stage_background_size[1], - ), - color, - -1, + Args: + angle_text (str): Angle value for workout monitoring + count_text (str): Counts value for workout monitoring + stage_text (str): Stage decision for workout monitoring + center_kpt (list): Centroid pose index for workout monitoring + color (tuple, optional): Text background color + txt_color (tuple, optional): Text foreground color + """ + # Format text + angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}" + + # Draw angle, count and stage text + angle_height = self.plot_workout_information( + angle_text, (int(center_kpt[0]), int(center_kpt[1])), color, txt_color + ) + count_height = self.plot_workout_information( + count_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + 20), color, txt_color + ) + self.plot_workout_information( + stage_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + count_height + 40), color, txt_color ) - cv2.putText(self.im, stage_text, stage_text_position, 0, self.sf, txt_color, self.tf) def seg_bbox(self, mask, mask_color=(255, 0, 255), label=None, txt_color=(255, 255, 255)): """ Function for drawing segmented object in bounding box shape. 
Args: - mask (list): masks data list for instance segmentation area plotting - mask_color (RGB): mask foreground color - label (str): Detection label text - txt_color (RGB): text color + mask (np.ndarray): A 2D array of shape (N, 2) containing the contour points of the segmented object. + mask_color (tuple): RGB color for the contour and label background. + label (str, optional): Text label for the object. If None, no label is drawn. + txt_color (tuple): RGB color for the label text. """ + if mask.size == 0: # no masks to plot + return + cv2.polylines(self.im, [np.int32([mask])], isClosed=True, color=mask_color, thickness=2) - text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf) + if label: + text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf) + cv2.rectangle( + self.im, + (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10), + (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)), + mask_color, + -1, + ) + cv2.putText( + self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf + ) - cv2.rectangle( - self.im, - (int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10), - (int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)), - mask_color, - -1, - ) + def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)): + """ + Function for drawing a sweep annotation line and an optional label. + + Args: + line_x (int): The x-coordinate of the sweep line. + line_y (int): The y-coordinate limit of the sweep line. + label (str, optional): Text label to be drawn in center of sweep line. If None, no label is drawn. + color (tuple): RGB color for the line and label background. + txt_color (tuple): RGB color for the label text. 
+ """ + # Draw the sweep line + cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2) + # Draw label, if provided if label: + (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf) + cv2.rectangle( + self.im, + (line_x - text_width // 2 - 10, line_y // 2 - text_height // 2 - 10), + (line_x + text_width // 2 + 10, line_y // 2 + text_height // 2 + 10), + color, + -1, + ) cv2.putText( - self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf + self.im, + label, + (line_x - text_width // 2, line_y // 2 + text_height // 2), + cv2.FONT_HERSHEY_SIMPLEX, + self.sf, + txt_color, + self.tf, ) - def plot_distance_and_line(self, pixels_distance, centroids, line_color, centroid_color): + def plot_distance_and_line( + self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255) + ): """ Plot the distance and line on frame. Args: pixels_distance (float): Pixels distance between two bbox centroids. centroids (list): Bounding box centroids data. - line_color (RGB): Distance line color. - centroid_color (RGB): Bounding box centroid color. + line_color (tuple, optional): Distance line color. + centroid_color (tuple, optional): Bounding box centroid color. 
""" # Get the text size - (text_width_m, text_height_m), _ = cv2.getTextSize( - f"Pixels Distance: {pixels_distance:.2f}", 0, self.sf, self.tf - ) + text = f"Pixels Distance: {pixels_distance:.2f}" + (text_width_m, text_height_m), _ = cv2.getTextSize(text, 0, self.sf, self.tf) # Define corners with 10-pixel margin and draw rectangle - top_left = (15, 25) - bottom_right = (15 + text_width_m + 20, 25 + text_height_m + 20) - cv2.rectangle(self.im, top_left, bottom_right, centroid_color, -1) + cv2.rectangle(self.im, (15, 25), (15 + text_width_m + 20, 25 + text_height_m + 20), line_color, -1) # Calculate the position for the text with a 10-pixel margin and draw text - text_position = (top_left[0] + 10, top_left[1] + text_height_m + 10) + text_position = (25, 25 + text_height_m + 10) cv2.putText( self.im, - f"Pixels Distance: {pixels_distance:.2f}", + text, text_position, 0, self.sf, @@ -1101,10 +1133,12 @@ def plot_images( mask = mask.astype(bool) else: mask = image_masks[j].astype(bool) - with contextlib.suppress(Exception): + try: im[y : y + h, x : x + w, :][mask] = ( im[y : y + h, x : x + w, :][mask] * 0.4 + np.array(color) * 0.6 ) + except Exception: + pass annotator.fromarray(im) if not save: return np.asarray(annotator.im) @@ -1141,19 +1175,19 @@ def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False, save_dir = Path(file).parent if file else Path(dir) if classify: fig, ax = plt.subplots(2, 2, figsize=(6, 6), tight_layout=True) - index = [1, 4, 2, 3] + index = [2, 5, 3, 4] elif segment: fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) - index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12] + index = [2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 8, 9, 12, 13] elif pose: fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True) - index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13] + index = [2, 3, 4, 5, 6, 7, 8, 11, 12, 15, 16, 17, 18, 19, 9, 10, 13, 14] elif regress: fig, ax = plt.subplots(2, 
2, figsize=(6, 6), tight_layout=True) index = [1, 4, 2, 3] else: fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) - index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7] + index = [2, 3, 4, 5, 6, 9, 10, 11, 7, 8] ax = ax.ravel() files = list(save_dir.glob("results*.csv")) assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." @@ -1213,7 +1247,7 @@ def plt_color_scatter(v, f, bins=20, cmap="viridis", alpha=0.8, edgecolors="none def plot_tune_results(csv_file="tune_results.csv"): """ - Plot the evolution results stored in an 'tune_results.csv' file. The function generates a scatter plot for each key + Plot the evolution results stored in a 'tune_results.csv' file. The function generates a scatter plot for each key in the CSV, color-coded based on fitness scores. The best-performing configurations are highlighted on the plots. Args: diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py index 74604eda23c..e4a40f5e241 100644 --- a/ultralytics/utils/tal.py +++ b/ultralytics/utils/tal.py @@ -1,8 +1,9 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license import torch import torch.nn as nn +from . 
import LOGGER from .checks import check_version from .metrics import bbox_iou, probiou from .ops import xywhr2xyxyxyxy @@ -58,17 +59,46 @@ def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_g """ self.bs = pd_scores.shape[0] self.n_max_boxes = gt_bboxes.shape[1] + device = gt_bboxes.device if self.n_max_boxes == 0: - device = gt_bboxes.device return ( - torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), - torch.zeros_like(pd_bboxes).to(device), - torch.zeros_like(pd_scores).to(device), - torch.zeros_like(pd_scores[..., 0]).to(device), - torch.zeros_like(pd_scores[..., 0]).to(device), + torch.full_like(pd_scores[..., 0], self.bg_idx), + torch.zeros_like(pd_bboxes), + torch.zeros_like(pd_scores), + torch.zeros_like(pd_scores[..., 0]), + torch.zeros_like(pd_scores[..., 0]), ) + try: + return self._forward(pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt) + except torch.OutOfMemoryError: + # Move tensors to CPU, compute, then move back to original device + LOGGER.warning("WARNING: CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU") + cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)] + result = self._forward(*cpu_tensors) + return tuple(t.to(device) for t in result) + + def _forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """ + Compute the task-aligned assignment. Reference code is available at + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py. 
+ + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + target_gt_idx (Tensor): shape(bs, num_total_anchors) + """ mask_pos, align_metric, overlaps = self.get_pos_mask( pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt ) @@ -306,7 +336,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5): assert feats is not None dtype, device = feats[0].dtype, feats[0].device for i, stride in enumerate(strides): - _, _, h, w = feats[i].shape + h, w = feats[i].shape[2:] if isinstance(feats, list) else (int(feats[i][0]), int(feats[i][1])) sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx) diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index e7fcca0ad70..1f87ec79388 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -1,6 +1,5 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import contextlib import gc import math import os @@ -13,6 +12,7 @@ from typing import Union import numpy as np +import thop import torch import torch.distributed as dist import torch.nn as nn @@ -31,11 +31,6 @@ ) from ultralytics.utils.checks import check_version -try: - import thop -except ImportError: - thop = None - # Version checks (all default to 
version>=min_version) TORCH_1_9 = check_version(torch.__version__, "1.9.0") TORCH_1_13 = check_version(torch.__version__, "1.13.0") @@ -113,16 +108,24 @@ def get_cpu_info(): from ultralytics.utils import PERSISTENT_CACHE # avoid circular import error if "cpu_info" not in PERSISTENT_CACHE: - with contextlib.suppress(Exception): + try: import cpuinfo # pip install py-cpuinfo k = "brand_raw", "hardware_raw", "arch_string_raw" # keys sorted by preference info = cpuinfo.get_cpu_info() # info dict string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown") PERSISTENT_CACHE["cpu_info"] = string.replace("(R)", "").replace("CPU ", "").replace("@ ", "") + except Exception: + pass return PERSISTENT_CACHE.get("cpu_info", "unknown") +def get_gpu_info(index): + """Return a string with system GPU information, i.e. 'Tesla T4, 15102MiB'.""" + properties = torch.cuda.get_device_properties(index) + return f"{properties.name}, {properties.total_memory / (1 << 20):.0f}MiB" + + def select_device(device="", batch=0, newline=False, verbose=True): """ Selects the appropriate PyTorch device based on the provided arguments. @@ -156,7 +159,7 @@ def select_device(device="", batch=0, newline=False, verbose=True): Note: Sets the 'CUDA_VISIBLE_DEVICES' environment variable for specifying which GPUs to use. """ - if isinstance(device, torch.device): + if isinstance(device, torch.device) or str(device).startswith("tpu"): return device s = f"Ultralytics {__version__} ๐Ÿš€ Python-{PYTHON_VERSION} torch-{torch.__version__} " @@ -170,6 +173,8 @@ def select_device(device="", batch=0, newline=False, verbose=True): elif device: # non-cpu device requested if device == "cuda": device = "0" + if "," in device: + device = ",".join([x for x in device.split(",") if x]) # remove sequential commas, i.e. 
"0,,1" -> "0,1" visible = os.environ.get("CUDA_VISIBLE_DEVICES", None) os.environ["CUDA_VISIBLE_DEVICES"] = device # set environment variable - must be before assert is_available() if not (torch.cuda.is_available() and torch.cuda.device_count() >= len(device.split(","))): @@ -191,7 +196,7 @@ def select_device(device="", batch=0, newline=False, verbose=True): ) if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available - devices = device.split(",") if device else "0" # range(torch.cuda.device_count()) # i.e. 0,1,6,7 + devices = device.split(",") if device else "0" # i.e. "0,1" -> ["0", "1"] n = len(devices) # device count if n > 1: # multi-GPU if batch < 1: @@ -206,8 +211,7 @@ def select_device(device="", batch=0, newline=False, verbose=True): ) space = " " * (len(s) + 1) for i, d in enumerate(devices): - p = torch.cuda.get_device_properties(i) - s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB + s += f"{'' if i == 0 else space}CUDA:{d} ({get_gpu_info(i)})\n" # bytes to MB arg = "cuda:0" elif mps and TORCH_2_0 and torch.backends.mps.is_available(): # Prefer MPS if available @@ -293,28 +297,22 @@ def fuse_deconv_and_bn(deconv, bn): def model_info(model, detailed=False, verbose=True, imgsz=640): - """ - Model information. - - imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]. 
- """ + """Print and return detailed model information layer by layer.""" if not verbose: return n_p = get_num_params(model) # number of parameters n_g = get_num_gradients(model) # number of gradients n_l = len(list(model.modules())) # number of layers if detailed: - LOGGER.info( - f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}" - ) + LOGGER.info(f"{'layer':>5}{'name':>40}{'gradient':>10}{'parameters':>12}{'shape':>20}{'mu':>10}{'sigma':>10}") for i, (name, p) in enumerate(model.named_parameters()): name = name.replace("module_list.", "") LOGGER.info( - "%5g %40s %9s %12g %20s %10.3g %10.3g %10s" - % (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std(), p.dtype) + f"{i:>5g}{name:>40s}{p.requires_grad!r:>10}{p.numel():>12g}{str(list(p.shape)):>20s}" + f"{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype):>15s}" ) - flops = get_flops(model, imgsz) + flops = get_flops(model, imgsz) # imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320] fused = " (fused)" if getattr(model, "is_fused", lambda: False)() else "" fs = f", {flops:.1f} GFLOPs" if flops else "" yaml_file = getattr(model, "yaml_file", "") or getattr(model, "yaml", {}).get("yaml_file", "") @@ -365,9 +363,6 @@ def model_info_for_loggers(trainer): def get_flops(model, imgsz=640): """Return a YOLO model's FLOPs.""" - if not thop: - return 0.0 # if not installed return 0.0 GFLOPs - try: model = de_parallel(model) p = next(model.parameters()) @@ -595,7 +590,7 @@ def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: dict # Save combined = {**metadata, **x, **(updates or {})} - torch.save(combined, s or f, use_dill=False) # combine dicts (prefer to the right) + torch.save(combined, s or f) # combine dicts (prefer to the right) mb = os.path.getsize(s or f) / 1e6 # file size LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") return combined @@ -615,7 +610,33 @@ def 
convert_optimizer_state_dict_to_fp16(state_dict): return state_dict -def profile(input, ops, n=10, device=None): +@contextmanager +def cuda_memory_usage(device=None): + """ + Monitor and manage CUDA memory usage. + + This function checks if CUDA is available and, if so, empties the CUDA cache to free up unused memory. + It then yields a dictionary containing memory usage information, which can be updated by the caller. + Finally, it updates the dictionary with the amount of memory reserved by CUDA on the specified device. + + Args: + device (torch.device, optional): The CUDA device to query memory usage for. Defaults to None. + + Yields: + (dict): A dictionary with a key 'memory' initialized to 0, which will be updated with the reserved memory. + """ + cuda_info = dict(memory=0) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + try: + yield cuda_info + finally: + cuda_info["memory"] = torch.cuda.memory_reserved(device) + else: + yield cuda_info + + +def profile(input, ops, n=10, device=None, max_num_obj=0): """ Ultralytics speed, memory and FLOPs profiler. 
@@ -636,7 +657,8 @@ def profile(input, ops, n=10, device=None): f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" f"{'input':>24s}{'output':>24s}" ) - + gc.collect() # attempt to free unused memory + torch.cuda.empty_cache() for x in input if isinstance(input, list) else [input]: x = x.to(device) x.requires_grad = True @@ -645,24 +667,36 @@ def profile(input, ops, n=10, device=None): m = m.half() if hasattr(m, "half") and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward try: - flops = thop.profile(m, inputs=[x], verbose=False)[0] / 1e9 * 2 if thop else 0 # GFLOPs + flops = thop.profile(deepcopy(m), inputs=[x], verbose=False)[0] / 1e9 * 2 # GFLOPs except Exception: flops = 0 try: + mem = 0 for _ in range(n): - t[0] = time_sync() - y = m(x) - t[1] = time_sync() - try: - (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() - t[2] = time_sync() - except Exception: # no backward method - # print(e) # for debug - t[2] = float("nan") + with cuda_memory_usage(device) as cuda_info: + t[0] = time_sync() + y = m(x) + t[1] = time_sync() + try: + (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() + t[2] = time_sync() + except Exception: # no backward method + # print(e) # for debug + t[2] = float("nan") + mem += cuda_info["memory"] / 1e9 # (GB) tf += (t[1] - t[0]) * 1000 / n # ms per op forward tb += (t[2] - t[1]) * 1000 / n # ms per op backward - mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # (GB) + if max_num_obj: # simulate training with predictions per image grid (for AutoBatch) + with cuda_memory_usage(device) as cuda_info: + torch.randn( + x.shape[0], + max_num_obj, + int(sum((x.shape[-1] / s) * (x.shape[-2] / s) for s in m.stride.tolist())), + device=device, + dtype=torch.float32, + ) + mem += cuda_info["memory"] / 1e9 # (GB) s_in, s_out = (tuple(x.shape) if isinstance(x, 
torch.Tensor) else "list" for x in (x, y)) # shapes p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters LOGGER.info(f"{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}") @@ -670,8 +704,9 @@ def profile(input, ops, n=10, device=None): except Exception as e: LOGGER.info(e) results.append(None) - gc.collect() # attempt to free unused memory - torch.cuda.empty_cache() + finally: + gc.collect() # attempt to free unused memory + torch.cuda.empty_cache() return results @@ -719,3 +754,48 @@ def __call__(self, epoch, fitness): f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping." ) return stop + + +class FXModel(nn.Module): + """ + A custom model class for torch.fx compatibility. + + This class extends `torch.nn.Module` and is designed to ensure compatibility with torch.fx for tracing and graph manipulation. + It copies attributes from an existing model and explicitly sets the model attribute to ensure proper copying. + + Args: + model (torch.nn.Module): The original model to wrap for torch.fx compatibility. + """ + + def __init__(self, model): + """ + Initialize the FXModel. + + Args: + model (torch.nn.Module): The original model to wrap for torch.fx compatibility. + """ + super().__init__() + copy_attr(self, model) + # Explicitly set `model` since `copy_attr` somehow does not copy it. + self.model = model.model + + def forward(self, x): + """ + Forward pass through the model. + + This method performs the forward pass through the model, handling the dependencies between layers and saving intermediate outputs. + + Args: + x (torch.Tensor): The input tensor to the model. + + Returns: + (torch.Tensor): The output tensor from the model. 
+ """ + y = [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + # from earlier layers + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] + x = m(x) # run + y.append(x) # save output + return x diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py index 3f873a6fafc..e8b97d89f07 100644 --- a/ultralytics/utils/triton.py +++ b/ultralytics/utils/triton.py @@ -1,4 +1,4 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license from typing import List from urllib.parse import urlsplit @@ -66,6 +66,7 @@ def __init__(self, url: str, endpoint: str = "", scheme: str = ""): self.np_input_formats = [type_map[x] for x in self.input_formats] self.input_names = [x["name"] for x in config["input"]] self.output_names = [x["name"] for x in config["output"]] + self.metadata = eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None")) def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: """ diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py index 1329bfe6ecc..831f0fa0f2d 100644 --- a/ultralytics/utils/tuner.py +++ b/ultralytics/utils/tuner.py @@ -1,13 +1,16 @@ -# Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license +# Ultralytics ๐Ÿš€ AGPL-3.0 License - https://ultralytics.com/license -import subprocess - -from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_save_dir +from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_cfg, get_save_dir from ultralytics.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, NUM_THREADS, checks def run_ray_tune( - model, space: dict = None, grace_period: int = 10, gpu_per_trial: int = None, max_samples: int = 10, **train_args + model, + space: dict = None, + grace_period: int = 10, + gpu_per_trial: int = None, + max_samples: int = 10, + **train_args, ): """ Runs hyperparameter tuning using Ray Tune. 
@@ -27,10 +30,10 @@ def run_ray_tune( ```python from ultralytics import YOLO - # Load a YOLOv8n model - model = YOLO("yolov8n.pt") + # Load a YOLO11n model + model = YOLO("yolo11n.pt") - # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset + # Start tuning hyperparameters for YOLO11n training on the COCO8 dataset result_grid = model.tune(data="coco8.yaml", use_ray=True) ``` """ @@ -39,7 +42,7 @@ def run_ray_tune( train_args = {} try: - subprocess.run("pip install ray[tune]".split(), check=True) # do not add single quotes here + checks.check_requirements("ray[tune]") import ray from ray import tune @@ -131,7 +134,9 @@ def _tune(config): tuner_callbacks = [WandbLoggerCallback(project="YOLOv8-tune")] if wandb else [] # Create the Ray Tune hyperparameter search tuner - tune_dir = get_save_dir(DEFAULT_CFG, name="tune").resolve() # must be absolute dir + tune_dir = get_save_dir( + get_cfg(DEFAULT_CFG, train_args), name=train_args.pop("name", "tune") + ).resolve() # must be absolute dir tune_dir.mkdir(parents=True, exist_ok=True) tuner = tune.Tuner( trainable_with_resources,