From c1730f00541d444db004fddc2fad5b9e4f241c88 Mon Sep 17 00:00:00 2001 From: SthPhoenix Date: Sat, 6 Nov 2021 16:48:45 +0300 Subject: [PATCH] Update version to 0.7.0.0 --- README.md | 32 ++++++++++++++++++++++++++++++++ deploy_trt.sh | 7 +++++-- src/api_trt/app.py | 2 +- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1707a88..120e5dc 100755 --- a/README.md +++ b/README.md @@ -251,6 +251,38 @@ bounding box, detection probability and detection number. ## Changelog: +### 2021-11-06 v0.7.0.0 + +Since a lot of updates have happened since the last release, the version is updated straight to v0.7.0.0 + +Additions: +- Added experimental support for msgpack serializer: helps reduce network traffic for embeddings by ~2x. +- Output names are no longer required for detection models when building TRT engine - correct output order is now extracted + from onnx models. +- Detection models now can be exported to TRT engine with batch size > 1 - inference code doesn't support it yet, though + now they could be used in Triton Inference Server without issues. + +Model Zoo: +- Added support for WebFace600k based recognition models from InsightFace repo: `w600k_r50` and `w600k_mbf` +- Added md5 check for models to allow automatic re-download if models have changed. +- All `scrfd` based models now support batch dimension. + +Improvements: +- 1.5x-2x faster SCRFD re-implementation with Numba: 4.5 ms. vs 10 ms. for `lumia.jpg` example with + `scrfd_10g_gnkps` and threshold = 0.3 (432 faces detected). +- Move image normalization step to GPU with help of CuPy (4x lower data transfer from CPU to GPU, about 6% + inference speedup, and some computations offloaded from CPU). +- 4.5x Faster `face_align.norm_crop` implementation with help of Numba and removal of unused computations. + (Cropping 432 faces from `lumia.jpg` example took 45 ms. vs 205 ms.). 
+- Face crops are now extracted only when needed - when face data or embeddings are requested, improving + detection-only performance. +- Added Numba njit cache to reduce subsequent startup time. +- Logging timings rounded to ms for better readability. +- Minor refactoring + +Fixes: +- Since the gender/age estimation model is currently not supported, it is excluded from the model preparation step. + ### 2021-09-09 v0.6.2.0 REST-API diff --git a/deploy_trt.sh b/deploy_trt.sh index ed83848..8b3ad99 100755 --- a/deploy_trt.sh +++ b/deploy_trt.sh @@ -1,7 +1,7 @@ #! /bin/bash IMAGE='insightface-rest' -TAG='v0.6.3.0' +TAG='v0.7.0.0' # Change InsightFace-REST logging level (DEBUG,INFO,WARNING,ERROR) log_level=INFO @@ -42,12 +42,14 @@ force_fp16=False det_model=scrfd_10g_gnkps # REC MODELS: -## arcface_r100_v1, glintr100 +## arcface_r100_v1, glintr100, w600k_r50, w600k_mbf rec_model=glintr100 ## Do not load recognition model: rec_ignore=False ## Maximum batch size for recognition model rec_batch_size=1 +## Maximum batch size for detection model, use only for building models for Triton Server +det_batch_size=1 # GENDER/AGE MODELS: ## genderage_v1 @@ -109,6 +111,7 @@ for i in $(seq 0 $(($n_gpu - 1)) ); do -e REC_NAME=$rec_model\ -e REC_IGNORE=$rec_ignore\ -e REC_BATCH_SIZE=$rec_batch_size\ + -e DET_BATCH_SIZE=$det_batch_size\ -e GA_NAME=$ga_model\ -e GA_IGNORE=$ga_ignore\ -e TRITON_URI=$triton_uri\ diff --git a/src/api_trt/app.py b/src/api_trt/app.py index a24e79a..630bdf9 100755 --- a/src/api_trt/app.py +++ b/src/api_trt/app.py @@ -20,7 +20,7 @@ from env_parser import EnvConfigs from schemas import BodyDraw, BodyExtract -__version__ = "0.6.3.0" +__version__ = "0.7.0.0" dir_path = os.path.dirname(os.path.realpath(__file__))