
Commit 53111bb

Remove unused yolox functions and fix nim client shut down. (#299)
1 parent 2cdbeb8 commit 53111bb

File tree

5 files changed (+3, -287 lines)


src/nv_ingest/extraction_workflows/image/image_handlers.py

Lines changed: 0 additions & 73 deletions
@@ -107,79 +107,6 @@ def convert_svg_to_bitmap(image_stream: io.BytesIO) -> np.ndarray:
     return image_array


-# TODO(Devin): Move to common file
-def process_inference_results(
-    output_array: np.ndarray,
-    original_image_shapes: List[Tuple[int, int]],
-    num_classes: int,
-    conf_thresh: float,
-    iou_thresh: float,
-    min_score: float,
-    final_thresh: float,
-):
-    """
-    Process the model output to generate detection results and expand bounding boxes.
-
-    Parameters
-    ----------
-    output_array : np.ndarray
-        The raw output from the model inference.
-    original_image_shapes : List[Tuple[int, int]]
-        The shapes of the original images before resizing, used for scaling bounding boxes.
-    num_classes : int
-        The number of classes the model can detect.
-    conf_thresh : float
-        The confidence threshold for detecting objects.
-    iou_thresh : float
-        The Intersection Over Union (IoU) threshold for non-maximum suppression.
-    min_score : float
-        The minimum score for keeping a detection.
-    final_thresh: float
-        Threshold for keeping a bounding box applied after postprocessing.
-
-    Returns
-    -------
-    List[dict]
-        A list of dictionaries, each containing processed detection results including expanded bounding boxes.
-
-    Notes
-    -----
-    This function applies non-maximum suppression to the model's output and scales the bounding boxes back to the
-    original image size.
-
-    Examples
-    --------
-    >>> output_array = np.random.rand(2, 100, 85)
-    >>> original_image_shapes = [(1536, 1536), (1536, 1536)]
-    >>> results = process_inference_results(output_array, original_image_shapes, 80, 0.5, 0.5, 0.1)
-    >>> len(results)
-    2
-    """
-    pred = yolox_utils.postprocess_model_prediction(
-        output_array, num_classes, conf_thresh, iou_thresh, class_agnostic=True
-    )
-    results = yolox_utils.postprocess_results(pred, original_image_shapes, min_score=min_score)
-    logger.debug(f"Number of results: {len(results)}")
-    logger.debug(f"Results: {results}")
-
-    annotation_dicts = [yolox_utils.expand_chart_bboxes(annotation_dict) for annotation_dict in results]
-    inference_results = []
-
-    # Filter out bounding boxes below the final threshold
-    for annotation_dict in annotation_dicts:
-        new_dict = {}
-        if "table" in annotation_dict:
-            new_dict["table"] = [bb for bb in annotation_dict["table"] if bb[4] >= final_thresh]
-        if "chart" in annotation_dict:
-            new_dict["chart"] = [bb for bb in annotation_dict["chart"] if bb[4] >= final_thresh]
-        if "title" in annotation_dict:
-            new_dict["title"] = annotation_dict["title"]
-        inference_results.append(new_dict)
-
-    return inference_results
-
-
 def extract_table_and_chart_images(
     annotation_dict: Dict[str, List[List[float]]],
     original_image: np.ndarray,

src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py

Lines changed: 0 additions & 70 deletions
@@ -142,76 +142,6 @@ def extract_tables_and_charts_using_image_ensemble(
     return tables_and_charts


-def process_inference_results(
-    output_array: np.ndarray,
-    original_image_shapes: List[Tuple[int, int]],
-    num_classes: int,
-    conf_thresh: float,
-    iou_thresh: float,
-    min_score: float,
-    final_thresh: float,
-):
-    """
-    Process the model output to generate detection results and expand bounding boxes.
-
-    Parameters
-    ----------
-    output_array : np.ndarray
-        The raw output from the model inference.
-    original_image_shapes : List[Tuple[int, int]]
-        The shapes of the original images before resizing, used for scaling bounding boxes.
-    num_classes : int
-        The number of classes the model can detect.
-    conf_thresh : float
-        The confidence threshold for detecting objects.
-    iou_thresh : float
-        The Intersection Over Union (IoU) threshold for non-maximum suppression.
-    min_score : float
-        The minimum score for keeping a detection.
-    final_thresh: float
-        Threshold for keeping a bounding box applied after postprocessing.
-
-    Returns
-    -------
-    List[dict]
-        A list of dictionaries, each containing processed detection results including expanded bounding boxes.
-
-    Notes
-    -----
-    This function applies non-maximum suppression to the model's output and scales the bounding boxes back to the
-    original image size.
-
-    Examples
-    --------
-    >>> output_array = np.random.rand(2, 100, 85)
-    >>> original_image_shapes = [(1536, 1536), (1536, 1536)]
-    >>> results = process_inference_results(output_array, original_image_shapes, 80, 0.5, 0.5, 0.1)
-    >>> len(results)
-    2
-    """
-    pred = yolox_utils.postprocess_model_prediction(
-        output_array, num_classes, conf_thresh, iou_thresh, class_agnostic=True
-    )
-    results = yolox_utils.postprocess_results(pred, original_image_shapes, min_score=min_score)
-
-    annotation_dicts = [yolox_utils.expand_chart_bboxes(annotation_dict) for annotation_dict in results]
-    inference_results = []
-
-    # Filter out bounding boxes below the final threshold
-    for annotation_dict in annotation_dicts:
-        new_dict = {}
-        if "table" in annotation_dict:
-            new_dict["table"] = [bb for bb in annotation_dict["table"] if bb[4] >= final_thresh]
-        if "chart" in annotation_dict:
-            new_dict["chart"] = [bb for bb in annotation_dict["chart"] if bb[4] >= final_thresh]
-        if "title" in annotation_dict:
-            new_dict["title"] = annotation_dict["title"]
-        inference_results.append(new_dict)
-
-    return inference_results
-
-
 # Handle individual table/chart extraction and model inference
 def extract_table_and_chart_images(
     annotation_dict,

src/nv_ingest/stages/nim/chart_extraction.py

Lines changed: 2 additions & 5 deletions
@@ -10,7 +10,6 @@
 from typing import Tuple

 import pandas as pd
-import tritonclient.grpc as grpcclient
 from morpheus.config import Config

 from nv_ingest.schemas.chart_extractor_schema import ChartExtractorSchema
@@ -190,10 +189,8 @@ def _extract_chart_data(
         logger.error("Error occurred while extracting chart data.", exc_info=True)
         raise
     finally:
-        if isinstance(cached_client, grpcclient.InferenceServerClient):
-            cached_client.close()
-        if isinstance(deplot_client, grpcclient.InferenceServerClient):
-            deplot_client.close()
+        cached_client.close()
+        deplot_client.close()


 def generate_chart_extractor_stage(
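
The shutdown fix in this stage drops the tritonclient import and the isinstance guards, closing both clients directly in the finally block. The sketch below illustrates the resulting cleanup flow; it is a minimal approximation with hypothetical names (create_clients, df), not the actual signature of _extract_chart_data, and it assumes both client objects always exist and expose close():

import logging

logger = logging.getLogger(__name__)


def _extract_chart_data_sketch(df, create_clients):
    """Minimal sketch of the cleanup flow; not the real stage function."""
    cached_client, deplot_client = create_clients()  # assumed to always return two clients
    try:
        # ... run inference with each client and enrich the DataFrame ...
        return df
    except Exception:
        logger.error("Error occurred while extracting chart data.", exc_info=True)
        raise
    finally:
        # The clients are always created above, so close them unconditionally
        # instead of guarding on the concrete client type.
        cached_client.close()
        deplot_client.close()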

src/nv_ingest/stages/nim/table_extraction.py

Lines changed: 1 addition & 2 deletions
@@ -172,8 +172,7 @@ def _extract_table_data(
         logger.error("Error occurred while extracting table data.", exc_info=True)
         raise
     finally:
-        if isinstance(paddle_client, NimClient):
-            paddle_client.close()
+        paddle_client.close()


 def generate_table_extractor_stage(
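
The table extraction stage applies the same simplification: the isinstance(paddle_client, NimClient) guard is removed and paddle_client.close() is called directly in the finally block, so the client is released even when the extraction body raises.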

tests/nv_ingest/extraction_workflows/image/test_image_handlers.py

Lines changed: 0 additions & 137 deletions
@@ -7,7 +7,6 @@
 from nv_ingest.extraction_workflows.image.image_handlers import convert_svg_to_bitmap
 from nv_ingest.extraction_workflows.image.image_handlers import extract_table_and_chart_images
 from nv_ingest.extraction_workflows.image.image_handlers import load_and_preprocess_image
-from nv_ingest.extraction_workflows.image.image_handlers import process_inference_results
 from nv_ingest.util.pdf.metadata_aggregators import CroppedImageWithContent


@@ -119,142 +118,6 @@ def test_convert_svg_to_bitmap_large_svg():
     assert np.all(result[:, :, 2] == 255)  # Blue channel fully on


-def test_process_inference_results_basic_case():
-    """Test process_inference_results with a typical valid input."""
-
-    # Simulated model output array for a single image with several detections.
-    # Array format is (batch_size, num_detections, 85) - 80 classes + 5 box coordinates
-    # For simplicity, use random values for the boxes and class predictions.
-    output_array = np.zeros((1, 3, 85), dtype=np.float32)
-
-    # Mock bounding box coordinates
-    output_array[0, 0, :4] = [0.5, 0.5, 0.2, 0.2]  # x_center, y_center, width, height
-    output_array[0, 1, :4] = [0.6, 0.6, 0.2, 0.2]
-    output_array[0, 2, :4] = [0.7, 0.7, 0.2, 0.2]
-
-    # Mock object confidence scores
-    output_array[0, :, 4] = [0.8, 0.9, 0.85]
-
-    # Mock class scores (set class 1 with highest confidence for simplicity)
-    output_array[0, 0, 5 + 1] = 0.7
-    output_array[0, 1, 5 + 1] = 0.75
-    output_array[0, 2, 5 + 1] = 0.72
-
-    original_image_shapes = [(640, 640)]  # Original shape of the image before resizing
-
-    # Process inference results with thresholds that should retain all mock detections
-    results = process_inference_results(
-        output_array,
-        original_image_shapes,
-        num_classes=80,
-        conf_thresh=0.5,
-        iou_thresh=0.5,
-        min_score=0.1,
-        final_thresh=0.3,
-    )
-
-    # Check output structure
-    assert isinstance(results, list)
-    assert len(results) == 1
-    assert isinstance(results[0], dict)
-
-    # Validate bounding box scaling and structure
-    assert "chart" in results[0] or "table" in results[0]
-    if "chart" in results[0]:
-        assert isinstance(results[0]["chart"], list)
-        assert len(results[0]["chart"]) > 0
-        # Check bounding box format for each detected "chart" item (5 values per box)
-        for bbox in results[0]["chart"]:
-            assert len(bbox) == 5  # [x1, y1, x2, y2, score]
-            assert bbox[4] >= 0.3  # score meets final threshold
-
-    print("Processed inference results:", results)
-
-
-def test_process_inference_results_multiple_images():
-    """Test with multiple images to verify batch processing."""
-    # Simulate model output with 2 images and 3 detections each
-    output_array = np.zeros((2, 3, 85), dtype=np.float32)
-    # Set bounding boxes and confidence for the mock detections
-    output_array[0, 0, :5] = [0.5, 0.5, 0.2, 0.2, 0.8]
-    output_array[0, 1, :5] = [0.6, 0.6, 0.2, 0.2, 0.7]
-    output_array[1, 0, :5] = [0.4, 0.4, 0.1, 0.1, 0.9]
-    # Assign class confidences for classes 0 and 1
-    output_array[0, 0, 5 + 1] = 0.75
-    output_array[0, 1, 5 + 1] = 0.65
-    output_array[1, 0, 5 + 0] = 0.8
-
-    original_image_shapes = [(640, 640), (800, 800)]
-
-    results = process_inference_results(
-        output_array,
-        original_image_shapes,
-        num_classes=80,
-        conf_thresh=0.5,
-        iou_thresh=0.5,
-        min_score=0.1,
-        final_thresh=0.3,
-    )
-
-    assert isinstance(results, list)
-    assert len(results) == 2
-    for result in results:
-        assert isinstance(result, dict)
-        if "chart" in result:
-            assert all(len(bbox) == 5 and bbox[4] >= 0.3 for bbox in result["chart"])
-
-
-def test_process_inference_results_high_confidence_threshold():
-    """Test with a high confidence threshold to verify filtering."""
-    output_array = np.zeros((1, 5, 85), dtype=np.float32)
-    # Set low confidence scores below the threshold
-    output_array[0, :, 4] = [0.2, 0.3, 0.4, 0.4, 0.2]
-    output_array[0, :, 5] = [0.5] * 5  # Class confidence
-
-    original_image_shapes = [(640, 640)]
-
-    results = process_inference_results(
-        output_array,
-        original_image_shapes,
-        num_classes=80,
-        conf_thresh=0.9,  # High confidence threshold
-        iou_thresh=0.5,
-        min_score=0.1,
-        final_thresh=0.3,
-    )
-
-    assert isinstance(results, list)
-    assert len(results) == 1
-    assert results[0] == {}  # No detections should pass the high confidence threshold
-
-
-def test_process_inference_results_varied_num_classes():
-    """Test compatibility with different model class counts."""
-    output_array = np.zeros((1, 3, 25), dtype=np.float32)  # 20 classes + 5 box coords
-    # Assign box, object confidence, and class scores
-    output_array[0, 0, :5] = [0.5, 0.5, 0.2, 0.2, 0.8]
-    output_array[0, 1, :5] = [0.6, 0.6, 0.3, 0.3, 0.7]
-    output_array[0, 0, 5 + 1] = 0.9  # Assign highest confidence to class 1
-
-    original_image_shapes = [(640, 640)]
-
-    results = process_inference_results(
-        output_array,
-        original_image_shapes,
-        num_classes=20,  # Different class count
-        conf_thresh=0.5,
-        iou_thresh=0.5,
-        min_score=0.1,
-        final_thresh=0.3,
-    )
-
-    assert isinstance(results, list)
-    assert len(results) == 1
-    assert isinstance(results[0], dict)
-    assert "chart" in results[0]
-    assert len(results[0]["chart"]) > 0  # Verify detections processed correctly with 20 classes
-
-
 def crop_image(image: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
     """Mock function to simulate cropping an image."""
     h1, w1, h2, w2 = bbox