Skip to content

Commit

Permalink
Merge pull request #21 from stefanklut/api
Browse files: browse the repository at this point in the history
  • Loading branch information
stefanklut authored Feb 1, 2024
2 parents 51f206a + 2729e06 commit 7f55d8c
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions api/flask_app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -175,8 +175,17 @@ def predict_image(
)
images_processed_counter.inc()
return input_args
except Exception as e:
return input_args | {"exception": e.with_traceback(e.__traceback__)}
except Exception as exception:
# Catch CUDA out of memory errors
if isinstance(exception, torch.cuda.OutOfMemoryError) or (
isinstance(exception, RuntimeError) and "NVML_SUCCESS == r INTERNAL ASSERT FAILED" in str(exception)
):
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
# HACK remove traceback to prevent complete halt of program, not sure why this happens
exception = exception.with_traceback(None)

return input_args | {"exception": exception}


class ResponseInfo(TypedDict, total=False):
Expand Down Expand Up @@ -308,5 +317,16 @@ def metrics() -> bytes:
return generate_latest()


@app.route("/health", methods=["GET"])
def health_check() -> tuple[str, int]:
    """
    Health check endpoint for Kubernetes checks.

    Answers every GET request with a fixed body and status code; it does
    not inspect any other state before responding.

    Returns:
        tuple[str, int]: Response and status code
    """
    return "OK", 200


# Start the app only when this module is executed directly as a script,
# not when it is imported by another module.
if __name__ == "__main__":
    app.run()

0 comments on commit 7f55d8c

Please sign in to comment.