Add Replicate demo and API #24

Open · wants to merge 2 commits into base: main
README.md — 1 addition, 0 deletions
@@ -10,6 +10,7 @@
<a href="https://arxiv.org/abs/2401.10891"><img src='https://img.shields.io/badge/arXiv-Depth Anything-red' alt='Paper PDF'></a>
<a href='https://depth-anything.github.io'><img src='https://img.shields.io/badge/Project_Page-Depth Anything-green' alt='Project Page'></a>
<a href='https://huggingface.co/spaces/LiheYoung/Depth-Anything'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
[![Replicate](https://replicate.com/cjwbw/depth-anything/badge)](https://replicate.com/cjwbw/depth-anything)
</div>

This work presents Depth Anything, a highly practical solution for robust monocular depth estimation by training on a combination of 1.5M labeled images and **62M+ unlabeled images**.
cog.yaml — 16 additions, 0 deletions (new file)
@@ -0,0 +1,16 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  system_packages:
    - "libgl1-mesa-glx"
    - "libglib2.0-0"
  python_version: "3.11"
  python_packages:
    - "opencv-python==4.9.0.80"
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "tqdm==4.66.1"
    - "huggingface_hub==0.20.3"
predict: "predict.py:Predictor"
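
The `predict` entry above points Cog at the `Predictor` class defined in `predict.py` below. As a minimal sketch (not part of this PR), that entry point could also be exercised directly once the repository's `depth_anything` package and the pinned dependencies are installed and a CUDA GPU is available; the image path here is a placeholder.

```python
# Hypothetical local smoke test of the entry point registered in cog.yaml.
# Assumes the repo's depth_anything package and the packages pinned above are
# installed, and that a CUDA device is present ("cuda:0" is hard-coded in setup()).
from predict import Predictor

predictor = Predictor()
predictor.setup()  # pulls the vits/vitb/vitl checkpoints into model_cache/
result = predictor.predict(image="demo.jpg", encoder="vitl")  # demo.jpg is a placeholder
print(result)  # /tmp/out.png, the colorized depth map
```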
predict.py — 85 additions, 0 deletions (new file)
@@ -0,0 +1,85 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.transforms import Compose
from cog import BasePredictor, Input, Path

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet


class Predictor(BasePredictor):
    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        encoder_options = ["vits", "vitb", "vitl"]
        self.device = "cuda:0"
        model_cache = "model_cache"
        self.models = {
            k: DepthAnything.from_pretrained(
                f"LiheYoung/depth_anything_{k}14", cache_dir=model_cache
            ).to(self.device)
            for k in encoder_options
        }
        self.total_params = {
            k: sum(param.numel() for param in self.models[k].parameters())
            for k in encoder_options
        }

        self.transform = Compose(
            [
                Resize(
                    width=518,
                    height=518,
                    resize_target=False,
                    keep_aspect_ratio=True,
                    ensure_multiple_of=14,
                    resize_method="lower_bound",
                    image_interpolation_method=cv2.INTER_CUBIC,
                ),
                NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                PrepareForNet(),
            ]
        )

    def predict(
        self,
        image: Path = Input(description="Input image"),
        encoder: str = Input(
            description="Choose an encoder.",
            default="vitl",
            choices=["vits", "vitb", "vitl"],
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        depth_anything = self.models[encoder]
        total_params = self.total_params[encoder]
        print("Total parameters: {:.2f}M".format(total_params / 1e6))

        depth_anything.eval()

        raw_image = cv2.imread(str(image))
        image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        h, w = image.shape[:2]

        image = self.transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            depth = depth_anything(image)

        depth = F.interpolate(
            depth[None], (h, w), mode="bilinear", align_corners=False
        )[0, 0]
        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

        depth = depth.cpu().numpy().astype(np.uint8)
        depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
        output_path = "/tmp/out.png"
        cv2.imwrite(output_path, depth_color)

        return Path(output_path)
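
Once the model is published on Replicate (the badge added to README.md points at `cjwbw/depth-anything`), a call through the `replicate` Python client might look like the sketch below. The model reference is an assumption: depending on how the deployment is published, a version hash may need to be appended to the slug.

```python
# Sketch of calling the hosted Replicate API; assumes `pip install replicate`
# and a REPLICATE_API_TOKEN environment variable.
import replicate

output = replicate.run(
    "cjwbw/depth-anything",               # slug from the README badge; may need ":<version>"
    input={
        "image": open("demo.jpg", "rb"),  # placeholder input image
        "encoder": "vitl",                # one of the choices exposed by predict()
    },
)
print(output)  # URL of the colorized depth map written by the predictor
```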