Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated metric depth to point cloud code #89

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ We provide **four models** of varying scales for robust relative depth estimatio
| Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true) |
| Depth-Anything-V2-Giant | 1.3B | Coming soon |

Alternatively, you can download a checkpoint directly with the provided script:

```bash
python checkpoint_downloader.py --size [small/s/base/b/large/l]
```

## Usage

Expand Down Expand Up @@ -173,6 +178,7 @@ We are sincerely grateful to the awesome Hugging Face team ([@Pedro Cuenca](http

We also thank the [DINOv2](https://github.com/facebookresearch/dinov2) team for contributing such impressive models to our community.

The test.jpg image is from [here](https://rankcomfort.com/top-digital-cameras-for-travel-photography-2024/).

## LICENSE

Expand Down
54 changes: 54 additions & 0 deletions checkpoint_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os
import requests
import argparse
from tqdm import tqdm

def download_file(url, local_filename):
    """Stream *url* to *local_filename* with a byte-accurate progress bar.

    Errors are reported to stdout rather than raised, so a failed download
    leaves the caller running (best-effort behaviour kept from the original).

    Args:
        url: HTTP(S) URL of the file to fetch.
        local_filename: Destination path on disk.
    """
    try:
        # Stream with a timeout so a stalled server cannot hang forever;
        # the context manager guarantees the connection is released.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            total_size = int(response.headers.get('content-length', 0))

            # tqdm counts bytes: total=total_size with unit='B' and
            # unit_scale=True renders human-readable sizes. (The original
            # passed a chunk count as total while labelling it 'KB', which
            # misreported progress.)
            with open(local_filename, 'wb') as file, tqdm(
                total=total_size, unit='B', unit_scale=True, unit_divisor=1024
            ) as progress:
                for data in response.iter_content(chunk_size=8192):
                    file.write(data)
                    progress.update(len(data))

        print(f"File downloaded successfully: {local_filename}")
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")

def main(argv=None):
    """Parse CLI arguments and download the selected relative-depth checkpoint.

    Args:
        argv: Optional explicit argument list (useful for testing); when
            None, argparse falls back to sys.argv.
    """
    parser = argparse.ArgumentParser(description="Download checkpoint files.")
    parser.add_argument("--size", "-s", choices=["large", "small", "base", "l", "s", "b"],
                        default="large", help="Specify the size of the file to download (large/l, small/s, base/b). Default is large.")
    args = parser.parse_args(argv)

    urls = {
        "large": "https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true",
        "small": "https://huggingface.co/depth-anything/Depth-Anything-V2-Small/resolve/main/depth_anything_v2_vits.pth?download=true",
        "base": "https://huggingface.co/depth-anything/Depth-Anything-V2-Base/resolve/main/depth_anything_v2_vitb.pth?download=true"
    }

    # Expand single-letter shorthands ("l" -> "large"); full names pass
    # through dict.get unchanged.
    size_mapping = {
        "l": "large",
        "s": "small",
        "b": "base"
    }
    normalized_size = size_mapping.get(args.size, args.size)

    url = urls[normalized_size]

    checkpoints_dir = "checkpoints"
    # Checkpoint names end in vit{l,s,b}: the first letter of the full size.
    local_filename = os.path.join(checkpoints_dir, f"depth_anything_v2_vit{normalized_size[0]}.pth")

    os.makedirs(checkpoints_dir, exist_ok=True)
    download_file(url, local_filename)

if __name__ == "__main__":
    main()
96 changes: 56 additions & 40 deletions metric_depth/depth_to_pointcloud.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Born out of Depth Anything V1 Issue 36
# Born out of Depth Anything V1 Issue 36: Code by @1ssb
# Make sure you have the necessary libraries
# Code by @1ssb
# Note that this code is meant for batch processing, to make individual predictions on different parameters, rewrite the loop execution
# Load the images you want to perform inference on in the input_images directory

import argparse
import cv2
Expand All @@ -13,28 +14,27 @@

from depth_anything_v2.dpt import DepthAnythingV2


def parse_arguments(argv=None):
    """Build and parse the command-line arguments for point-cloud generation.

    Args:
        argv: Optional explicit argument list (useful for testing); when
            None, argparse falls back to sys.argv.

    Returns:
        argparse.Namespace holding model, I/O and camera-intrinsic settings.
    """
    parser = argparse.ArgumentParser()

    # Model Parameters
    parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'])
    parser.add_argument('--load-from', default='checkpoints/depth_anything_v2_metric_hypersim_vitl.pth', type=str)
    parser.add_argument('--max-depth', default=10, type=float)

    # I/O Information
    parser.add_argument('--img-path', default='./input_images', type=str)
    parser.add_argument('--outdir', type=str, default='./vis_pointcloud')

    # Inference Parameters (pinhole-camera intrinsics and output resolution)
    parser.add_argument('--focal-length-x', default=470.4, type=float, help='Focal length along the x-axis.')
    parser.add_argument('--focal-length-y', default=470.4, type=float, help='Focal length along the y-axis.')
    # Width/height are pixel counts and must be ints: PIL's Image.resize and
    # np.arange downstream reject/mishandle floats (original used type=float).
    parser.add_argument('--final_width', default=360, type=int, help='Final Width of the images.')
    parser.add_argument('--final_height', default=640, type=int, help='Final Height of the images.')

    return parser.parse_args(argv)

def initialize_model(args, DEVICE):
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
Expand All @@ -46,38 +46,54 @@
depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu'))
depth_anything = depth_anything.to(DEVICE).eval()

if os.path.isfile(args.img_path):
if args.img_path.endswith('txt'):
with open(args.img_path, 'r') as f:
return depth_anything

def get_filenames(img_path):
    """Resolve *img_path* into the list of files to run inference on.

    Args:
        img_path: A single image file, a ``.txt`` manifest listing one path
            per line, or a directory that is searched recursively.

    Returns:
        List of file paths. When globbing a directory, subdirectory entries
        may be included as well (matching the original behaviour).
    """
    if os.path.isfile(img_path):
        if img_path.endswith('txt'):
            # A text file is treated as a manifest: one image path per line.
            with open(img_path, 'r') as f:
                filenames = f.read().splitlines()
        else:
            filenames = [img_path]
    else:
        # Recursively collect everything under the directory.
        filenames = glob.glob(os.path.join(img_path, '**/*'), recursive=True)
    return filenames

def process_images(filenames, depth_anything, args, DEVICE):
    """Run metric-depth inference on each image and write a colored point cloud.

    Args:
        filenames: Image paths to process.
        depth_anything: Loaded DepthAnythingV2 model, already on its device
            and in eval mode.
        args: Parsed CLI namespace (focal lengths, final size, outdir).
        DEVICE: Unused here (the model is already placed); kept so the
            call signature stays compatible.
    """
    FX, FY = args.focal_length_x, args.focal_length_y
    # Cast defensively: PIL's resize and np.arange require integer pixel
    # dimensions even if the CLI delivered floats.
    H, W = int(args.final_height), int(args.final_width)

    for k, filename in enumerate(filenames):
        print(f'Progress {k+1}/{len(filenames)}: {filename}')

        color_image = Image.open(filename).convert('RGB')

        image = cv2.imread(filename)
        pred = depth_anything.infer_image(image, H)

        # Resize color image and depth to final size
        resized_color_image = color_image.resize((W, H), Image.LANCZOS)
        resized_pred = Image.fromarray(pred).resize((W, H), Image.NEAREST)

        # Back-project each pixel into camera space with a pinhole model,
        # assuming the principal point sits at the image center.
        x, y = np.meshgrid(np.arange(W), np.arange(H))
        x = (x - W / 2) / FX
        y = (y - H / 2) / FY
        z = np.array(resized_pred)
        points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
        colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

        save_point_cloud(points, colors, args.outdir, filename)

def save_point_cloud(points, colors, outdir, filename):
    """Write *points*/*colors* as a .ply file named after the source image."""
    # Output file keeps the image's base name with a .ply extension.
    stem, _ = os.path.splitext(os.path.basename(filename))
    out_path = os.path.join(outdir, stem + ".ply")

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)
    cloud.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_point_cloud(out_path, cloud)

if __name__ == '__main__':
    # Entry point: parse CLI options, select a device, load the model, and
    # batch-convert every input image into a .ply point cloud.
    args = parse_arguments()

    # Prefer CUDA, then Apple MPS, falling back to CPU.
    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

    depth_anything = initialize_model(args, DEVICE)
    filenames = get_filenames(args.img_path)
    # Ensure the output directory exists before any .ply is written.
    os.makedirs(args.outdir, exist_ok=True)
    process_images(filenames, depth_anything, args, DEVICE)
59 changes: 59 additions & 0 deletions metric_depth/metric_checkpoint_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
import requests
import argparse
from tqdm import tqdm

def download_file(url, local_filename):
    """Stream *url* to *local_filename* with a byte-accurate progress bar.

    Errors are reported to stdout rather than raised, so a failed download
    leaves the caller running (best-effort behaviour kept from the original).

    Args:
        url: HTTP(S) URL of the file to fetch.
        local_filename: Destination path on disk.
    """
    try:
        # Stream with a timeout so a stalled server cannot hang forever;
        # the context manager guarantees the connection is released.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            # tqdm counts bytes: total=total_size with unit='B' and
            # unit_scale=True renders human-readable sizes. (The original
            # passed a chunk count as total while labelling it 'KB', which
            # misreported progress.)
            with open(local_filename, 'wb') as file, tqdm(
                total=total_size, unit='B', unit_scale=True, unit_divisor=1024
            ) as progress:
                for data in response.iter_content(chunk_size=8192):
                    file.write(data)
                    progress.update(len(data))

        print(f"File downloaded successfully: {local_filename}")
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")

def main():
    """Parse CLI options and fetch the requested metric-depth checkpoint."""
    parser = argparse.ArgumentParser(description="Download checkpoint files.")
    parser.add_argument("--size", "-s", choices=["large", "small", "base", "l", "s", "b"],
                        default="large", help="Specify the size of the file to download (large/l, small/s, base/b). Default is large.")
    parser.add_argument("--environment", "-e", choices=["indoor", "outdoor", "i", "o"],
                        required=True, help="Specify the environment type for the model (indoor/i or outdoor/o).")
    args = parser.parse_args()

    # Expand single-letter shorthands to full names; full names fall through
    # dict.get unchanged.
    full_size = {"l": "large", "s": "small", "b": "base"}.get(args.size, args.size)
    full_env = {"i": "indoor", "o": "outdoor"}.get(args.environment, args.environment)

    # Hypersim checkpoints serve indoor scenes; VKITTI serves outdoor.
    checkpoint_urls = {
        ("indoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true",
        ("indoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Small/resolve/main/depth_anything_v2_metric_hypersim_vits.pth?download=true",
        ("indoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Base/resolve/main/depth_anything_v2_metric_hypersim_vitb.pth?download=true",
        ("outdoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Large/resolve/main/depth_anything_v2_metric_vkitti_vitl.pth?download=true",
        ("outdoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Small/resolve/main/depth_anything_v2_metric_vkitti_vits.pth?download=true",
        ("outdoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Base/resolve/main/depth_anything_v2_metric_vkitti_vitb.pth?download=true"
    }

    target_url = checkpoint_urls[(full_env, full_size)]
    # Derive the on-disk name from the URL's final path segment, dropping
    # the "?download=true" query string.
    destination = os.path.join("checkpoints", target_url.split('/')[-1].split("?")[0])

    os.makedirs("checkpoints", exist_ok=True)
    download_file(target_url, destination)

if __name__ == "__main__":
    main()