Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated metric depth to point cloud code #89

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ We provide **four models** of varying scales for robust relative depth estimatio
| Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true) |
| Depth-Anything-V2-Giant | 1.3B | Coming soon |

Alternatively, you can download a checkpoint directly with the provided script:

```bash
python checkpoint_downloader.py --size [small/s/base/b/large/l]
```

## Usage

Expand Down Expand Up @@ -173,6 +178,7 @@ We are sincerely grateful to the awesome Hugging Face team ([@Pedro Cuenca](http

We also thank the [DINOv2](https://github.com/facebookresearch/dinov2) team for contributing such impressive models to our community.

The test.jpg image is from [here](https://rankcomfort.com/top-digital-cameras-for-travel-photography-2024/).

## LICENSE

Expand Down
54 changes: 54 additions & 0 deletions checkpoint_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os
import requests
import argparse
from tqdm import tqdm

def download_file(url, local_filename):
    """Stream *url* to *local_filename* with a byte-accurate progress bar.

    Errors are reported to stdout rather than raised, so a failed download
    leaves the caller running (best-effort behaviour kept from the original).

    Args:
        url: HTTP(S) URL of the file to fetch.
        local_filename: Destination path on disk.
    """
    try:
        # Stream with a timeout so a stalled server cannot hang forever;
        # the context manager guarantees the connection is released.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            total_size = int(response.headers.get('content-length', 0))

            # tqdm counts bytes: total=total_size with unit='B' and
            # unit_scale=True renders human-readable sizes. (The original
            # passed a chunk count as total while labelling it 'KB', which
            # misreported progress.)
            with open(local_filename, 'wb') as file, tqdm(
                total=total_size, unit='B', unit_scale=True, unit_divisor=1024
            ) as progress:
                for data in response.iter_content(chunk_size=8192):
                    file.write(data)
                    progress.update(len(data))

        print(f"File downloaded successfully: {local_filename}")
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")

def main(argv=None):
    """Parse CLI arguments and download the selected relative-depth checkpoint.

    Args:
        argv: Optional explicit argument list (useful for testing); when
            None, argparse falls back to sys.argv.
    """
    parser = argparse.ArgumentParser(description="Download checkpoint files.")
    parser.add_argument("--size", "-s", choices=["large", "small", "base", "l", "s", "b"],
                        default="large", help="Specify the size of the file to download (large/l, small/s, base/b). Default is large.")
    args = parser.parse_args(argv)

    urls = {
        "large": "https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true",
        "small": "https://huggingface.co/depth-anything/Depth-Anything-V2-Small/resolve/main/depth_anything_v2_vits.pth?download=true",
        "base": "https://huggingface.co/depth-anything/Depth-Anything-V2-Base/resolve/main/depth_anything_v2_vitb.pth?download=true"
    }

    # Expand single-letter shorthands ("l" -> "large"); full names pass
    # through dict.get unchanged.
    size_mapping = {
        "l": "large",
        "s": "small",
        "b": "base"
    }
    normalized_size = size_mapping.get(args.size, args.size)

    url = urls[normalized_size]

    checkpoints_dir = "checkpoints"
    # Checkpoint names end in vit{l,s,b}: the first letter of the full size.
    local_filename = os.path.join(checkpoints_dir, f"depth_anything_v2_vit{normalized_size[0]}.pth")

    os.makedirs(checkpoints_dir, exist_ok=True)
    download_file(url, local_filename)

if __name__ == "__main__":
    main()
96 changes: 56 additions & 40 deletions metric_depth/depth_to_pointcloud.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Born out of Depth Anything V1 Issue 36
# Born out of Depth Anything V1 Issue 36: Code by @1ssb
# Make sure you have the necessary libraries
# Code by @1ssb
# Note that this code is meant for batch processing, to make individual predictions on different parameters, rewrite the loop execution
# Load the images you want to perform inference on in the input_images directory

import argparse
import cv2
Expand All @@ -13,28 +14,27 @@

from depth_anything_v2.dpt import DepthAnythingV2


def parse_arguments(argv=None):
    """Build and parse the command-line arguments for point-cloud generation.

    Args:
        argv: Optional explicit argument list (useful for testing); when
            None, argparse falls back to sys.argv.

    Returns:
        argparse.Namespace holding model, I/O and camera-intrinsic settings.
    """
    parser = argparse.ArgumentParser()

    # Model Parameters
    parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'])
    parser.add_argument('--load-from', default='checkpoints/depth_anything_v2_metric_hypersim_vitl.pth', type=str)
    parser.add_argument('--max-depth', default=10, type=float)

    # I/O Information
    parser.add_argument('--img-path', default='./input_images', type=str)
    parser.add_argument('--outdir', type=str, default='./vis_pointcloud')

    # Inference Parameters (pinhole-camera intrinsics and output resolution)
    parser.add_argument('--focal-length-x', default=470.4, type=float, help='Focal length along the x-axis.')
    parser.add_argument('--focal-length-y', default=470.4, type=float, help='Focal length along the y-axis.')
    # Width/height are pixel counts and must be ints: PIL's Image.resize and
    # np.arange downstream reject/mishandle floats (original used type=float).
    parser.add_argument('--final_width', default=360, type=int, help='Final Width of the images.')
    parser.add_argument('--final_height', default=640, type=int, help='Final Height of the images.')

    return parser.parse_args(argv)

def initialize_model(args, DEVICE):
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
Expand All @@ -46,38 +46,54 @@
depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu'))
depth_anything = depth_anything.to(DEVICE).eval()

if os.path.isfile(args.img_path):
if args.img_path.endswith('txt'):
with open(args.img_path, 'r') as f:
return depth_anything

def get_filenames(img_path):
    """Resolve *img_path* into the list of files to run inference on.

    Args:
        img_path: A single image file, a ``.txt`` manifest listing one path
            per line, or a directory that is searched recursively.

    Returns:
        List of file paths. When globbing a directory, subdirectory entries
        may be included as well (matching the original behaviour).
    """
    if os.path.isfile(img_path):
        if img_path.endswith('txt'):
            # A text file is treated as a manifest: one image path per line.
            with open(img_path, 'r') as f:
                filenames = f.read().splitlines()
        else:
            filenames = [img_path]
    else:
        # Recursively collect everything under the directory.
        filenames = glob.glob(os.path.join(img_path, '**/*'), recursive=True)
    return filenames

def process_images(filenames, depth_anything, args, DEVICE):
    """Run metric-depth inference on each image and write a colored point cloud.

    Args:
        filenames: Image paths to process.
        depth_anything: Loaded DepthAnythingV2 model, already on its device
            and in eval mode.
        args: Parsed CLI namespace (focal lengths, final size, outdir).
        DEVICE: Unused here (the model is already placed); kept so the
            call signature stays compatible.
    """
    FX, FY = args.focal_length_x, args.focal_length_y
    # Cast defensively: PIL's resize and np.arange require integer pixel
    # dimensions even if the CLI delivered floats.
    H, W = int(args.final_height), int(args.final_width)

    for k, filename in enumerate(filenames):
        print(f'Progress {k+1}/{len(filenames)}: {filename}')

        color_image = Image.open(filename).convert('RGB')

        image = cv2.imread(filename)
        pred = depth_anything.infer_image(image, H)

        # Resize color image and depth to final size
        resized_color_image = color_image.resize((W, H), Image.LANCZOS)
        resized_pred = Image.fromarray(pred).resize((W, H), Image.NEAREST)

        # Back-project each pixel into camera space with a pinhole model,
        # assuming the principal point sits at the image center.
        x, y = np.meshgrid(np.arange(W), np.arange(H))
        x = (x - W / 2) / FX
        y = (y - H / 2) / FY
        z = np.array(resized_pred)
        points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
        colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

        save_point_cloud(points, colors, args.outdir, filename)

def save_point_cloud(points, colors, outdir, filename):
    """Write *points*/*colors* as a .ply file named after the source image."""
    # Output file keeps the image's base name with a .ply extension.
    stem, _ = os.path.splitext(os.path.basename(filename))
    out_path = os.path.join(outdir, stem + ".ply")

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(points)
    cloud.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_point_cloud(out_path, cloud)

if __name__ == '__main__':
    # Entry point: parse CLI options, select a device, load the model, and
    # batch-convert every input image into a .ply point cloud.
    args = parse_arguments()

    # Prefer CUDA, then Apple MPS, falling back to CPU.
    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

    depth_anything = initialize_model(args, DEVICE)
    filenames = get_filenames(args.img_path)
    # Ensure the output directory exists before any .ply is written.
    os.makedirs(args.outdir, exist_ok=True)
    process_images(filenames, depth_anything, args, DEVICE)
59 changes: 59 additions & 0 deletions metric_depth/metric_checkpoint_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
import requests
import argparse
from tqdm import tqdm

def download_file(url, local_filename):
    """Stream *url* to *local_filename* with a byte-accurate progress bar.

    Errors are reported to stdout rather than raised, so a failed download
    leaves the caller running (best-effort behaviour kept from the original).

    Args:
        url: HTTP(S) URL of the file to fetch.
        local_filename: Destination path on disk.
    """
    try:
        # Stream with a timeout so a stalled server cannot hang forever;
        # the context manager guarantees the connection is released.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            # tqdm counts bytes: total=total_size with unit='B' and
            # unit_scale=True renders human-readable sizes. (The original
            # passed a chunk count as total while labelling it 'KB', which
            # misreported progress.)
            with open(local_filename, 'wb') as file, tqdm(
                total=total_size, unit='B', unit_scale=True, unit_divisor=1024
            ) as progress:
                for data in response.iter_content(chunk_size=8192):
                    file.write(data)
                    progress.update(len(data))

        print(f"File downloaded successfully: {local_filename}")
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")

def main():
    """Parse CLI options and fetch the requested metric-depth checkpoint."""
    parser = argparse.ArgumentParser(description="Download checkpoint files.")
    parser.add_argument("--size", "-s", choices=["large", "small", "base", "l", "s", "b"],
                        default="large", help="Specify the size of the file to download (large/l, small/s, base/b). Default is large.")
    parser.add_argument("--environment", "-e", choices=["indoor", "outdoor", "i", "o"],
                        required=True, help="Specify the environment type for the model (indoor/i or outdoor/o).")
    args = parser.parse_args()

    # Expand single-letter shorthands to full names; full names fall through
    # dict.get unchanged.
    full_size = {"l": "large", "s": "small", "b": "base"}.get(args.size, args.size)
    full_env = {"i": "indoor", "o": "outdoor"}.get(args.environment, args.environment)

    # Hypersim checkpoints serve indoor scenes; VKITTI serves outdoor.
    checkpoint_urls = {
        ("indoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true",
        ("indoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Small/resolve/main/depth_anything_v2_metric_hypersim_vits.pth?download=true",
        ("indoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Base/resolve/main/depth_anything_v2_metric_hypersim_vitb.pth?download=true",
        ("outdoor", "large"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Large/resolve/main/depth_anything_v2_metric_vkitti_vitl.pth?download=true",
        ("outdoor", "small"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Small/resolve/main/depth_anything_v2_metric_vkitti_vits.pth?download=true",
        ("outdoor", "base"): "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Base/resolve/main/depth_anything_v2_metric_vkitti_vitb.pth?download=true"
    }

    target_url = checkpoint_urls[(full_env, full_size)]
    # Derive the on-disk name from the URL's final path segment, dropping
    # the "?download=true" query string.
    destination = os.path.join("checkpoints", target_url.split('/')[-1].split("?")[0])

    os.makedirs("checkpoints", exist_ok=True)
    download_file(target_url, destination)

if __name__ == "__main__":
    main()