# test_eval.py

from __future__ import absolute_import, division, print_function

import os
import cv2
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

from layers import disp_to_depth
from utils import readlines
from options import MonodepthOptions
import datasets
import math
import networks

cv2.setNumThreads(0)  # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1)

splits_dir = os.path.join(os.path.dirname(__file__), "splits")

# Models which were trained with stereo supervision were trained with a nominal
# baseline of 0.1 units. The KITTI rig has a baseline of 54cm. Therefore,
# to convert our stereo predictions to real-world scale we multiply our depths by 5.4.
STEREO_SCALE_FACTOR = 5.4


def compute_errors(gt, pred, flag=False):
    """Compute depth-error metrics between ground truth and prediction.

    Args:
        gt: ground-truth depths as a NumPy array. Values are used as divisors
            and passed to ``np.log``.
        pred: predicted depths, broadcast-compatible with ``gt``.
        flag: when true, the three threshold accuracies are not computed.

    Returns:
        ``(abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3)`` when ``flag`` is
        false, otherwise only ``(abs_rel, sq_rel, rmse, rmse_log)``.
    """
    residual = gt - pred
    squared_residual = residual ** 2

    # Relative errors are normalised by the ground-truth depth.
    abs_rel = np.mean(np.abs(residual) / gt)
    sq_rel = np.mean(squared_residual / gt)

    # Root-mean-square error in linear and in log space.
    rmse = np.sqrt(squared_residual.mean())
    log_residual = np.log(gt) - np.log(pred)
    rmse_log = np.sqrt((log_residual ** 2).mean())

    core_metrics = (abs_rel, sq_rel, rmse, rmse_log)
    if flag:
        return core_metrics

    # Fraction of entries whose symmetric ratio stays below 1.25, 1.25^2, 1.25^3.
    worst_ratio = np.maximum(gt / pred, pred / gt)
    accuracies = tuple((worst_ratio < 1.25 ** power).mean() for power in (1, 2, 3))
    return core_metrics + accuracies


def batch_post_process_disparity(l_disp, r_disp):
    """Blend two disparity batches with the Monodepthv1 post-processing scheme.

    Args:
        l_disp: array of shape ``(N, H, W)``.
        r_disp: array combined element-wise with ``l_disp`` (the caller passes
            the horizontally re-flipped prediction of the mirrored image).

    Returns:
        Array where ``r_disp`` is used near the left border, ``l_disp`` near
        the right border, and the mean of both elsewhere.
    """
    _, height, width = l_disp.shape

    # Horizontal coordinate in [0, 1], repeated for every row.
    x_coord = np.broadcast_to(np.linspace(0, 1, width), (height, width))

    # Weight is 1 at the left edge and ramps down to 0 by x = 0.1.
    left_weight = (1.0 - np.clip(20 * (x_coord - 0.05), 0, 1))[None, ...]
    right_weight = left_weight[:, :, ::-1]

    mean_disp = 0.5 * (l_disp + r_disp)
    blended = right_weight * l_disp + left_weight * r_disp
    return blended + (1.0 - left_weight - right_weight) * mean_disp


def _eigen_crop_bounds(gt_height, gt_width):
    """Return the Eigen-split crop as integer ``(top, bottom, left, right)`` bounds."""
    bounds = np.array([0.40810811 * gt_height, 0.99189189 * gt_height,
                       0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32)
    return tuple(int(b) for b in bounds)


def _obstacle_regions(pred_depth, gt_depth, eval_split, grid_size, min_depth, max_depth):
    """Yield ``(pred_values, gt_values)`` copies for each obstacle-detection region."""
    if eval_split != "eigen":
        # No grid is defined for other splits: treat all valid pixels as one region.
        valid = gt_depth > 0
        yield pred_depth[valid], gt_depth[valid]
        return

    valid = np.logical_and(gt_depth >= min_depth, gt_depth <= max_depth)
    top, bottom, left, right = _eigen_crop_bounds(*gt_depth.shape[:2])

    # Only cells that fit completely inside the crop are visited.
    for row in range(top, bottom - grid_size + 1, grid_size):
        rows = slice(row, row + grid_size)
        for col in range(left, right - grid_size + 1, grid_size):
            cols = slice(col, col + grid_size)
            # Each cell gets its own mask; boolean indexing returns copies, so
            # callers may modify the yielded arrays in place.
            cell_valid = valid[rows, cols]
            yield pred_depth[rows, cols][cell_valid], gt_depth[rows, cols][cell_valid]


def evaluate_test(opt, encoder, depth_decoder):
    """Run depth prediction on a test split and print the error metrics.

    Predictions are computed for every file listed in
    ``splits/<opt.eval_split>/test_files.txt`` and compared against
    ``splits/<opt.eval_split>/gt_depths.npz``. With ``opt.obstacle_detection``
    the comparison is made between per-region medians (grid cells of side
    ``opt.grid_size`` inside the Eigen crop); otherwise it is made per pixel.

    Args:
        opt: options object. Attributes read here: ``eval_split``, ``png``,
            ``data_path``, ``num_workers``, ``post_process``, ``min_depth``,
            ``max_depth``, ``save_pred_disps``, ``load_weights_folder``,
            ``no_eval``, ``eval_stereo``, ``disable_median_scaling``,
            ``pred_depth_scale_factor``, ``obstacle_detection``, ``grid_size``.
            When ``eval_stereo`` is set, ``disable_median_scaling`` and
            ``pred_depth_scale_factor`` are overwritten on ``opt``.
        encoder: network applied to the input images; put into eval mode.
        depth_decoder: network applied to the encoder output; its result must
            contain the key ``("disp", 0)``. Put into eval mode.

    Raises:
        SystemExit: after saving outputs when ``opt.no_eval`` is set or
            ``opt.eval_split == 'benchmark'`` (no evaluation is performed).
        RuntimeError: if no sample produced any error measurement.
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80
    device = 'cuda'

    # NOTE: the networks are not moved to ``device`` here, only the inputs are,
    # so both models must already live on that device.
    encoder.eval()
    depth_decoder.eval()

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, "test_files.txt"))
    img_ext = '.png' if opt.png else '.jpg'
    # 192 and 640 are presumably the network input height and width (the log
    # line below reports "640x192") — TODO confirm against KITTIRAWDataset.
    dataset = datasets.KITTIRAWDataset(opt.data_path, filenames,
                                       192, 640,
                                       [0], 4, is_train=False, img_ext=img_ext)
    dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False)
    pred_disps = []

    print("-> Computing predictions with size {}x{}".format(
        640, 192))

    with torch.no_grad():
        for data in dataloader:
            input_color = data[("color", 0, 0)].to(device)

            if opt.post_process:
                # Post-processed results require each image to have two forward passes
                input_color = torch.cat((input_color, torch.flip(input_color, [3])), 0)

            output = depth_decoder(encoder(input_color))

            pred_disp, _ = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()

            if opt.post_process:
                # First half: original images; second half: mirrored images,
                # flipped back before blending.
                N = pred_disp.shape[0] // 2
                pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1])

            pred_disps.append(pred_disp)

    pred_disps = np.concatenate(pred_disps)

    if opt.save_pred_disps:
        output_path = os.path.join(
            opt.load_weights_folder, "disps_{}_split.npy".format(opt.eval_split))
        print("-> Saving predicted disparities to ", output_path)
        np.save(output_path, pred_disps)

    if opt.no_eval:
        print("-> Evaluation disabled. Done.")
        # ``quit()`` is a ``site`` helper for interactive use; raising
        # SystemExit terminates the same way without depending on it.
        raise SystemExit

    if opt.eval_split == 'benchmark':
        save_dir = os.path.join(opt.load_weights_folder, "benchmark_predictions")
        print("-> Saving out benchmark predictions to {}".format(save_dir))
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for idx, disp in enumerate(pred_disps):
            disp_resized = cv2.resize(disp, (1216, 352))
            depth = STEREO_SCALE_FACTOR / disp_resized
            depth = np.clip(depth, 0, 80)
            # Stored as 16-bit PNG with depth multiplied by 256.
            depth = np.uint16(depth * 256)
            save_path = os.path.join(save_dir, "{:010d}.png".format(idx))
            cv2.imwrite(save_path, depth)

        print("-> No ground truth is available for the KITTI benchmark, so not evaluating. Done.")
        raise SystemExit

    gt_path = os.path.join(splits_dir, opt.eval_split, "gt_depths.npz")
    gt_depths = np.load(gt_path, fix_imports=True, encoding='latin1', allow_pickle=True)["data"]

    print("-> Evaluating")

    if opt.eval_stereo:
        print("   Stereo evaluation - "
              "disabling median scaling, scaling by {}".format(STEREO_SCALE_FACTOR))
        opt.disable_median_scaling = True
        opt.pred_depth_scale_factor = STEREO_SCALE_FACTOR
    else:
        print("   Mono evaluation - using median scaling")

    errors = []
    ratios = []

    for i in range(pred_disps.shape[0]):

        gt_depth = gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]

        pred_disp = cv2.resize(pred_disps[i], (gt_width, gt_height))
        pred_depth = 1 / pred_disp

        if opt.obstacle_detection:
            pred = []
            gt = []
            ratio_grid = []
            regions = _obstacle_regions(pred_depth, gt_depth, opt.eval_split,
                                        opt.grid_size, MIN_DEPTH, MAX_DEPTH)
            for pred_temp, gt_temp in regions:
                # Both arrays are selected with the same mask, so one size
                # check covers both.
                if pred_temp.size == 0:
                    continue

                pred_temp *= opt.pred_depth_scale_factor
                if not opt.disable_median_scaling:
                    pred_median = np.median(pred_temp)
                    if pred_median == 0:
                        # Cannot derive a scaling ratio for this region.
                        continue
                    ratio = np.median(gt_temp) / pred_median
                    ratio_grid.append(ratio)
                    pred_temp *= ratio

                pred_temp[pred_temp < MIN_DEPTH] = MIN_DEPTH
                pred_temp[pred_temp > MAX_DEPTH] = MAX_DEPTH

                # Each region contributes one (median prediction, median gt) pair.
                pred_value = np.median(pred_temp)
                gt_value = np.median(gt_temp)
                if pred_value and gt_value:
                    pred.append(pred_value)
                    gt.append(gt_value)

            if pred:
                errors.append(compute_errors(np.array(gt), np.array(pred)))
                if ratio_grid:
                    ratios.append(np.median(ratio_grid))
            else:
                print(np.array(gt).shape, np.array(pred).shape)
                print('r', np.median(ratio_grid))

        else:
            if opt.eval_split == "eigen":
                mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)

                top, bottom, left, right = _eigen_crop_bounds(gt_height, gt_width)
                crop_mask = np.zeros(mask.shape)
                crop_mask[top:bottom, left:right] = 1
                mask = np.logical_and(mask, crop_mask)

            else:
                mask = gt_depth > 0

            pred_depth = pred_depth[mask]
            gt_depth = gt_depth[mask]

            pred_depth *= opt.pred_depth_scale_factor
            if not opt.disable_median_scaling:
                ratio = np.median(gt_depth) / np.median(pred_depth)
                ratios.append(ratio)
                pred_depth *= ratio

            pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
            pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

            errors.append(compute_errors(gt_depth, pred_depth))

    if not errors:
        # Without this guard the formatted print below fails with an opaque
        # TypeError on a NaN scalar.
        raise RuntimeError("No valid samples were evaluated; cannot compute mean errors")

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")