diff --git a/geo_seg_sam_with_pca.ipynb b/geo_seg_sam_with_pca.ipynb new file mode 100644 index 0000000..97bfa2e --- /dev/null +++ b/geo_seg_sam_with_pca.ipynb @@ -0,0 +1,614 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import open3d as o3d\n", + "\n", + "full_pcd = o3d.io.read_point_cloud('ACMMP_model.ply')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-06-23 12:05:28.319 Python[1496:9792870] WARNING: Secure coding is automatically enabled for restorable state! However, not on all supported macOS versions of this application. Opt-in to secure coding explicitly by implementing NSApplicationDelegate.applicationSupportsSecureRestorableState:.\n" + ] + } + ], + "source": [ + "o3d.visualization.draw_geometries([full_pcd])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from sklearn.decomposition import PCA\n", + "\n", + "pca = PCA(n_components=3)\n", + "pca.fit(np.asarray(full_pcd.points))\n", + "pca_points = pca.transform(np.asarray(full_pcd.points))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 6.75752944e-01, 8.48351256e-03, 4.24977380e-04],\n", + " [-4.94206366e-01, -4.55547835e-02, 6.27162766e-04],\n", + " [-4.94773512e-01, -4.59906911e-02, 6.54203763e-04],\n", + " ...,\n", + " [ 1.62199020e-01, -7.49438389e-02, 3.64268772e-03],\n", + " [ 1.62045542e-01, -7.47876570e-02, 3.65650457e-03],\n", + " [ 1.61892065e-01, -7.46314751e-02, 3.67032143e-03]])" + ] + }, + "execution_count": 4, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "pca_points" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.02760315, 0.11641365, 4.34926653],\n", + " [-1.01881158, 0.71682596, 4.17974997],\n", + " [-1.01952505, 0.71678185, 4.1797123 ],\n", + " ...,\n", + " [-0.49656773, 0.33102733, 4.28055477],\n", + " [-0.4966073 , 0.33124256, 4.28053904],\n", + " [-0.49664688, 0.33145779, 4.2805233 ]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.asarray(full_pcd.points)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import copy\n", + "\n", + "pca_points_colors = copy.deepcopy(np.asarray(full_pcd.colors))\n", + "\n", + "pca_pcd = o3d.geometry.PointCloud()\n", + "pca_pcd.points = o3d.utility.Vector3dVector(pca_points)\n", + "pca_pcd.colors = o3d.utility.Vector3dVector(pca_points_colors)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "o3d.visualization.draw_geometries([pca_pcd])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def is_point_in_square(point, x_start, x_end, y_start, y_end):\n", + " return point[0] >= x_start and point[0] < x_end and point[1] >= y_start and point[1] < y_end\n", + "\n", + "\n", + "def divide_pcd_into_square_segments(points, colors, num_segments):\n", + " x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0])\n", + " y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1])\n", + "\n", + " x_step = (x_max - x_min) / num_segments\n", + " y_step = (y_max - y_min) / num_segments\n", 
+ "\n", + " x_grid = np.arange(x_min, x_max, x_step)\n", + " y_grid = np.arange(y_min, y_max, y_step)\n", + " x, y = np.meshgrid(x_grid, y_grid)\n", + "\n", + " segments = []\n", + " for i in range(len(x_grid) - 1):\n", + " for j in range(len(y_grid) - 1):\n", + " # current square\n", + " x_start, x_end = x[i, j], x[i, j + 1]\n", + " y_start, y_end = y[i, j], y[i + 1, j]\n", + "\n", + " segment_points = []\n", + " segment_points_colors = []\n", + " for ind, point in enumerate(points):\n", + " if is_point_in_square(point, x_start, x_end, y_start, y_end):\n", + " segment_points.append(point)\n", + " segment_points_colors.append(colors[ind])\n", + " segment_points_array = np.asarray(segment_points, dtype=np.float32)\n", + " segment_points_colors_array = np.asarray(segment_points_colors, dtype=np.float32)\n", + "\n", + " dict = {\n", + " 'points': segment_points_array,\n", + " 'colors': segment_points_colors_array\n", + " }\n", + " segments.append(dict)\n", + " print(\"Finish: {}/{} x and {}/{} y\".format(i, len(x_grid) - 1 - 1, j, len(y_grid) - 1 - 1))\n", + "\n", + " return segments" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finish: 0/4 x and 0/4 y\n", + "Finish: 0/4 x and 1/4 y\n", + "Finish: 0/4 x and 2/4 y\n", + "Finish: 0/4 x and 3/4 y\n", + "Finish: 0/4 x and 4/4 y\n", + "Finish: 1/4 x and 0/4 y\n", + "Finish: 1/4 x and 1/4 y\n", + "Finish: 1/4 x and 2/4 y\n", + "Finish: 1/4 x and 3/4 y\n", + "Finish: 1/4 x and 4/4 y\n", + "Finish: 2/4 x and 0/4 y\n", + "Finish: 2/4 x and 1/4 y\n", + "Finish: 2/4 x and 2/4 y\n", + "Finish: 2/4 x and 3/4 y\n", + "Finish: 2/4 x and 4/4 y\n", + "Finish: 3/4 x and 0/4 y\n", + "Finish: 3/4 x and 1/4 y\n", + "Finish: 3/4 x and 2/4 y\n", + "Finish: 3/4 x and 3/4 y\n", + "Finish: 3/4 x and 4/4 y\n", + "Finish: 4/4 x and 0/4 y\n", + "Finish: 4/4 x and 1/4 y\n", + "Finish: 4/4 x and 2/4 y\n", + "Finish: 4/4 x and 3/4 y\n", 
+ "Finish: 4/4 x and 4/4 y\n" + ] + } + ], + "source": [ + "segment_dicts = divide_pcd_into_square_segments(np.asarray(pca_pcd.points), np.asarray(pca_pcd.colors), num_segments=6)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4956980" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(segment_dicts[7]['points'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "final_pcd_segment = o3d.geometry.PointCloud()\n", + "final_pcd_segment.points = o3d.utility.Vector3dVector(segment_dicts[7]['points'])\n", + "final_pcd_segment.colors = o3d.utility.Vector3dVector(segment_dicts[7]['colors'])\n", + "o3d.visualization.draw_geometries([final_pcd_segment])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# transfer of SAM masks" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class Camera:\n", + " def __init__(self):\n", + " self.K = [0.0] * 9\n", + " self.R = [0.0] * 9\n", + " self.t = [0.0] * 3\n", + " self.height = 0\n", + " self.width = 0\n", + " self.depth_min = 0.0\n", + " self.depth_max = 0.0" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def read_camera(cam_path):\n", + " camera = Camera()\n", + " with open(cam_path, \"r\") as file:\n", + " line = file.readline().strip()\n", + " for i in range(3):\n", + " (\n", + " camera.R[3 * i + 0],\n", + " camera.R[3 * i + 1],\n", + " camera.R[3 * i + 2],\n", + " camera.t[i],\n", + " ) = map(float, file.readline().split())\n", + "\n", + " tmp = list(map(float,
file.readline().split()))\n", + " line = file.readline().strip()\n", + " line = file.readline().strip()\n", + "\n", + " for i in range(3):\n", + " camera.K[3 * i + 0], camera.K[3 * i + 1], camera.K[3 * i + 2] = map(\n", + " float, file.readline().split()\n", + " )\n", + "\n", + " line = file.readline().strip()\n", + " camera.depth_min, interval, depth_num, camera.depth_max = map(\n", + " float, file.readline().split()\n", + " )\n", + "\n", + " return camera" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PointCloud with 4956980 points." + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "import open3d as o3d\n", + "import copy\n", + "\n", + "cams_file = \"images_cams/cams/00000000_cam.txt\"\n", + "camera = read_camera(str(cams_file))\n", + "trans = np.asarray(\n", + " [\n", + " [camera.R[0], camera.R[1], camera.R[2], camera.t[0]],\n", + " [camera.R[3], camera.R[4], camera.R[5], camera.t[1]],\n", + " [camera.R[6], camera.R[7], camera.R[8], camera.t[2]],\n", + " [0, 0, 0, 1],\n", + " ]\n", + ")\n", + "pcd_segment_copy = copy.deepcopy(final_pcd_segment)\n", + "pcd_segment_copy.transform(trans)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "659899\n" + ] + } + ], + "source": [ + "def hidden_removal_points(pcd):\n", + " diameter = np.linalg.norm(\n", + " np.asarray(pcd.get_max_bound()) - np.asarray(pcd.get_min_bound())\n", + " )\n", + " cam = [0, 0, 0]\n", + " radius = diameter * 100000\n", + " _, indices = pcd.hidden_point_removal(cam, radius)\n", + " return indices\n", + "\n", + "indices = hidden_removal_points(pcd_segment_copy)\n", + "print(len(indices))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type":
"stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "def get_subpcd(pcd, indices):\n", + " subpcd = o3d.geometry.PointCloud()\n", + " subpcd.points = o3d.utility.Vector3dVector(np.asarray(pcd.points)[indices])\n", + " subpcd.colors = o3d.utility.Vector3dVector(np.asarray(pcd.colors)[indices])\n", + " return subpcd\n", + "\n", + "pcd_hidden_removal = get_subpcd(pcd_segment_copy, indices)\n", + "o3d.visualization.draw_geometries([pcd_hidden_removal])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "def get_image_instances():\n", + " masks_path = Path.cwd().joinpath(\n", + " \"geo-seg/vfm-labels/sam/00000000.npz\"\n", + " )\n", + " return np.load(masks_path, allow_pickle=True)[\"masks\"]\n", + "\n", + "masks = get_image_instances()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "def masks_to_image(masks):\n", + " image_labels = np.zeros(masks[0][\"segmentation\"].shape)\n", + " for i, mask in enumerate(masks):\n", + " image_labels[mask[\"segmentation\"]] = i + 1\n", + " return image_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "image_labels = masks_to_image(masks)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def get_points_to_pixels(points, img_shape, camera):\n", + " img_width, img_height = img_shape\n", + "\n", + " intrinsic = np.asarray(\n", + " [\n", + " [camera.K[0], camera.K[1], camera.K[2]],\n", + " [camera.K[3], camera.K[4], camera.K[5]],\n", + " [camera.K[6], camera.K[7], camera.K[8]],\n", + " ]\n", + " )\n", + "\n", + " points_proj = intrinsic @ points.T\n", + " points_proj[:2, :] /= points_proj[2, :]\n", + " points_coord = points_proj.T\n", +
"\n", + " inds = np.where(\n", + " (points_coord[:, 0] < img_width)\n", + " & (points_coord[:, 0] >= 0)\n", + " & (points_coord[:, 1] < img_height)\n", + " & (points_coord[:, 1] >= 0)\n", + " & (points_coord[:, 2] > 0)\n", + " )[0]\n", + " print(len(inds))\n", + "\n", + " points_ind_to_pixels = {}\n", + " for ind in inds:\n", + " points_ind_to_pixels[ind] = points_coord[ind][:2].astype(int)\n", + "\n", + " return points_ind_to_pixels\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n" + ] + } + ], + "source": [ + "points2instances = np.zeros((len(pcd_segment_copy.points), 1), dtype=int)\n", + "\n", + "points_to_pixels = get_points_to_pixels(\n", + " np.asarray(pcd_hidden_removal.points),\n", + " ((image_labels.shape[1], image_labels.shape[0])),\n", + " camera,\n", + ")\n", + "\n", + "for point_id, pixel_id in points_to_pixels.items():\n", + " points2instances[indices[point_id], 0] = int(\n", + " image_labels[pixel_id[1], pixel_id[0]]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# not going any further, because 0 points are left" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "def generate_random_colors(N):\n", + " colors = [[0, 0, 0]]\n", + " for _ in range(N):\n", + " colors.append(\n", + " [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]\n", + " )\n", + "\n", + " colors = np.vstack(colors) / 255\n", + " return colors" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def color_pcd_by_labels(pcd, labels):\n", + " colors = generate_random_colors(len(labels) + 1)\n", + " pcd_colored = copy.deepcopy(pcd)\n", + " pcd_colored.colors = o3d.utility.Vector3dVector(\n", + "
np.zeros(np.asarray(pcd.points).shape)\n", + " )\n", + "\n", + " for i in range(len(pcd_colored.points)):\n", + " pcd_colored.colors[i] = colors[labels[i]]\n", + "\n", + " return pcd_colored" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "pcd_colored = color_pcd_by_labels(pcd_segment_copy, points2instances[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "o3d.visualization.draw_geometries([pcd_colored])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}