랩 추가: 객체 탐지(Object Detection)

PyTorchKorea · Nov 27, 2024 · e0f2c00 · e0f2c00
1 parent c449b12
commit e0f2c00
Show file tree

Hide file tree

Showing 40 changed files with 3,030 additions and 0 deletions.
diff --git a/_static/images/logo_ko.png b/_static/images/logo_ko.png
diff --git a/codes/object-detection/api-server/app.py b/codes/object-detection/api-server/app.py
@@ -0,0 +1,63 @@
+# app.py
+from io import BytesIO
+
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import JSONResponse
+from PIL import Image
+import torch
+import torchvision
+
+from model import load_model_and_preprocess
+from utils import filter_results
+
+# FastAPI 앱 생성
+app = FastAPI()
+
+# 모델 불러오기
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model, preprocess, meta = load_model_and_preprocess(device)
+
+# 모델 정보 관리
+models = [
+  {
+    "id": model.__class__.__module__,
+    "name": type(model).__name__
+  }
+]
+
+
+# 모델 정보 엔드포인트
+@app.get("/models")
+def get_models():
+  return JSONResponse(content={"models": models})
+
+# 객체 탐지 엔드포인트
+@app.post("/image:detect")
+def detect_objects(image: UploadFile, threshold: float = 0.5, klass: int = None):
+  try:
+    # 이미지 파일이 아닌 경우 예외 발생
+    if not image.headers['content-type'].startswith('image/'):
+      raise ValueError("Uploaded file is not an image")
+
+    # 클래스 ID가 주어진 경우, 유효한 클래스 ID인지 확인
+    if klass is not None and klass < 0:
+      raise ValueError("Invalid class ID")
+
+    # 업로드된 이미지 파일 열기 (PIL.Image 객체로 변환)
+    img_obj = Image.open(BytesIO(image.file.read()))
+
+    # 전처리
+    img_input = preprocess(img_obj).to(device)
+    img_input = img_input.unsqueeze(0) # 단일 이미지이므로 배치(batch) 차원 추가
+
+    # 추론 수행
+    outputs = model(img_input)[0] # 단일 이미지이므로 첫번째 결과만 사용
+
+    # 결과 필터링
+    results = filter_results(outputs, meta['categories'], threshold=threshold, klass=klass)
+
+    return JSONResponse(content={"objects": results})
+  except ValueError as e:
+    return JSONResponse(content={"error": str(e)}, status_code=415)
+  except Exception as e:
+    return JSONResponse(content={"error": str(e)}, status_code=500)
diff --git a/codes/object-detection/api-server/model.py b/codes/object-detection/api-server/model.py
@@ -0,0 +1,20 @@
+# model.py
+
+# Object Detection 모델들 중, Faster R-CNN 모델 불러오기
+from torchvision.models.detection import FasterRCNN_ResNet50_FPN_V2_Weights, fasterrcnn_resnet50_fpn_v2
+
+# 앞에서 가져온 가중치를 제공하여 사전 학습된 모델 가져오기
+def load_model_and_preprocess(device='cpu'):
+  weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
+  model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.5) # 기본값(0.5) 이상인 객체들을 탐지 후 필터링하여 반환
+  model.to(device) # 지정된 장치로 모델 이동
+  model.eval()     # (학습이 아닌) 추론 모드로 설정
+
+  return model, weights.transforms(), weights.meta
+
+
+if __name__ == '__main__':
+  model, preprocess, meta = load_model_and_preprocess()
+  print(model)
+  print(preprocess)
+  print(meta)
diff --git a/codes/object-detection/api-server/requirements.txt b/codes/object-detection/api-server/requirements.txt
@@ -0,0 +1,8 @@
+# PyTorch 및 torchvision
+torch==2.5.0
+torchvision==0.20.0
+
+# FastAPI
+fastapi==0.115.5
+uvicorn==0.32.1
+python-multipart==0.0.17
diff --git a/codes/object-detection/api-server/utils.py b/codes/object-detection/api-server/utils.py
@@ -0,0 +1,33 @@
+# utils.py
+
+# 결과 필터링 함수
+def filter_results(outputs, categories, threshold=0.5, klass=None):
+  filtered_results = []
+
+  for label, score, box in zip(outputs['labels'], outputs['scores'], outputs['boxes']):
+    if score < threshold:
+      continue
+
+    if klass is not None and int(label) != klass:
+      continue
+
+    filtered_results.append({
+      "class": int(label),
+      "label": categories[int(label)],
+      "score": float(score),
+      "bbox": [float(coord) for coord in box]
+    })
+
+  return filtered_results
+
+
+# 결과 필터링 함수 동작 확인
+if __name__ == "__main__":
+  sample_outputs = {
+    "labels": [1, 1, 2, 3, 4],
+    "scores": [0.9, 0.8, 0.7, 0.6, 0.5],
+    "boxes": [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16], [17, 18, 19, 20]]
+  }
+  sample_categories = {1: "cat", 2: "dog", 3: "bird", 4: "fish"}
+
+  print(filter_results(sample_outputs, sample_categories, 0.75))
diff --git a/codes/object-detection/client/client.py b/codes/object-detection/client/client.py
@@ -0,0 +1,53 @@
+import io
+import random
+import collections
+
+import requests
+import json
+import torch
+import torchvision
+
+from PIL import Image
+
+# 시각화 등을 위해 필요한 라이브러리 불러오기
+from torchvision.utils import draw_bounding_boxes
+from torchvision.transforms.v2 import functional as F
+
+import matplotlib.pyplot as plt
+
+# 이미지 파일
+fn_img_input = 'data/sample.jpg'
+fn_img_output = 'data/sample_output.jpg'
+
+# REST API 호출
+url = 'http://localhost:8000/image:detect?threshold=0.8&klass=1'
+files = [
+    ('image', ('sample.jpg', open(fn_img_input, 'rb'), 'image/jpeg'))
+]
+response = requests.post(url, files=files)
+if response.status_code != 200:
+    print(f'Error: {response.status_code}')
+    print(response.text)
+
+# 결과 확인
+results = json.loads(response.text)
+objects_types = [result['label'] for result in results['objects']]
+objects_counter = collections.Counter(objects_types)
+color_type = {result['class']:("#%06x" % random.randint(0, 0xFFFFFF)) for result in results['objects']}
+box_coords = torch.stack([torch.tensor(result['bbox']) for result in results['objects']])
+box_labels = [f"{result['label']}({result['score']:.2f})" for result in results['objects']]
+box_colors = [color_type[result['class']] for result in results['objects']]
+
+print(f'검출된 객체 수: {len(results["objects"])}')
+print(f'검출된 객체 종류: {objects_counter}')
+
+# 이미지에 검출 결과 그리기
+tensor_with_boxes = draw_bounding_boxes(torchvision.io.read_image(fn_img_input),
+                          boxes=box_coords,
+                          labels=box_labels,
+                          colors=box_colors,
+                          font='Verdana',
+                          font_size=20,
+                          width=2,)
+F.to_pil_image(tensor_with_boxes).save(fn_img_output)
+print(f'검출 결과 이미지 저장: {fn_img_output}')
diff --git a/codes/object-detection/client/data/sample.jpg b/codes/object-detection/client/data/sample.jpg
diff --git a/labs/_config.yml b/labs/_config.yml
@@ -0,0 +1,95 @@
+#######################################################################################
+# A default configuration that will be loaded for all jupyter books
+# Users are expected to override these values in their own `_config.yml` file.
+# This is also the "master list" of all allowed keys and values.
+# See https://jupyterbook.org/en/stable/customize/config.html#add-a-link-to-your-repository
+
+#######################################################################################
+# Book settings
+title                       : PyTorch Hands-On Labs
+author                      : 파이토치 한국 사용자 모임
+copyright                   : "2024"
+logo                        : ../_static/images/logo_ko.png
+# Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb")
+exclude_patterns            : [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints"]
+# Auto-exclude files not in the toc
+only_build_toc_files        : true
+
+#######################################################################################
+# Execution settings
+execute:
+  execute_notebooks         : cache         # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off")
+  cache                     : "../_build/.jupyter_cache" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/`
+  exclude_patterns          : []            # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time)
+  timeout                   : 300           # The maximum time (in seconds) each notebook cell is allowed to run.
+  run_in_temp               : false         # If `True`, then a temporary directory will be created and used as the command working directory (cwd),
+                                            # otherwise the notebook's parent directory will be the cwd.
+  allow_errors              : true          # If `False`, when a code cell raises an error the execution is stopped, otherwise all cells are always run.
+  stderr_output             : remove        # One of 'show', 'remove', 'remove-warn', 'warn', 'error', 'severe'
+
+#######################################################################################
+# Parse and render settings
+parse:
+  myst_enable_extensions:  # default extensions to enable in the myst parser. See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html
+    # - amsmath
+    - colon_fence
+    # - deflist
+    - dollarmath
+    # - html_admonition
+    # - html_image
+    - linkify
+    # - replacements
+    # - smartquotes
+    - substitution
+    - tasklist
+  myst_url_schemes: [mailto, http, https] # URI schemes that will be recognised as external URLs in Markdown links
+  myst_dmath_double_inline: true  # Allow display math ($$) within an inline context
+
+#######################################################################################
+# HTML-specific settings
+html:
+  favicon                   : ""  # A path to a favicon image
+  use_edit_page_button      : true   # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in
+  use_repository_button     : true   # Whether to add a link to your repository button
+  use_issues_button         : true   # Whether to add an "open an issue" button
+  use_multitoc_numbering    : true   # Continuous numbering across parts/chapters
+  extra_footer              : ""     # Will be displayed underneath the footer.
+  home_page_in_navbar       : true   # Whether to include your home page in the left Navigation Bar
+  baseurl                   : "https://hands-on.pytorch.kr/"
+  analytics:
+    google_analytics_id     : "G-5Z3BGEWMY9"  # A GA id that can be used to track book views.
+
+  comments:
+    hypothesis              : false
+    utterances              : false
+  announcement              : "" # A banner announcement at the top of the site.
+
+#######################################################################################
+# LaTeX-specific settings
+latex:
+  latex_engine              : pdflatex  # one of 'pdflatex', 'xelatex' (recommended for unicode), 'luatex', 'platex', 'uplatex'
+  use_jupyterbook_latex     : true # use sphinx-jupyterbook-latex for pdf builds as default
+
+#######################################################################################
+# Launch button settings
+launch_buttons:
+  notebook_interface        : jupyterlab  # The interface interactive links will activate ["classic", "jupyterlab"]
+  binderhub_url             : ""  # The URL of the BinderHub (e.g., https://mybinder.org)
+  jupyterhub_url            : ""  # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu)
+  thebe                     : false  # Add a thebe button to pages (requires the repository to run on Binder)
+  colab_url                 : "https://colab.research.google.com"
+  deepnote_url              : "" # The URL of Deepnote (https://deepnote.com)
+
+repository:
+  url                       : https://github.com/PyTorchKorea/hands-on  # The URL to your book's repository
+  path_to_book              : "blob/master/labs"  # A path to your book's folder, relative to the repository root.
+  branch                    : master  # Which branch of the repository should be used when creating links
+
+#######################################################################################
+# Advanced and power-user settings
+sphinx:
+  extra_extensions          :   # A list of extra extensions to load by Sphinx (added to those already used by JB).
+  local_extensions          :   # A list of local extensions to load by sphinx specified by "name: path" items
+  recursive_update          : false # A boolean indicating whether to overwrite the Sphinx config (true) or recursively update (false)
+  config                    :   # key-value pairs to directly over-ride the Sphinx configuration
+    html_theme              : sphinx_book_theme # modify this
diff --git a/labs/_toc.yml b/labs/_toc.yml
@@ -0,0 +1,26 @@
+# Table of contents
+# Learn more at https://jupyterbook.org/customize/toc.html
+
+format: jb-book
+root: index
+parts:
+  # 객체 탐지 랩
+  - caption: 객체 탐지(Object Detection)
+    chapters:
+      - file: object-detection/intro-object-detection
+      - file: object-detection/intro-torchvision
+      - file: object-detection/torchvision-basic
+        sections:
+        - file: object-detection/torchvision-basic-preparation
+        - file: object-detection/torchvision-basic-transforms
+      - file: object-detection/pretrained-model-basic
+      - file: object-detection/rest-api-intro
+        sections:
+        - file: object-detection/rest-api-preparation
+        - file: object-detection/rest-api-implementation-1
+        - file: object-detection/rest-api-implementation-2
+        - file: object-detection/rest-api-implementation-3
+        - file: object-detection/rest-api-implementation-4
+        - file: object-detection/rest-api-test
+      - file: object-detection/rest-api-visualization
+      - file: object-detection/rest-api-improvement
diff --git a/labs/index.md b/labs/index.md
@@ -0,0 +1,17 @@
+# 파이토치 핸즈온 랩 소개
+
+**파이토치 핸즈온 랩(PyTorch Hands-on Lab)** 에 오신 것을 환영합니다! 이 핸즈온 랩은 개발자들이 PyTorch를 활용한 인공지능 모델을 직접 사용하고 배포해 보면서 배우는 것을 목표로 합니다. 모든 랩(Lab)은 Python 언어 및 일반적인 개발 환경에 대한 지식이 있다고 가정하고, PyTorch를 활용한 인공지능 모델 개발 및 배포 과정을 직접 실행해 볼 수 있도록 구성되어 있습니다.
+
+왼쪽 메뉴의 각 랩(Lab)은 독립적으로 구성되어 있으며, 필요한 랩만 따로 참고하실 수 있게 구성하였습니다.
+
+다음은 각 랩에 대한 소개입니다:
+
+## 객체 탐지(Object Detection)
+
+객체 탐지(Object Detection)에 대한 소개 및 개념을 설명하고, 사전 학습된 객체 탐지 모델을 가져와 API를 만들어봅니다.
+
+다음 링크를 눌러 객체 탐지 랩을 시작해보세요. 👉
+{ref}`object-detection:intro-object-detection`
+
+
+*(TBD)*
diff --git a/labs/object-detection/images/example.jpg b/labs/object-detection/images/example.jpg
diff --git a/labs/object-detection/images/paperswithcode-sota.png b/labs/object-detection/images/paperswithcode-sota.png
diff --git a/labs/object-detection/images/restapi-improved-sample-output1.jpg b/labs/object-detection/images/restapi-improved-sample-output1.jpg
diff --git a/labs/object-detection/images/restapi-improved-sample-output2.jpg b/labs/object-detection/images/restapi-improved-sample-output2.jpg
diff --git a/labs/object-detection/images/restapi-sample-input1.jpg b/labs/object-detection/images/restapi-sample-input1.jpg
diff --git a/labs/object-detection/images/restapi-sample-input2.jpg b/labs/object-detection/images/restapi-sample-input2.jpg
diff --git a/labs/object-detection/images/restapi-sample-output1.jpg b/labs/object-detection/images/restapi-sample-output1.jpg
diff --git a/labs/object-detection/images/restapi-sample-output2.jpg b/labs/object-detection/images/restapi-sample-output2.jpg
diff --git a/labs/object-detection/images/restapi-swagger-improved.png b/labs/object-detection/images/restapi-swagger-improved.png
diff --git a/labs/object-detection/images/restapi-swagger-result.png b/labs/object-detection/images/restapi-swagger-result.png
diff --git a/labs/object-detection/images/restapi-swagger.png b/labs/object-detection/images/restapi-swagger.png
diff --git a/labs/object-detection/images/torchvision-examples-tutorials.png b/labs/object-detection/images/torchvision-examples-tutorials.png
diff --git a/labs/object-detection/intro-object-detection.md b/labs/object-detection/intro-object-detection.md
@@ -0,0 +1,46 @@
+(object-detection:intro-object-detection)=
+# 📚 객체 탐지 소개
+
+## 객체 탐지와 컴퓨터 비전
+
+객체 탐지(Object Detection)는 이미지나 동영상에서 특정 객체(들)을 식별하고, 식별한 객체가 위치한 영역을 표시하는 컴퓨터 비전(Computer Vision) 작업 중 하나입니다. 이는 인공지능 모델이 카메라를 들고 화면에 보이는 물체들을 찾아서 표시하고, 각 물체들에 이름표를 붙이는 작업을 하는 것으로 비유할 수 있습니다.
+
+이러한 객체 탐지 기술은 X-Ray나 MRI, CT 영상 등으로부터 병변을 탐지하는 의료 영상 분석 또는 도로 위의 차량이나 보행자, 신호 등을 탐지하는 자율주행 시스템 등에 활용됩니다.
+
+\
+객체 탐지는 컴퓨터 비전의 여러 응용 분야 중 하나로, 이미지 분류(Classification) 및 이미지 세분화(Segmentation)와 밀접한 연관이 있습니다. 예를 들어, 객체 탐지는 이미지 분류를 넘어 여러 객체를 탐지하고 각 객체의 위치를 제공하며, 이미지 세분화는 탐지된 객체의 경계까지 정확히 정의합니다. 입력/출력 데이터의 형태를 바탕으로 간단히 각 작업을 비교하면 다음과 같습니다:
+
+|작업|정의|입력 데이터|출력 데이터|주요 활용 사례|
+|---|---|---|---|---|
+|이미지 분류 (Classification)|이미지에서 하나의 객체 카테고리를 예측|단일 이미지|단일 라벨|이미지 검색, 동물 종류 분류|
+|객체 탐지 (Object Detection)|여러 객체의 존재 여부와 위치를 바운딩 박스로 예측|단일 이미지|여러 라벨과 바운딩 박스|자율주행 차량, 감시 시스템|
+|이미지 세분화 (Segmentation)|이미지의 모든 픽셀을 객체 영역으로 할당|단일 이미지|픽셀 단위 마스크|의료 영상 분석, 자율주행 정밀 지도 생성|
+
+\
+각 작업(Task)을 한 문장으로 정리하면 다음과 같습니다.
+- 이미지 분류는 단순히 "이 이미지는 무엇인가?"라는 질문에 답합니다.
+- 객체 탐지는 "무엇이 어디에 있는가?"라는 질문을 해결합니다.
+- 이미지 세분화는 "무엇이 어디에 있는가?"라는 질문에 픽셀 수준의 정밀한 결과를 제공합니다.
+
+\
+컴퓨터 비전(Computer Vision)과 관련한 더 다양한 작업들은 [PapersWithCode 사이트](https://paperswithcode.com/)를 참고해주세요:
+
+```{figure} images/paperswithcode-sota.png
+---
+alt: PapersWithCode 사이트의 Browse State-of-the-Art 메뉴
+name: PapersWithCode 사이트
+width: 640px
+align: center
+---
+바로가기: [https://paperswithcode.com/area/computer-vision](https://paperswithcode.com/area/computer-vision)
+```
+
+```{note}
+새로운 분야의 연구나 기술을 탐색할 때, PapersWithCode 사이트는 매우 유용한 정보를 제공합니다.
+주요 분야들에 대한 최신 연구 논문과 코드, 그리고 성능 평가 지표 등을 한눈에 확인할 수 있습니다.
+```
+
+
+## 객체 탐지 랩 소개
+
+이번 객체 탐지 랩에서는 PyTorch의 영상처리 특화 라이브러리(Domain API)인 torchvision을 활용합니다. torchvision을 사용하여 이미지를 불러오고, 변환하기 위한 방법들을 코드를 통해 배워봅니다. 이후, 사전 학습된 모델을 사용하고 개선하는 방법을 익혀보겠습니다.