-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
40 changed files
with
3,030 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# app.py | ||
from io import BytesIO | ||
|
||
from fastapi import FastAPI, File, UploadFile | ||
from fastapi.responses import JSONResponse | ||
from PIL import Image | ||
import torch | ||
import torchvision | ||
|
||
from model import load_model_and_preprocess | ||
from utils import filter_results | ||
|
||
# Create the FastAPI application
app = FastAPI()

# Load the model once at import time: on CUDA when it is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model, preprocess, meta = load_model_and_preprocess(device)

# Model registry returned by the /models endpoint:
# "id" is the module path of the model's class, "name" is the class name
models = [
    dict(
        id=model.__class__.__module__,
        name=model.__class__.__name__,
    ),
]
|
||
|
||
# Model information endpoint
@app.get("/models")
def get_models():
    """Return the module-level model registry as ``{"models": [...]}``."""
    payload = {"models": models}
    return JSONResponse(content=payload)
|
||
# Object detection endpoint
@app.post("/image:detect")
def detect_objects(image: UploadFile, threshold: float = 0.5, klass: int = None):
    """Detect objects in an uploaded image.

    Args:
        image: Uploaded file; its content type must start with ``image/``.
        threshold: Minimum score a detection needs in order to be returned.
        klass: When given, only detections with this class ID are returned.
            Negative values are rejected.

    Returns:
        A JSON response: ``{"objects": [...]}`` on success,
        ``{"error": ...}`` with status 415 when the input is rejected, or
        ``{"error": ...}`` with status 500 for any other failure.
    """
    try:
        # Reject non-image uploads. `content_type` is None when the client sent
        # no Content-Type for the part; fall back to "" so the request is
        # rejected cleanly instead of failing with a KeyError (a 500).
        if not (image.content_type or "").startswith('image/'):
            raise ValueError("Uploaded file is not an image")

        # Validate the class ID when one is given.
        # NOTE(review): this is reported as 415 like every other ValueError to
        # stay compatible with existing clients; 400/422 would describe it better.
        if klass is not None and klass < 0:
            raise ValueError("Invalid class ID")

        # Decode the upload into a PIL image. Converting to RGB turns
        # grayscale, palette and RGBA files into 3-channel images; without it
        # such uploads reach the model with an unexpected channel count.
        try:
            img_obj = Image.open(BytesIO(image.file.read())).convert("RGB")
        except OSError as e:
            # Covers PIL.UnidentifiedImageError and truncated files: the part
            # claimed to be image/* but its bytes cannot be decoded.
            raise ValueError("Uploaded file is not a valid image") from e

        # Inference only: disable autograd so no graph is built per request.
        with torch.no_grad():
            # Preprocess and add the batch dimension (single image).
            img_input = preprocess(img_obj).to(device)
            img_input = img_input.unsqueeze(0)

            # One image in, so only the first result is used.
            outputs = model(img_input)[0]

        # Keep only detections matching the requested threshold / class.
        results = filter_results(outputs, meta['categories'], threshold=threshold, klass=klass)

        return JSONResponse(content={"objects": results})
    except ValueError as e:
        return JSONResponse(content={"error": str(e)}, status_code=415)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# model.py | ||
|
||
# Object Detection 모델들 중, Faster R-CNN 모델 불러오기 | ||
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_V2_Weights, fasterrcnn_resnet50_fpn_v2 | ||
|
||
# Build the pretrained detector together with its preprocessing and metadata
def load_model_and_preprocess(device='cpu', box_score_thresh=0.5):
    """Load the pretrained Faster R-CNN (ResNet-50 FPN v2) detection model.

    Args:
        device: Device the model is moved to. Defaults to ``'cpu'``.
        box_score_thresh: Score threshold passed to the model constructor;
            the model itself drops detections scoring below it. A caller that
            filters results with a lower threshold afterwards must lower this
            value too, otherwise those detections never reach it.
            Defaults to 0.5, the previously hard-coded value.

    Returns:
        A ``(model, preprocess, meta)`` tuple: the model in eval mode on
        ``device``, the transforms bundled with the weights, and the weights'
        metadata.
    """
    weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
    model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=box_score_thresh)
    model.to(device)  # move the model to the requested device
    model.eval()      # inference mode, not training

    return model, weights.transforms(), weights.meta
|
||
|
||
if __name__ == '__main__':
    # Manual check: load with the defaults and print the model, the
    # preprocessing transforms and the metadata, one after another.
    for loaded_part in load_model_and_preprocess():
        print(loaded_part)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# PyTorch 및 torchvision | ||
torch==2.5.0 | ||
torchvision==0.20.0 | ||
|
||
# FastAPI | ||
fastapi==0.115.5 | ||
uvicorn==0.32.1 | ||
python-multipart==0.0.17 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# utils.py | ||
|
||
# Result filtering
def filter_results(outputs, categories, threshold=0.5, klass=None):
    """Select detections by score and, optionally, by class.

    Args:
        outputs: Mapping with parallel ``'labels'``, ``'scores'`` and
            ``'boxes'`` sequences, one entry per detection.
        categories: Container indexed by the integer label to get its name.
        threshold: Detections scoring below this value are dropped.
        klass: When not None, only detections whose integer label equals it
            are kept.

    Returns:
        A list of dicts with the keys ``"class"``, ``"label"``, ``"score"``
        and ``"bbox"``, in the original detection order.
    """
    detections = zip(outputs['labels'], outputs['scores'], outputs['boxes'])
    kept = []

    for raw_label, raw_score, raw_box in detections:
        if raw_score < threshold:
            continue

        class_id = int(raw_label)
        if klass is None or class_id == klass:
            kept.append({
                "class": class_id,
                "label": categories[class_id],
                "score": float(raw_score),
                "bbox": list(map(float, raw_box)),
            })

    return kept
|
||
|
||
# Manual check of filter_results
if __name__ == "__main__":
    sample_categories = dict(zip((1, 2, 3, 4), ("cat", "dog", "bird", "fish")))
    sample_outputs = dict(
        labels=[1, 1, 2, 3, 4],
        scores=[0.9, 0.8, 0.7, 0.6, 0.5],
        # Five boxes: [1, 2, 3, 4], [5, 6, 7, 8], ..., [17, 18, 19, 20]
        boxes=[[first, first + 1, first + 2, first + 3] for first in range(1, 21, 4)],
    )

    print(filter_results(sample_outputs, sample_categories, threshold=0.75))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import io | ||
import random | ||
import collections | ||
|
||
import requests | ||
import json | ||
import torch | ||
import torchvision | ||
|
||
from PIL import Image | ||
|
||
# 시각화 등을 위해 필요한 라이브러리 불러오기 | ||
from torchvision.utils import draw_bounding_boxes | ||
from torchvision.transforms.v2 import functional as F | ||
|
||
import matplotlib.pyplot as plt | ||
|
||
# Input image and the path the annotated copy is written to
fn_img_input = 'data/sample.jpg'
fn_img_output = 'data/sample_output.jpg'

# Call the REST API. The image is opened in a `with` block so the file handle
# is closed once the request has been sent.
url = 'http://localhost:8000/image:detect?threshold=0.8&klass=1'
with open(fn_img_input, 'rb') as f_img:
    files = [
        ('image', ('sample.jpg', f_img, 'image/jpeg'))
    ]
    response = requests.post(url, files=files)

# Stop on an error response: its body carries "error" instead of "objects",
# so continuing would only fail later with a KeyError.
if response.status_code != 200:
    print(f'Error: {response.status_code}')
    print(response.text)
    raise SystemExit(1)

# Inspect the results
results = json.loads(response.text)
objects = results['objects']
objects_types = [obj['label'] for obj in objects]
objects_counter = collections.Counter(objects_types)
# One random color per detected class, shared by all boxes of that class
color_type = {obj['class']: ("#%06x" % random.randint(0, 0xFFFFFF)) for obj in objects}
# reshape(-1, 4) yields a valid (0, 4) tensor when nothing was detected;
# torch.stack() on an empty list would raise instead.
box_coords = torch.tensor([obj['bbox'] for obj in objects], dtype=torch.float32).reshape(-1, 4)
box_labels = [f"{obj['label']}({obj['score']:.2f})" for obj in objects]
box_colors = [color_type[obj['class']] for obj in objects]

print(f'검출된 객체 수: {len(results["objects"])}')
print(f'검출된 객체 종류: {objects_counter}')

# Draw the detections on the input image and save the result
tensor_with_boxes = draw_bounding_boxes(torchvision.io.read_image(fn_img_input),
                                        boxes=box_coords,
                                        labels=box_labels,
                                        colors=box_colors,
                                        font='Verdana',
                                        font_size=20,
                                        width=2,)
F.to_pil_image(tensor_with_boxes).save(fn_img_output)
print(f'검출 결과 이미지 저장: {fn_img_output}')
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
####################################################################################### | ||
# A default configuration that will be loaded for all jupyter books | ||
# Users are expected to override these values in their own `_config.yml` file. | ||
# This is also the "master list" of all allowed keys and values. | ||
# See https://jupyterbook.org/en/stable/customize/config.html#add-a-link-to-your-repository | ||
|
||
####################################################################################### | ||
# Book settings | ||
title : PyTorch Hands-On Labs | ||
author : 파이토치 한국 사용자 모임 | ||
copyright : "2024" | ||
logo : ../_static/images/logo_ko.png | ||
# Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb") | ||
exclude_patterns : [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints"] | ||
# Auto-exclude files not in the toc | ||
only_build_toc_files : true | ||
|
||
####################################################################################### | ||
# Execution settings | ||
execute: | ||
execute_notebooks : cache # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off") | ||
cache : "../_build/.jupyter_cache" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/` | ||
exclude_patterns : [] # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time) | ||
timeout : 300 # The maximum time (in seconds) each notebook cell is allowed to run. | ||
run_in_temp : false # If `True`, then a temporary directory will be created and used as the command working directory (cwd), | ||
# otherwise the notebook's parent directory will be the cwd. | ||
allow_errors : true # If `False`, when a code cell raises an error the execution is stopped, otherwise all cells are always run. | ||
stderr_output : remove # One of 'show', 'remove', 'remove-warn', 'warn', 'error', 'severe' | ||
|
||
####################################################################################### | ||
# Parse and render settings | ||
parse: | ||
myst_enable_extensions: # default extensions to enable in the myst parser. See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html | ||
# - amsmath | ||
- colon_fence | ||
# - deflist | ||
- dollarmath | ||
# - html_admonition | ||
# - html_image | ||
- linkify | ||
# - replacements | ||
# - smartquotes | ||
- substitution | ||
- tasklist | ||
myst_url_schemes: [mailto, http, https] # URI schemes that will be recognised as external URLs in Markdown links | ||
myst_dmath_double_inline: true # Allow display math ($$) within an inline context | ||
|
||
####################################################################################### | ||
# HTML-specific settings | ||
html: | ||
favicon : "" # A path to a favicon image | ||
use_edit_page_button : true # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in | ||
use_repository_button : true # Whether to add a link to your repository button | ||
use_issues_button : true # Whether to add an "open an issue" button | ||
use_multitoc_numbering : true # Continuous numbering across parts/chapters | ||
extra_footer : "" # Will be displayed underneath the footer. | ||
home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar | ||
baseurl : "https://hands-on.pytorch.kr/" | ||
analytics: | ||
google_analytics_id : "G-5Z3BGEWMY9" # A GA id that can be used to track book views. | ||
|
||
comments: | ||
hypothesis : false | ||
utterances : false | ||
announcement : "" # A banner announcement at the top of the site. | ||
|
||
####################################################################################### | ||
# LaTeX-specific settings | ||
latex: | ||
latex_engine : pdflatex # one of 'pdflatex', 'xelatex' (recommended for unicode), 'luatex', 'platex', 'uplatex' | ||
use_jupyterbook_latex : true # use sphinx-jupyterbook-latex for pdf builds as default | ||
|
||
####################################################################################### | ||
# Launch button settings | ||
launch_buttons: | ||
notebook_interface : jupyterlab # The interface interactive links will activate ["classic", "jupyterlab"] | ||
binderhub_url : "" # The URL of the BinderHub (e.g., https://mybinder.org) | ||
jupyterhub_url : "" # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu) | ||
thebe : false # Add a thebe button to pages (requires the repository to run on Binder) | ||
colab_url : "https://colab.research.google.com" | ||
deepnote_url : "" # The URL of Deepnote (https://deepnote.com) | ||
|
||
repository: | ||
url : https://github.com/PyTorchKorea/hands-on # The URL to your book's repository | ||
path_to_book : "blob/master/labs" # A path to your book's folder, relative to the repository root. | ||
branch : master # Which branch of the repository should be used when creating links | ||
|
||
####################################################################################### | ||
# Advanced and power-user settings | ||
sphinx: | ||
extra_extensions : # A list of extra extensions to load by Sphinx (added to those already used by JB). | ||
local_extensions : # A list of local extensions to load by sphinx specified by "name: path" items | ||
recursive_update : false # A boolean indicating whether to overwrite the Sphinx config (true) or recursively update (false) | ||
config : # key-value pairs to directly over-ride the Sphinx configuration | ||
html_theme : sphinx_book_theme # modify this |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Table of contents | ||
# Learn more at https://jupyterbook.org/customize/toc.html | ||
|
||
format: jb-book | ||
root: index | ||
parts: | ||
# 객체 탐지 랩 | ||
- caption: 객체 탐지(Object Detection) | ||
chapters: | ||
- file: object-detection/intro-object-detection | ||
- file: object-detection/intro-torchvision | ||
- file: object-detection/torchvision-basic | ||
sections: | ||
- file: object-detection/torchvision-basic-preparation | ||
- file: object-detection/torchvision-basic-transforms | ||
- file: object-detection/pretrained-model-basic | ||
- file: object-detection/rest-api-intro | ||
sections: | ||
- file: object-detection/rest-api-preparation | ||
- file: object-detection/rest-api-implementation-1 | ||
- file: object-detection/rest-api-implementation-2 | ||
- file: object-detection/rest-api-implementation-3 | ||
- file: object-detection/rest-api-implementation-4 | ||
- file: object-detection/rest-api-test | ||
- file: object-detection/rest-api-visualization | ||
- file: object-detection/rest-api-improvement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# 파이토치 핸즈온 랩 소개 | ||
|
||
**파이토치 핸즈온 랩(PyTorch Hands-on Lab)** 에 오신 것을 환영합니다! 이 핸즈온 랩은 개발자들이 PyTorch를 활용한 인공지능 모델을 직접 사용하고 배포해 보면서 배우는 것을 목표로 합니다. 모든 랩(Lab)은 Python 언어 및 일반적인 개발 환경에 대한 지식이 있다고 가정하며, PyTorch를 활용한 인공지능 모델 개발 및 배포 과정을 직접 실행해 볼 수 있도록 구성되어 있습니다. | ||
|
||
왼쪽 메뉴의 각 랩(Lab)은 서로 독립적으로 구성되어 있어, 필요한 랩만 따로 참고하실 수 있습니다. | ||
|
||
다음은 각 랩에 대한 소개입니다: | ||
|
||
## 객체 탐지(Object Detection) | ||
|
||
객체 탐지(Object Detection)에 대한 소개 및 개념을 설명하고, 사전 학습된 객체 탐지 모델을 가져와 API를 만들어봅니다. | ||
|
||
다음 링크를 눌러 객체 탐지 랩을 시작해보세요. 👉 | ||
{ref}`object-detection:intro-object-detection` | ||
|
||
|
||
*(TBD)* |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
(object-detection:intro-object-detection)= | ||
# 📚 객체 탐지 소개 | ||
|
||
## 객체 탐지와 컴퓨터 비전 | ||
|
||
객체 탐지(Object Detection)는 이미지나 동영상에서 특정 객체(들)을 식별하고, 식별한 객체가 위치한 영역을 표시하는 컴퓨터 비전(Computer Vision) 작업 중 하나입니다. 이는 인공지능 모델이 카메라를 들고 화면에 보이는 물체들을 찾아서 표시하고, 각 물체들에 이름표를 붙이는 작업을 하는 것으로 비유할 수 있습니다. | ||
|
||
이러한 객체 탐지 기술은 X-Ray나 MRI, CT 영상 등으로부터 병변을 탐지하는 의료 영상 분석 또는 도로 위의 차량이나 보행자, 신호 등을 탐지하는 자율주행 시스템 등에 활용됩니다. | ||
|
||
\ | ||
객체 탐지는 컴퓨터 비전의 여러 응용 분야 중 하나로, 이미지 분류(Classification) 및 이미지 세분화(Segmentation)와 밀접한 연관이 있습니다. 예를 들어, 객체 탐지는 이미지 분류에서 더 나아가 여러 객체를 탐지하고 각 객체의 위치를 제공하며, 이미지 세분화는 탐지된 객체의 경계까지 정확히 정의합니다. 입력/출력 데이터의 형태를 바탕으로 각 작업을 간단히 비교하면 다음과 같습니다: | ||
|
||
|작업|정의|입력 데이터|출력 데이터|주요 활용 사례| | ||
|---|---|---|---|---| | ||
|이미지 분류 (Classification)|이미지에서 하나의 객체 카테고리를 예측|단일 이미지|단일 라벨|이미지 검색, 동물 종류 분류| | ||
|객체 탐지 (Object Detection)|여러 객체의 존재 여부와 위치를 바운딩 박스로 예측|단일 이미지|여러 라벨과 바운딩 박스|자율주행 차량, 감시 시스템| | ||
|이미지 세분화 (Segmentation)|이미지의 모든 픽셀을 객체 영역으로 할당|단일 이미지|픽셀 단위 마스크|의료 영상 분석, 자율주행 정밀 지도 생성| | ||
|
||
\ | ||
각 작업(Task)을 한 문장으로 정리하면 다음과 같습니다. | ||
- 이미지 분류는 단순히 "이 이미지는 무엇인가?"라는 질문에 답합니다. | ||
- 객체 탐지는 "무엇이 어디에 있는가?"라는 질문을 해결합니다. | ||
- 이미지 세분화는 "무엇이 어디에 있는가?"라는 질문에 픽셀 수준의 정밀한 결과를 제공합니다. | ||
|
||
\ | ||
컴퓨터 비전(Computer Vision)과 관련한 더 다양한 작업들은 [PapersWithCode 사이트](https://paperswithcode.com/)를 참고해주세요: | ||
|
||
```{figure} images/paperswithcode-sota.png | ||
--- | ||
alt: PapersWithCode 사이트의 Browse State-of-the-Art 메뉴 | ||
name: PapersWithCode 사이트 | ||
width: 640px | ||
align: center | ||
--- | ||
바로가기: [https://paperswithcode.com/area/computer-vision](https://paperswithcode.com/area/computer-vision) | ||
``` | ||
|
||
```{note} | ||
새로운 분야의 연구나 기술을 탐색할 때, PapersWithCode 사이트는 매우 유용한 정보를 제공합니다. | ||
주요 분야들에 대한 최신 연구 논문과 코드, 그리고 성능 평가 지표 등을 한눈에 확인할 수 있습니다. | ||
``` | ||
|
||
|
||
## 객체 탐지 랩 소개 | ||
|
||
이번 객체 탐지 랩에서는 PyTorch의 영상처리 특화 라이브러리(Domain API)인 torchvision을 활용합니다. torchvision을 사용하여 이미지를 불러오고, 변환하기 위한 방법들을 코드를 통해 배워봅니다. 이후, 사전 학습된 모델을 사용하고 개선하는 방법을 익혀보겠습니다. |
Oops, something went wrong.