From 6f8a7d9ed2851ead4e3e285e930493e630149767 Mon Sep 17 00:00:00 2001
From: Mohak
Date: Sun, 12 Jan 2025 16:20:22 +0530
Subject: [PATCH] added my leprosy detection project, which was built using ViT

---
 Computer Vision/Leprosy Detection/README.md | 206 +++++++++
 Computer Vision/Leprosy Detection/test.py   | 180 +++++++++
 Computer Vision/Leprosy Detection/train.py  | 383 ++++++++++++++++++++
 3 files changed, 769 insertions(+)
 create mode 100644 Computer Vision/Leprosy Detection/README.md
 create mode 100644 Computer Vision/Leprosy Detection/test.py
 create mode 100644 Computer Vision/Leprosy Detection/train.py

diff --git a/Computer Vision/Leprosy Detection/README.md b/Computer Vision/Leprosy Detection/README.md
new file mode 100644
index 0000000000..e5932282a8
--- /dev/null
+++ b/Computer Vision/Leprosy Detection/README.md
@@ -0,0 +1,206 @@

# Leprosy Detection System

## Overview
This project implements an automated system for detecting leprosy using machine learning and image processing techniques. It aims to assist healthcare professionals in the early diagnosis of leprosy by analyzing skin lesion images.

## Features
- Automated analysis of skin lesion images
- Support for multiple image formats (JPG, PNG)
- Pre-processing pipeline for image enhancement
- Deep learning model for lesion classification
- User-friendly interface for healthcare professionals
- Detailed report generation

## Hardware Requirements

### Minimum Requirements
- 2x NVIDIA Tesla T4 GPUs (or equivalent)
- 16GB+ GPU memory
- 32GB RAM recommended
- 50GB available storage space

### Development Setup
The model was developed and tested on:
- NVIDIA Tesla T4 GPUs (2x)
- CUDA 11.x
- PyTorch with CUDA support

Note: Training time may vary significantly with different hardware configurations. The model is optimized for multi-GPU training using `DataParallel`; a minimal device-setup sketch is shown below.
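The snippet below is a condensed sketch of how `src/train.py` places the model on the available GPUs. It assumes `CustomViT` from `src/train.py` is importable (e.g. `src/` is on `PYTHONPATH`) and simply mirrors the setup used there.

```python
import torch
import torch.nn as nn

from train import CustomViT  # assumes src/ is on PYTHONPATH

# Pick the GPU(s) if available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"CUDA devices visible: {torch.cuda.device_count()}")

model = CustomViT(num_classes=2)

# DataParallel splits each batch across all visible GPUs (e.g. 2x Tesla T4)
# and gathers the outputs back on the default device.
model = nn.DataParallel(model)
model = model.to(device)
```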
## Installation
1. Clone the repository:
```bash
git clone https://github.com/yourusername/leprosy-detection.git
cd leprosy-detection
```

2. Create a virtual environment:
```bash
python -m venv venv
source venv/bin/activate  # On Windows: venv\Scripts\activate
```

3. Install dependencies:
```bash
pip install -r requirements.txt
```

## Usage

### Training the Model
```bash
python src/train.py
```

### Testing/Inference
The model can be used for inference with the provided testing script:

```bash
python src/test.py
```

Key features of the testing module:
- Supports batch processing of multiple images
- Displays predictions with confidence scores
- Visualizes results using matplotlib
- Handles both CPU and GPU inference

#### Testing Configuration
```python
# Example configuration
model_path = 'best_custom_vit_mo.pth'
num_classes = 2
class_names = ['Leprosy', 'No Lep']

# Image preprocessing parameters
image_size = 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
```

#### Custom Inference
```python
import torch

from test import load_model, preprocess_image, predict  # defined in src/test.py

# Image preprocessing parameters (ImageNet statistics)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model('best_custom_vit_mo.pth', num_classes=2, device=device)

# Process single image
image_tensor = preprocess_image('path/to/image.jpg', mean, std)
category_id, probability = predict(model, image_tensor, device)
```

## Dataset
The project uses a custom dataset with COCO-style annotations:
- Training, validation, and test sets are provided separately
- Images are annotated with binary labels (Leprosy/Non-Leprosy)
- The dataset is loaded with a custom `LeprosyDataset` class extending `torch.utils.data.Dataset`

### Source
The dataset is available on Roboflow Universe:
- Dataset Link: [AI Leprosy Detection Dataset](https://universe.roboflow.com/intelligent-systems-1b35z/ai-leprosy-bbdnr)
- Format: COCO JSON
- Classes: Binary classification (Leprosy/Non-Leprosy)

### Dataset Structure
The dataset is split into:
- Training set
- Validation set
- Test set

Each set contains:
- RGB images
- COCO format annotations (`_annotations.coco.json`)

### Accessing the Dataset
1. Visit the [dataset page](https://universe.roboflow.com/intelligent-systems-1b35z/ai-leprosy-bbdnr)
2. Create a Roboflow account if needed
3. Download the dataset in COCO format
4. Place the downloaded splits in the `data/` directory shown in the Project Structure below
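A minimal sketch for sanity-checking a downloaded split. It assumes the split was placed under `data/train/` (a hypothetical local path; adjust as needed) and only touches the keys that `LeprosyDataset` in `src/train.py` actually reads, plus the standard COCO `categories` list.

```python
import json
from collections import Counter

# Hypothetical local path; adjust to wherever the split was downloaded.
annotations_file = "data/train/_annotations.coco.json"

with open(annotations_file, "r") as f:
    coco = json.load(f)

# The loader in src/train.py reads these keys.
print(f"images:      {len(coco['images'])}")
print(f"annotations: {len(coco['annotations'])}")
print(f"categories:  {[c['name'] for c in coco.get('categories', [])]}")

# Distribution of raw category ids across annotations.
counts = Counter(ann["category_id"] for ann in coco["annotations"])
print(f"category_id counts: {dict(counts)}")
```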
## Project Structure
```
leprosy-detection/
├── src/
│   ├── train.py        # Training script
│   ├── test.py         # Inference script
│
├── data/
│   ├── train/
│   │   ├── images/
│   │   └── _annotations.coco.json
│   ├── valid/
│   │   ├── images/
│   │   └── _annotations.coco.json
│   └── test/
│       ├── images/
│       └── _annotations.coco.json
├── models/             # Saved model checkpoints
├── results/            # Training results and visualizations
├── docs/
└── requirements.txt
```

## Model Architecture
The system implements a custom Vision Transformer (ViT) architecture configured for leprosy detection:

### Key Components
- **Patch Embedding**: Converts input images (224x224) into patches (16x16) and projects them to the embedding dimension (768)
- **Transformer Blocks**: 12 layers of transformer blocks with:
  - Multi-head self-attention (12 heads)
  - Layer normalization
  - MLP with GELU activation
  - Dropout for regularization
- **Classification Head**: Final linear layer for binary classification (Leprosy vs Non-Leprosy)

### Training Details
- Batch Size: 32
- Optimizer: Adam (learning rate: 0.0001)
- Loss Function: Cross Entropy Loss
- Training Duration: 20 epochs
- Preprocessing: Resize to 224x224 and normalization with ImageNet statistics (no additional augmentation)
- Model Selection: Best model saved based on validation accuracy
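The shapes quoted above can be checked with a few lines of PyTorch. This standalone sketch does not import the project code; it just reproduces the patch-embedding arithmetic described under Key Components.

```python
import torch
import torch.nn as nn

img_size, patch_size, embed_dim = 224, 16, 768

# 224/16 = 14 patches per side -> 14*14 = 196 patches per image.
n_patches = (img_size // patch_size) ** 2
print(n_patches)  # 196

# Patch embedding is a strided convolution, as in the PatchEmbedding module.
proj = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size)
dummy = torch.randn(1, 3, img_size, img_size)
tokens = proj(dummy).flatten(2).transpose(1, 2)
print(tokens.shape)  # torch.Size([1, 196, 768])

# With the prepended CLS token the transformer sees 197 tokens of width 768,
# and the classification head maps the CLS embedding to the 2 output classes.
```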
## Performance Metrics
The model's performance is evaluated with several metrics:
- Training and validation metrics tracked per epoch
- Confusion matrices generated for detailed error analysis
- Final evaluation on the test set includes:
  - Accuracy
  - Precision
  - Recall (Sensitivity)
  - F1 Score
  - Loss values

### Visualization
- Training history plots showing:
  - Loss curves (training and validation)
  - Accuracy progression
  - Precision, Recall, and F1 score trends
- Confusion matrices for each epoch and the final test results
- All visualizations saved automatically with timestamps
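A condensed illustration of how these numbers are produced. It mirrors the `calculate_metrics` and `plot_confusion_matrix` helpers in `src/train.py` (scikit-learn for the scores, seaborn/matplotlib for the heatmap) on a small made-up set of labels; it is not the project's evaluation code itself.

```python
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix)

# Made-up labels purely for illustration (0 = Leprosy, 1 = Non-Leprosy).
y_true = [0, 0, 1, 1, 0, 1, 0, 1]
y_pred = [0, 1, 1, 1, 0, 1, 0, 0]

print(f"accuracy:  {accuracy_score(y_true, y_pred):.4f}")
print(f"precision: {precision_score(y_true, y_pred, average='binary'):.4f}")
print(f"recall:    {recall_score(y_true, y_pred, average='binary'):.4f}")
print(f"f1:        {f1_score(y_true, y_pred, average='binary'):.4f}")

# Same style of confusion-matrix heatmap that train.py saves with a timestamp.
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Leprosy", "Non-Leprosy"],
            yticklabels=["Leprosy", "Non-Leprosy"])
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()
```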
## Contributing
1. Fork the repository
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request

## License
This project is licensed under the MIT License - see the LICENSE file for details.

## Acknowledgments
- World Health Organization (WHO) for providing clinical guidelines
- Contributing healthcare institutions for providing validated datasets
- Research partners and medical professionals for expert guidance

## Contact
- Project Maintainer: Mohak
- Email: mohakgupta0981@gmail.com
- Project Link: https://github.com/lukiod/Levit

## Disclaimer
This tool is designed to assist healthcare professionals and should not be used as the sole basis for diagnosis. Always consult qualified medical professionals for proper diagnosis and treatment.

diff --git a/Computer Vision/Leprosy Detection/test.py b/Computer Vision/Leprosy Detection/test.py
new file mode 100644
index 0000000000..99c3cc2531
--- /dev/null
+++ b/Computer Vision/Leprosy Detection/test.py
@@ -0,0 +1,180 @@

import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Define the CustomViT model (this should match your training model architecture)
class PatchEmbedding(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.n_patches = (img_size // patch_size) ** 2
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        x = self.proj(x)       # (B, embed_dim, H', W')
        x = x.flatten(2)       # (B, embed_dim, H'*W')
        x = x.transpose(1, 2)  # (B, H'*W', embed_dim)
        return x

class Attention(nn.Module):
    def __init__(self, dim, n_heads=12, qkv_bias=True, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.n_heads = n_heads
        self.scale = (dim // n_heads) ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.n_heads, C // self.n_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

class TransformerBlock(nn.Module):
    def __init__(self, dim, n_heads, mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, n_heads=n_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
        self.norm2 = nn.LayerNorm(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop)
        )

    def forward(self, x):
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))
        return x

class CustomViT(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=1000, embed_dim=768, depth=12, n_heads=12, mlp_ratio=4., qkv_bias=True, drop_rate=0.):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, 1 + self.patch_embed.n_patches, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList([
            TransformerBlock(embed_dim, n_heads, mlp_ratio, qkv_bias, drop_rate, drop_rate)
            for _ in range(depth)
        ])

        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        for block in self.blocks:
            x = block(x)

        x = self.norm(x)
        x = x[:, 0]
        x = self.head(x)
        return x
def load_model(model_path, num_classes, device):
    # Load the state dict
    state_dict = torch.load(model_path, map_location=device, weights_only=True)

    # Check the number of classes in the saved model
    saved_num_classes = state_dict['module.head.weight'].size(0)

    # Initialize the model with the correct number of classes
    model = CustomViT(num_classes=saved_num_classes)
    model = nn.DataParallel(model)

    # Load the state dict
    model.load_state_dict(state_dict)

    # If the number of classes doesn't match, replace the head
    if saved_num_classes != num_classes:
        print(f"Warning: Number of classes in saved model ({saved_num_classes}) "
              f"doesn't match the specified number of classes ({num_classes}). "
              "Replacing the classification head.")
        model.module.head = nn.Linear(768, num_classes)  # Assuming embed_dim is 768

    model.to(device)
    model.eval()
    return model

def preprocess_image(image_path, mean, std):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
    image = Image.open(image_path).convert('RGB')
    return transform(image).unsqueeze(0)

def predict(model, image_tensor, device):
    with torch.no_grad():
        outputs = model(image_tensor.to(device))
        _, predicted = outputs.max(1)
        probability = torch.nn.functional.softmax(outputs, dim=1)[0]
        return predicted.item(), probability[predicted.item()].item()

def display_prediction(image_path, category_id, probability, class_names):
    image = Image.open(image_path)
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis('off')
    class_name = class_names[category_id] if class_names else f"Category {category_id}"
    plt.title(f"Predicted: {class_name}\nProbability: {probability:.2f}")
    plt.show()

def test_model(model_path, num_classes, image_paths, class_names):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    model = load_model(model_path, num_classes, device)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    for image_path in image_paths:
        try:
            image_tensor = preprocess_image(image_path, mean, std)
            category_id, probability = predict(model, image_tensor, device)
            display_prediction(image_path, category_id, probability, class_names)
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")

if __name__ == "__main__":
    model_path = 'best_custom_vit_mo50.pth'
    num_classes = 2  # The number of classes you expect

    # Specify your image paths here
    image_paths = [
        '/kaggle/input/cocoform/train/Non-lep-_210823_20_jpg.rf.507c4cfff3f2d5cd03271d4383b5cf7d.jpg',
    ]

    # Specify your class names here
    class_names = ['Leprosy', 'No Lep']  # Update this based on your actual classes

    test_model(model_path, num_classes, image_paths, class_names)
diff --git a/Computer Vision/Leprosy Detection/train.py b/Computer Vision/Leprosy Detection/train.py
new file mode 100644
index 0000000000..a2e069f7b8
--- /dev/null
+++ b/Computer Vision/Leprosy Detection/train.py
@@ -0,0 +1,383 @@

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import json
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

class PatchEmbedding(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.n_patches = (img_size // patch_size) ** 2
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        x = self.proj(x)
        x = x.flatten(2)
        x = x.transpose(1, 2)
        return x

class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

class TransformerBlock(nn.Module):
    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
        self.norm2 = nn.LayerNorm(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop)
        )

    def forward(self, x):
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))
        return x

class CustomViT(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=2, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4., qkv_bias=True, drop_rate=0.1):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        num_patches = self.patch_embed.n_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList([
            TransformerBlock(embed_dim, num_heads, mlp_ratio, qkv_bias, drop_rate, drop_rate)
            for _ in range(depth)
        ])

        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        for block in self.blocks:
            x = block(x)

        x = self.norm(x)
        x = x[:, 0]
        x = self.head(x)
        return x

class LeprosyDataset(Dataset):
    def __init__(self, img_dir, annotations_file, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)
        self.images = self.annotations['images']
        self.categories = {0: "Leprosy", 1: "Non Leprosy"}
        self.img_to_label = {}

        for ann in self.annotations['annotations']:
            original_category = ann['category_id']
            binary_label = 0 if original_category in [0, 1] else 1
            self.img_to_label[ann['image_id']] = binary_label

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_info = self.images[idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        label = self.img_to_label[img_info['id']]
        return image, label

def calculate_metrics(y_true, y_pred):
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average='binary'),
        'recall': recall_score(y_true, y_pred, average='binary'),
        'f1': f1_score(y_true, y_pred, average='binary'),
        'confusion_matrix': confusion_matrix(y_true, y_pred)
    }

def plot_confusion_matrix(cm, classes, title='Confusion Matrix'):
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()

    # Save the plot
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    plt.savefig(f'confusion_matrix_{timestamp}.png')
    plt.close()

def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    all_labels = []
    all_predictions = []

    for inputs, labels in tqdm(loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

    metrics = calculate_metrics(all_labels, all_predictions)
    return running_loss / len(loader), metrics

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Validating"):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = outputs.max(1)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    metrics = calculate_metrics(all_labels, all_predictions)
    return running_loss / len(loader), metrics
def main():
    # Set up transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create datasets and dataloaders
    train_dataset = LeprosyDataset('/kaggle/input/cocoform/train',
                                   '/kaggle/input/cocoform/train/_annotations.coco.json',
                                   transform=transform)
    val_dataset = LeprosyDataset('/kaggle/input/cocoform/valid',
                                 '/kaggle/input/cocoform/valid/_annotations.coco.json',
                                 transform=transform)
    test_dataset = LeprosyDataset('/kaggle/input/cocoform/test',
                                  '/kaggle/input/cocoform/test/_annotations.coco.json',
                                  transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Initialize model and move to device
    model = CustomViT(num_classes=2)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = nn.DataParallel(model)
    model = model.to(device)

    # Set up loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Training loop with detailed metrics
    num_epochs = 20
    best_val_acc = 0
    all_metrics = []

    print("Training Started...")
    print("-" * 120)
    print(f"{'Epoch':^6} | {'Train Loss':^10} | {'Train Acc':^9} | {'Train Prec':^10} | {'Train Rec':^9} | {'Train F1':^8} | "
          f"{'Val Loss':^8} | {'Val Acc':^7} | {'Val Prec':^8} | {'Val Rec':^7} | {'Val F1':^6}")
    print("-" * 120)

    for epoch in range(num_epochs):
        train_loss, train_metrics = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_metrics = validate(model, val_loader, criterion, device)

        # Store metrics
        epoch_metrics = {
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_metrics': train_metrics,
            'val_loss': val_loss,
            'val_metrics': val_metrics
        }
        all_metrics.append(epoch_metrics)

        # Print metrics
        print(f"{epoch+1:6d} | {train_loss:10.4f} | {train_metrics['accuracy']:9.4f} | "
              f"{train_metrics['precision']:10.4f} | {train_metrics['recall']:9.4f} | {train_metrics['f1']:8.4f} | "
              f"{val_loss:8.4f} | {val_metrics['accuracy']:7.4f} | {val_metrics['precision']:8.4f} | "
              f"{val_metrics['recall']:7.4f} | {val_metrics['f1']:6.4f}")

        # Save best model
        if val_metrics['accuracy'] > best_val_acc:
            best_val_acc = val_metrics['accuracy']
            torch.save(model.state_dict(), 'best_custom_vit_mo.pth')
            print("✓ New best model saved!")

        # Plot confusion matrix for this epoch
        plot_confusion_matrix(train_metrics['confusion_matrix'],
                              classes=["Leprosy", "Non-Leprosy"],
                              title=f'Training Confusion Matrix - Epoch {epoch+1}')

    print("-" * 120)
    print("Training Complete!")

    # Test evaluation
    print("\nEvaluating Best Model on Test Set...")
    model.load_state_dict(torch.load('best_custom_vit_mo.pth'))
    test_loss, test_metrics = validate(model, test_loader, criterion, device)

    print("\nFinal Test Results:")
    print("-" * 50)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_metrics['accuracy']:.4f}")
    print(f"Test Precision: {test_metrics['precision']:.4f}")
    print(f"Test Recall: {test_metrics['recall']:.4f}")
    print(f"Test F1-Score: {test_metrics['f1']:.4f}")
    print("-" * 50)

    # Plot final test confusion matrix
    plot_confusion_matrix(test_metrics['confusion_matrix'],
                          classes=["Leprosy", "Non-Leprosy"],
                          title='Final Test Confusion Matrix')

    # Save all metrics to file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    with open(f'training_metrics_{timestamp}.txt', 'w') as f:
        f.write("Training Metrics Summary\n")
        f.write("=" * 50 + "\n\n")

        # Write epoch-wise metrics
        f.write("Epoch-wise Metrics:\n")
        f.write("-" * 50 + "\n")
        for metric in all_metrics:
            f.write(f"Epoch {metric['epoch']}:\n")
            f.write(f"  Training:\n")
            f.write(f"    Loss: {metric['train_loss']:.4f}\n")
            f.write(f"    Accuracy: {metric['train_metrics']['accuracy']:.4f}\n")
            f.write(f"    Precision: {metric['train_metrics']['precision']:.4f}\n")
            f.write(f"    Recall: {metric['train_metrics']['recall']:.4f}\n")
            f.write(f"    F1-Score: {metric['train_metrics']['f1']:.4f}\n")
            f.write(f"  Validation:\n")
            f.write(f"    Loss: {metric['val_loss']:.4f}\n")
            f.write(f"    Accuracy: {metric['val_metrics']['accuracy']:.4f}\n")
            f.write(f"    Precision: {metric['val_metrics']['precision']:.4f}\n")
            f.write(f"    Recall: {metric['val_metrics']['recall']:.4f}\n")
            f.write(f"    F1-Score: {metric['val_metrics']['f1']:.4f}\n")
            f.write("\n")

        # Write final test metrics
        f.write("\nFinal Test Metrics:\n")
        f.write("-" * 50 + "\n")
        f.write(f"Loss: {test_loss:.4f}\n")
        f.write(f"Accuracy: {test_metrics['accuracy']:.4f}\n")
        f.write(f"Precision: {test_metrics['precision']:.4f}\n")
        f.write(f"Recall: {test_metrics['recall']:.4f}\n")
        f.write(f"F1-Score: {test_metrics['f1']:.4f}\n")

    # Plot training history
    plt.figure(figsize=(12, 8))
    epochs = range(1, num_epochs + 1)

    # Plot training metrics
    plt.subplot(2, 2, 1)
    plt.plot([m['train_loss'] for m in all_metrics], label='Train Loss')
    plt.plot([m['val_loss'] for m in all_metrics], label='Val Loss')
    plt.title('Loss History')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(2, 2, 2)
    plt.plot([m['train_metrics']['accuracy'] for m in all_metrics], label='Train Accuracy')
    plt.plot([m['val_metrics']['accuracy'] for m in all_metrics], label='Val Accuracy')
    plt.title('Accuracy History')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(2, 2, 3)
    plt.plot([m['train_metrics']['precision'] for m in all_metrics], label='Train Precision')
    plt.plot([m['train_metrics']['recall'] for m in all_metrics], label='Train Recall')
    plt.plot([m['train_metrics']['f1'] for m in all_metrics], label='Train F1')
    plt.title('Training Metrics History')
    plt.xlabel('Epoch')
    plt.ylabel('Score')
    plt.legend()

    plt.subplot(2, 2, 4)
    plt.plot([m['val_metrics']['precision'] for m in all_metrics], label='Val Precision')
    plt.plot([m['val_metrics']['recall'] for m in all_metrics], label='Val Recall')
    plt.plot([m['val_metrics']['f1'] for m in all_metrics], label='Val F1')
    plt.title('Validation Metrics History')
    plt.xlabel('Epoch')
    plt.ylabel('Score')
    plt.legend()

    plt.tight_layout()
    plt.savefig(f'training_history_{timestamp}.png')
    plt.close()

if __name__ == "__main__":
    main()