From d752b8fbf3caacb408259909d66ded4f6ea7204a Mon Sep 17 00:00:00 2001 From: KrishanYadav333 Date: Fri, 6 Feb 2026 13:57:22 +0530 Subject: [PATCH 1/3] Pre-GSoC: Multi-dimensional location proximity framework with documentation, code stubs, and tests This PR establishes the foundation for multi-dimensional location-proximity analysis in DREAMS, building upon existing EXIF extraction (PR #77) and emotion proximity (PR #70). ## Documentation (9 new/updated files) - docs/api_design.md: REST API specification for location-proximity endpoints - docs/evaluation_metrics.md: Quantitative metrics and ablation study plan - docs/exif_extraction_research.md: Library comparison research (informed PR #77) - docs/integration_guide.md: Step-by-step integration instructions - docs/project_roadmap.md: GSoC 2026 timeline aligned with official dates (350h) - docs/risk_analysis.md: Risk matrix and mitigation strategies - docs/TEST_PLAN.md: Extended with 50+ location-proximity test cases - plans/pre_gsoc_contribution_plan.md: 7-week, 18-PR contribution roadmap - dreamsApp/docs/data-model.md: Added location_analysis and emotion_location_entries collections ## Code Implementation - dreamsApp/exif_extractor.py: NEW - Complete EXIF extraction with dual-library fallback - dreamsApp/location_proximity.py: Updated stubs with EXIFExtractor integration - ARCHITECTURE.md: Updated diagram to show integration with PR #77 and #70 - LOCATION_PROXIMITY_SUMMARY.md: Added acknowledgment of existing work ## Tests - tests/test_exif_extraction.py: NEW - Unit tests for EXIF extractor with mocking ## Code Quality - Removed emojis from entire project (8 files) for professional documentation - data_integrity/reporter.py: Replaced emoji indicators with text - dream-integration/app/templates/index.html: Replaced emoji UI elements - dreamsApp/app/dashboard/main.py: Removed emoji comments ## Integration Points - Builds upon PR #77 (kunal-595): Uses existing EXIFExtractor class - Complements PR #70 (AnvayKharb): 
Adds spatial proximity to time-aware emotion analysis - Aligns with PR #79 (anish1206): Emotion-location work supports CHIME framework ## Key Features - Multi-dimensional proximity: geographic + categorical + linguistic + cultural - Emotion-location hotspot detection - Semantic clustering with DBSCAN - MongoDB schema extensions for location data - Performance benchmarks and evaluation framework Total: 10 new files, 9 updated files, 350 hours planned for GSoC 2026 implementation --- .gitignore | 1 + ARCHITECTURE.md | 8 +- LOCATION_PROXIMITY_SUMMARY.md | 11 +- data_integrity/reporter.py | 4 +- docs/TEST_PLAN.md | 246 ++++++++++ docs/api_design.md | 447 ++++++++++++++++++ docs/evaluation_metrics.md | 380 ++++++++++++++++ docs/exif_extraction_research.md | 205 +++++++++ docs/integration_guide.md | 498 +++++++++++++++++++++ docs/project_roadmap.md | 463 +++++++++++++++++++ docs/risk_analysis.md | 404 +++++++++++++++++ dream-integration/app/templates/index.html | 8 +- dreamsApp/app/dashboard/main.py | 2 +- dreamsApp/docs/data-model.md | 109 +++++ dreamsApp/exif_extractor.py | 172 +++++++ dreamsApp/location_proximity.py | 11 +- location_proximity/README.md | 9 +- plans/pre_gsoc_contribution_plan.md | 197 ++++++++ tests/test_exif_extraction.py | 70 +++ 19 files changed, 3227 insertions(+), 18 deletions(-) create mode 100644 docs/api_design.md create mode 100644 docs/evaluation_metrics.md create mode 100644 docs/exif_extraction_research.md create mode 100644 docs/integration_guide.md create mode 100644 docs/project_roadmap.md create mode 100644 docs/risk_analysis.md create mode 100644 dreamsApp/exif_extractor.py create mode 100644 plans/pre_gsoc_contribution_plan.md create mode 100644 tests/test_exif_extraction.py diff --git a/.gitignore b/.gitignore index f14c4d8..2ba88cd 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,7 @@ MANIFEST # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other 
infos into it. *.manifest +docs/PR_SUMMARY.md *.spec # Installer logs diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index f1382aa..18df4f5 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -40,14 +40,14 @@ Dreams │ ▼ ┌────────────────────────────────────────────────────────────────┐ -│ LOCATION-PROXIMITY MODULE │ -│ (Your Contribution) │ +│ LOCATION-PROXIMITY MODULE (GSoC 2026) │ +│ Building upon PR #77 (EXIF) & PR #70 (Emotion) │ │ │ │ ┌──────────────────────────────────────────────────────────┐ │ -│ │ 1. Location Extractor │ │ +│ │ 1. Location Extractor (uses existing EXIFExtractor) │ │ │ │ Input: Image file │ │ │ │ Output: {lat, lon, timestamp} │ │ -│ │ Tech: Pillow EXIF parsing │ │ +│ │ Tech: Pillow EXIF parsing (from PR #77) │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ │ │ ▼ │ diff --git a/LOCATION_PROXIMITY_SUMMARY.md b/LOCATION_PROXIMITY_SUMMARY.md index 0c387f6..843840b 100644 --- a/LOCATION_PROXIMITY_SUMMARY.md +++ b/LOCATION_PROXIMITY_SUMMARY.md @@ -6,6 +6,11 @@ A new module for DREAMS that analyzes **multi-dimensional location proximity** t **Key Innovation**: Goes beyond GPS coordinates to consider categorical, linguistic, and cultural dimensions of location similarity. +**Building Upon Existing Work**: +- **PR #77** (kunal-595): EXIF GPS extraction - we use `dreamsApp/exif_extractor.py` for location data +- **PR #70** (AnvayKharb): Time-aware emotion proximity - we integrate with `analytics/emotion_proximity.py` +- **Our Contribution**: Multi-dimensional spatial proximity analysis (geographic + categorical + linguistic + cultural) + --- ## Module Location @@ -41,8 +46,8 @@ DEMO 1: Multi-Dimensional Proximity Calculation St. 
Mary's Church ↔ Holy Trinity Church : 0.850 Alaska Native Medical Center ↔ Providence Hospital : 0.725 -✓ Notice: Two churches have high proximity despite different locations -✓ Notice: Two hospitals cluster together semantically +Notice: Two churches have high proximity despite different locations +Notice: Two hospitals cluster together semantically ``` --- @@ -255,6 +260,6 @@ Same as DREAMS project (see [LICENSE](LICENSE)) --- -**Status**: ✅ Ready for integration and testing +**Status**: Ready for integration and testing **Version**: 0.1.0 **Last Updated**: 2024 diff --git a/data_integrity/reporter.py b/data_integrity/reporter.py index 76cf559..d523ade 100644 --- a/data_integrity/reporter.py +++ b/data_integrity/reporter.py @@ -78,7 +78,7 @@ def to_dict(self) -> dict: def format_summary(self) -> str: """Generate human-readable summary.""" if not self.issues: - return "✓ All validation checks passed." + return "All validation checks passed." counts = self.count_by_severity() lines = [ @@ -88,7 +88,7 @@ def format_summary(self) -> str: ] for issue in self.issues: - icon = "✗" if issue.severity == Severity.ERROR else "⚠" if issue.severity == Severity.WARNING else "ℹ" + icon = "X" if issue.severity == Severity.ERROR else "!" 
if issue.severity == Severity.WARNING else "i" location_str = f" [{issue.location}]" if issue.location else "" lines.append(f"\n{icon} {issue.severity.value} ({issue.category}){location_str}") lines.append(f" {issue.message}") diff --git a/docs/TEST_PLAN.md b/docs/TEST_PLAN.md index 5a3a43c..7eb50a0 100644 --- a/docs/TEST_PLAN.md +++ b/docs/TEST_PLAN.md @@ -249,7 +249,253 @@ Comprehensive testing strategy for the location-proximity analysis module, cover **Input**: Antipodal points (opposite sides of Earth) **Expected Output**: Geographic proximity ≈ 0.0 **Priority**: Low +#### Test Case: PC-EC-003 +**Description**: Missing dimensions (no cultural tags) +**Input**: Places without cultural_tags field +**Expected Output**: Cultural similarity defaults to 0.0, weights redistributed +**Priority**: Medium + +#### Test Case: PC-EC-004 +**Description**: Zero weight dimension +**Input**: Composite proximity with one dimension weight = 0 +**Expected Output**: Excluded dimension ignored, other weights sum to 1.0 +**Priority**: Medium + +--- + +## Clustering Test Cases + +### Unit Tests - DBSCAN Clustering + +#### Test Case: CL-UT-001 +**Description**: Cluster homogeneous place types +**Input**: 9 locations (3 parks, 3 hospitals, 3 churches) from `tests/data/locations.json` +**Expected Output**: 3 clusters, each containing same place type +**Validation**: +- Cluster 0: [park_001, park_002, park_003] +- Cluster 1: [hospital_001, hospital_002, hospital_003] +- Cluster 2: [church_001, church_002, church_003] +**Priority**: Critical + +#### Test Case: CL-UT-002 +**Description**: DBSCAN parameter sensitivity +**Input**: Same 9 locations with varying eps (0.2, 0.4, 0.6) +**Expected Output**: +- eps=0.2: More clusters (over-segmentation) +- eps=0.4: 3 clean clusters (optimal) +- eps=0.6: Fewer clusters (under-segmentation) +**Priority**: High + +#### Test Case: CL-UT-003 +**Description**: Noise point detection +**Input**: 9 locations + 2 outliers with unique attributes +**Expected 
Output**: Outliers labeled as noise (cluster_id = -1) +**Priority**: Medium + +#### Test Case: CL-UT-004 +**Description**: Minimum cluster size enforcement +**Input**: min_samples=3, locations with 2 similar + 1 outlier +**Expected Output**: Group of 2 not forming cluster (below threshold) +**Priority**: Medium + +### Integration Tests - Clustering with Emotions + +#### Test Case: CL-IT-001 +**Description**: Cluster emotion profile aggregation +**Input**: +- 9 locations clustered into 3 groups +- Sentiment data from `tests/data/sentiments.json` +**Expected Output**: +- Church cluster: 80%+ positive emotions +- Hospital cluster: 60%+ negative emotions +- Park cluster: 70%+ positive emotions +**Priority**: Critical + +#### Test Case: CL-IT-002 +**Description**: Temporal emotion evolution within cluster +**Input**: Cluster with visits across 2 months +**Expected Output**: Timeline showing emotion trend over time +**Priority**: Medium + +### Quality Metrics Tests + +#### Test Case: CL-QM-001 +**Description**: Silhouette score calculation +**Input**: Clustered locations with proximity matrix +**Expected Output**: Silhouette score > 0.5 (good separation) +**Priority**: High + +#### Test Case: CL-QM-002 +**Description**: Davies-Bouldin index +**Input**: Clustered locations +**Expected Output**: DB index < 1.0 (tight, well-separated clusters) +**Priority**: Medium + +#### Test Case: CL-QM-003 +**Description**: Clustering purity +**Input**: Predicted clusters vs. 
ground truth (place types) +**Expected Output**: Purity > 0.80 (accurate grouping) +**Priority**: High + +--- + +## Emotion-Location Pattern Detection + +### Hotspot Detection Tests + +#### Test Case: HS-UT-001 +**Description**: Positive emotional hotspot identification +**Input**: Location with 5 visits, 4 positive (80%), 1 neutral +**Expected Output**: Identified as positive hotspot (confidence=0.80) +**Min Visits**: 3 +**Min Confidence**: 0.60 +**Priority**: Critical + +#### Test Case: HS-UT-002 +**Description**: Negative emotional hotspot identification +**Input**: Hospital with 6 visits, 5 negative (83%), 1 neutral +**Expected Output**: Identified as negative hotspot (confidence=0.83) +**Priority**: Critical + +#### Test Case: HS-UT-003 +**Description**: Insufficient visits - no hotspot +**Input**: Location with 2 visits (below min_visits=3) +**Expected Output**: Not classified as hotspot +**Priority**: Medium + +#### Test Case: HS-UT-004 +**Description**: Mixed emotions - no dominant sentiment +**Input**: Location with balanced emotions (33% each) +**Expected Output**: No hotspot (confidence < 0.60 threshold) +**Priority**: Medium + +### Place-Type Emotion Comparison + +#### Test Case: PT-UT-001 +**Description**: Aggregate emotions by place type +**Input**: All church visits from `tests/data/sentiments.json` +**Expected Output**: +- Mean positive score: 0.82 +- Dominant sentiment: positive (>75%) +**Priority**: High + +#### Test Case: PT-UT-002 +**Description**: Statistical significance test +**Input**: Church emotions vs. 
Hospital emotions +**Expected Output**: t-test p-value < 0.05 (significantly different) +**Priority**: Medium + +### Temporal Emotion Trends + +#### Test Case: TE-UT-001 +**Description**: Weekly emotion aggregation +**Input**: Location with 8 visits across 4 weeks +**Expected Output**: +- Week 1-4 emotion distribution per week +- Trend direction (improving/declining/stable) +**Priority**: Medium + +#### Test Case: TE-UT-002 +**Description**: Seasonal pattern detection +**Input**: Year-long visit history at location +**Expected Output**: Identify seasonal variations (e.g., positive in summer) +**Priority**: Low (future enhancement) + +--- + +## End-to-End Integration Tests + +### Test Case: E2E-001 +**Description**: Complete photo upload to dashboard pipeline +**Steps**: +1. Upload photo with GPS EXIF data +2. Extract location and sentiment +3. Store in MongoDB +4. Compute proximity to existing locations +5. Update location_analysis collection +6. Trigger clustering if threshold met +7. Display on dashboard + +**Expected Results**: +- Photo processed < 3 seconds +- Location extracted correctly +- Proximity scores computed for nearby locations +- Dashboard shows updated analysis within 5 seconds + +**Priority**: Critical + +### Test Case: E2E-002 +**Description**: No GPS fallback to manual location +**Steps**: +1. Upload photo without GPS data +2. System prompts for manual location +3. User provides coordinates +4. Pipeline continues normally + +**Expected Results**: +- Graceful handling of missing GPS +- Manual location stored with accuracy='manual' +- All analysis proceeds as normal + +**Priority**: High + +### Test Case: E2E-003 +**Description**: Real-time dashboard updates +**Steps**: +1. User has existing location analysis dashboard open +2. Upload new photo at new location +3. 
Dashboard refreshes automatically or shows update notification +**Expected Results**: +- New location appears on map +- Cluster assignments updated if applicable +- Hotspots recalculated + +**Priority**: Medium + +--- + +## Performance & Load Testing + +### Test Case: PERF-001 +**Description**: Upload processing time benchmark +**Input**: Single photo upload with location +**Expected**: Complete processing < 3 seconds +**Measurement**: Average over 100 uploads +**Priority**: Critical + +### Test Case: PERF-002 +**Description**: Proximity calculation latency +**Input**: Compute proximity between 2 locations +**Expected**: < 100 milliseconds +**Measurement**: Average over 1000 calculations +**Priority**: High + +### Test Case: PERF-003 +**Description**: Clustering performance scaling +**Input**: Varying number of locations (10, 50, 100, 500) +**Expected**: +- 100 locations: < 2 seconds +- 500 locations: < 10 seconds +**Priority**: High + +### Test Case: PERF-004 +**Description**: Dashboard load time +**Input**: Request location analysis dashboard +**Expected**: Initial load < 1 second (excluding map tiles) +**Priority**: Medium + +### Test Case: LOAD-001 +**Description**: Concurrent upload handling +**Input**: 100 simultaneous photo uploads +**Expected**: All complete successfully, average time < 5 seconds +**Priority**: High + +### Test Case: LOAD-002 +**Description**: Database query performance under load +**Input**: 50 concurrent dashboard requests +**Expected**: All respond < 2 seconds +**Priority**: Medium #### Test Case: PC-EC-003 **Description**: Missing attribute handling **Input**: Location with missing 'type' field diff --git a/docs/api_design.md b/docs/api_design.md new file mode 100644 index 0000000..99f371c --- /dev/null +++ b/docs/api_design.md @@ -0,0 +1,447 @@ +# DREAMS API Design - Location Proximity & Emotion Analysis + +## Overview + +This document outlines the REST API design for multi-dimensional location-proximity analysis within DREAMS. 
The API builds upon: + +- **Existing EXIF extraction** (PR #77 by kunal-595): GPS coordinate extraction from image metadata +- **Existing emotion proximity** (PR #70 by AnvayKharb): Time-aware emotion timeline comparison + +Our API adds **spatial proximity endpoints** for geographic clustering, place-type similarity, and emotion-location mapping. + +--- + +## API Endpoints + +### 1. Ingestion & Analysis Endpoints + +#### POST `/api/upload` +**Description**: Upload photo with caption, extract location, analyze sentiment, and compute proximity patterns. + +**Request**: +```json +{ + "user_id": "string", + "image": "base64_encoded_image", + "caption": "string", + "timestamp": "ISO8601_datetime", + "manual_location": { // Optional fallback if no EXIF GPS + "lat": 61.2181, + "lon": -149.9003 + } +} +``` + +**Response**: +```json +{ + "post_id": "string", + "sentiment": { + "label": "positive|neutral|negative", + "score": 0.85 + }, + "location": { + "lat": 61.2181, + "lon": -149.9003, + "accuracy": "high|medium|low|none", + "place_type": "park", // Inferred or manual + "nearby_locations": [ + { + "location_id": "string", + "distance_meters": 150.5, + "proximity_score": 0.75 + } + ] + }, + "keywords": ["keyword1", "keyword2"], + "processing_time_ms": 1234 +} +``` + +**Integration Point**: `dreamsApp/app/ingestion/routes.py` + +--- + +#### GET `/api/location/proximity` +**Description**: Calculate multi-dimensional proximity between two locations. 
+ +**Query Parameters**: +- `location1_id` (string): First location ID +- `location2_id` (string): Second location ID +- `weights` (optional string): JSON object `{"geo": 0.3, "cat": 0.4, "ling": 0.15, "cult": 0.15}` + +**Response**: +```json +{ + "location1": { + "id": "park_001", + "name": "Delaney Park Strip", + "type": "park" + }, + "location2": { + "id": "park_002", + "name": "Chugach State Park", + "type": "park" + }, + "proximity_scores": { + "geographic": 0.45, + "categorical": 1.0, + "linguistic": 1.0, + "cultural": 0.67, + "composite": 0.78 + }, + "distance_meters": 8542.3 +} +``` + +--- + +#### POST `/api/location/cluster` +**Description**: Cluster user's locations using multi-dimensional proximity. + +**Request**: +```json +{ + "user_id": "string", + "method": "dbscan|kmeans", + "params": { + "eps": 0.4, + "min_samples": 2 + } +} +``` + +**Response**: +```json +{ + "clusters": [ + { + "cluster_id": 0, + "label": "Parks & Recreation", + "members": ["park_001", "park_002", "park_003"], + "centroid": {"lat": 61.19, "lon": -149.88}, + "emotion_profile": { + "positive": 0.75, + "neutral": 0.15, + "negative": 0.10 + } + } + ], + "noise_points": ["location_xyz"], + "silhouette_score": 0.68 +} +``` + +--- + +### 2. Emotion-Location Query Endpoints + +#### GET `/api/location/{location_id}/emotions` +**Description**: Get emotion profile for a specific location. + +**Response**: +```json +{ + "location_id": "church_001", + "name": "St. Mary's Catholic Church", + "total_visits": 5, + "emotion_distribution": { + "positive": 0.80, + "neutral": 0.15, + "negative": 0.05 + }, + "mean_score": 0.82, + "timeline": [ + { + "timestamp": "2024-01-21T10:00:00Z", + "sentiment": "positive", + "score": 0.88 + } + ] +} +``` + +--- + +#### GET `/api/location/hotspots` +**Description**: Find emotional hotspots for a user. 
+ +**Query Parameters**: +- `user_id` (string): User ID +- `sentiment` (string): Filter by `positive|neutral|negative` +- `min_confidence` (float): Minimum confidence threshold (default 0.6) +- `min_visits` (int): Minimum visits required (default 3) + +**Response**: +```json +{ + "hotspots": [ + { + "location_id": "church_001", + "name": "St. Mary's Catholic Church", + "sentiment": "positive", + "confidence": 0.80, + "visit_count": 5, + "coordinates": {"lat": 61.2167, "lon": -149.8944} + } + ] +} +``` + +--- + +#### GET `/api/location/place-type-comparison` +**Description**: Compare emotions across place types. + +**Query Parameters**: +- `user_id` (string): User ID + +**Response**: +```json +{ + "place_types": { + "church": { + "positive": 0.85, + "neutral": 0.10, + "negative": 0.05, + "mean_score": 0.82, + "visit_count": 8 + }, + "hospital": { + "positive": 0.15, + "neutral": 0.20, + "negative": 0.65, + "mean_score": 0.31, + "visit_count": 6 + }, + "park": { + "positive": 0.70, + "neutral": 0.20, + "negative": 0.10, + "mean_score": 0.75, + "visit_count": 10 + } + } +} +``` + +--- + +### 3. Dashboard Visualization Endpoints + +#### GET `/api/dashboard/{user_id}/location-analysis` +**Description**: Get comprehensive location analysis for dashboard. 
+ +**Response**: +```json +{ + "summary": { + "total_locations": 24, + "unique_place_types": 5, + "clusters": 4, + "hotspots": 3 + }, + "clusters": [...], // Same as cluster endpoint + "hotspots": [...], // Same as hotspots endpoint + "temporal_patterns": { + "weekly_distribution": { + "Monday": {"positive": 0.7, "neutral": 0.2, "negative": 0.1}, + "Tuesday": {...} + }, + "place_type_evolution": [ + { + "week": "2024-W01", + "church": {"positive": 0.8}, + "hospital": {"negative": 0.6} + } + ] + } +} +``` + +--- + +## Data Flow Architecture + +``` +┌─────────────────┐ +│ Photo Upload │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ EXIF Extraction │ ──► GPS Coordinates +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Sentiment │ ──► Emotion Score +│ Analysis │ +└────────┬────────┘ + │ + ▼ +┌─────────────────────────────────┐ +│ Location-Emotion Mapper │ +│ - Store location + emotion pair │ +│ - Update visit history │ +└────────┬────────────────────────┘ + │ + ▼ +┌─────────────────────────────────┐ +│ Proximity Calculation │ +│ - Find nearby locations │ +│ - Compute multi-dim scores │ +└────────┬────────────────────────┘ + │ + ▼ +┌─────────────────────────────────┐ +│ Pattern Detection │ +│ - Identify hotspots │ +│ - Cluster analysis │ +│ - Temporal trends │ +└────────┬────────────────────────┘ + │ + ▼ +┌─────────────────┐ +│ MongoDB Storage │ +└─────────────────┘ +``` + +--- + +## Error Handling + +### Standard Error Response +```json +{ + "error": { + "code": "ERROR_CODE", + "message": "Human-readable error message", + "details": { + "field": "Additional context" + } + } +} +``` + +### Error Codes +- `INVALID_IMAGE_FORMAT`: Unsupported image format +- `NO_GPS_DATA`: No GPS coordinates in EXIF or manual location +- `LOCATION_NOT_FOUND`: Location ID doesn't exist +- `INSUFFICIENT_DATA`: Not enough data for clustering/analysis +- `INVALID_COORDINATES`: GPS coordinates out of valid range +- `PROCESSING_FAILED`: General processing error + +--- + +## Rate 
Limiting + +- **Upload**: 10 requests/minute per user +- **Query endpoints**: 100 requests/minute per user +- **Dashboard**: 20 requests/minute per user + +Headers: +``` +X-RateLimit-Limit: 10 +X-RateLimit-Remaining: 7 +X-RateLimit-Reset: 1640995200 +``` + +--- + +## Authentication + +All endpoints require Bearer token authentication: +``` +Authorization: Bearer +``` + +User context is extracted from the JWT token. The `user_id` in requests must match the authenticated user (except for admin users). + +--- + +## Integration Points + +### Existing DREAMS Components + +1. **Ingestion Pipeline** (`dreamsApp/app/ingestion/routes.py`) + - Extend `POST /upload` to include location extraction + - Add location-proximity calculations after sentiment analysis + +2. **Dashboard** (`dreamsApp/app/dashboard/main.py`) + - Add new route `/location_analysis/` + - Integrate location map visualization + - Display cluster cards and hotspot markers + +3. **Data Models** (`dreamsApp/app/models.py`) + - Extend `Post` model with location fields + - Add `LocationAnalysis` model for storing clusters/hotspots + - Add `EmotionLocationEntry` for tracking location-emotion pairs + +### New Components + +1. **Location Proximity Calculator** (`location_proximity/proximity_calculator.py`) + - Called by API endpoints to compute multi-dimensional scores + +2. **Emotion-Location Mapper** (`location_proximity/emotion_location_mapper.py`) + - Manages emotion-location associations + - Provides hotspot detection and pattern analysis + +3. 
**Semantic Clusterer** (`location_proximity/semantic_clustering.py`) + - Clusters locations using DBSCAN + - Generates emotion profiles per cluster + +--- + +## Performance Considerations + +### Caching Strategy +- Cache proximity scores between location pairs (TTL: 1 hour) +- Cache cluster results per user (invalidate on new upload) +- Cache hotspot calculations (invalidate on new location-emotion pair) + +### Optimization +- Batch proximity calculations for nearby locations +- Precompute distance matrices for frequent queries +- Use spatial indexing (MongoDB geospatial queries) for radius searches + +### Expected Performance +- Upload processing: < 3 seconds (including all analysis) +- Proximity query: < 100ms +- Clustering: < 2 seconds for 100 locations +- Dashboard load: < 1 second + +--- + +## Future Enhancements + +1. **Real-time Place Enrichment** + - Google Places API integration for place type inference + - Automatic tagging of cultural/linguistic attributes + +2. **Collaborative Filtering** + - Cross-user emotion patterns at shared locations + - Privacy-preserving aggregation + +3. **Temporal Predictions** + - Predict likely emotional response at a location based on history + - Recommend emotionally beneficial locations + +4. 
**WebSocket Support** + - Real-time clustering updates + - Live emotion-location mapping during photo uploads + +--- + +## Testing Strategy + +- **Unit Tests**: Mock external dependencies (DB, ML models) +- **Integration Tests**: Test full pipeline with synthetic data +- **Load Tests**: Simulate 100 concurrent users +- **Validation**: Compare results against `tests/data/expected_results.json` + +--- + +**Document Version**: 1.0 +**Last Updated**: February 3, 2026 +**Author**: Krishan (GSoC 2026 Contributor) diff --git a/docs/evaluation_metrics.md b/docs/evaluation_metrics.md new file mode 100644 index 0000000..a49a891 --- /dev/null +++ b/docs/evaluation_metrics.md @@ -0,0 +1,380 @@ +# Evaluation Metrics & Ablation Study Plan + +## Overview + +This document defines the evaluation methodology for the DREAMS location-proximity module, including quantitative metrics, qualitative assessment criteria, and a systematic ablation study plan to validate the multi-dimensional proximity approach. + +--- + +## 1. 
Quantitative Evaluation Metrics + +### 1.1 Proximity Calculation Accuracy + +**Metric**: Mean Absolute Error (MAE) against human-annotated proximity scores + +**Method**: +- Collect human judgments for 50 location pairs +- Humans rate semantic similarity on 0-1 scale +- Compare with computed proximity scores + +**Formula**: +``` +MAE = (1/n) * Σ|human_score_i - computed_score_i| +``` + +**Success Criteria**: MAE < 0.15 + +### 1.2 Clustering Quality + +**Metrics**: +- **Silhouette Score**: Measures cluster cohesion and separation (-1 to +1) +- **Davies-Bouldin Index**: Lower is better (minimum 0) +- **Purity**: Percentage of correctly clustered items + +**Success Criteria**: +- Silhouette Score > 0.5 +- Davies-Bouldin Index < 1.0 +- Purity > 0.80 + +**Validation**: +```python +from sklearn.metrics import silhouette_score, davies_bouldin_score + +# Using synthetic data from tests/data/expected_results.json +expected_clusters = { + 0: ["church_001", "church_002", "church_003"], + 1: ["hospital_001", "hospital_002", "hospital_003"], + 2: ["park_001", "park_002", "park_003"] +} + +# Compute metrics +silhouette = silhouette_score(proximity_matrix, cluster_labels) +davies_bouldin = davies_bouldin_score(proximity_matrix, cluster_labels) +purity = compute_purity(cluster_labels, ground_truth_labels) +``` + +### 1.3 Hotspot Detection Precision & Recall + +**Metrics**: +- **Precision**: Of detected hotspots, how many are true positives? +- **Recall**: Of true hotspots, how many were detected? 
+- **F1 Score**: Harmonic mean of precision and recall + +**Ground Truth**: Manually labeled hotspots in test dataset + +**Formula**: +``` +Precision = TP / (TP + FP) +Recall = TP / (TP + FN) +F1 = 2 * (Precision * Recall) / (Precision + Recall) +``` + +**Success Criteria**: F1 Score > 0.75 + +### 1.4 Emotion Prediction Accuracy + +**Metric**: Accuracy of predicting emotion at a location based on place type + +**Method**: +- Hold out 20% of emotion-location pairs +- Predict sentiment using place-type averages +- Compare with ground truth + +**Formula**: +``` +Accuracy = (Correct Predictions) / (Total Predictions) +``` + +**Success Criteria**: Accuracy > 0.65 (better than random baseline of 0.33) + +### 1.5 Performance Benchmarks + +**Metrics**: +- Processing time per photo upload +- Proximity calculation latency +- Clustering computation time +- Memory usage + +**Success Criteria**: +- Upload processing: < 3 seconds +- Proximity query: < 100ms +- Clustering (100 locations): < 2 seconds +- Memory footprint: < 500MB + +--- + +## 2. Qualitative Evaluation + +### 2.1 User Study Design + +**Participants**: 10-15 mental health researchers and clinicians + +**Tasks**: +1. Review 5 user recovery timelines with location-emotion visualizations +2. Assess whether location clusters match their clinical intuition +3. Evaluate usefulness of hotspot identification +4. Rate interpretability of proximity scores (1-5 scale) + +**Questions**: +- "Do the location clusters make semantic sense?" +- "Are emotional hotspots clinically meaningful?" +- "Would this analysis support recovery tracking?" +- "Are proximity scores interpretable?" 
+ +**Success Criteria**: +- Mean usefulness rating > 3.5/5 +- 70%+ agreement on cluster meaningfulness + +### 2.2 Case Study Analysis + +**Method**: Detailed analysis of 3 synthetic user journeys + +**Dimensions**: +- Temporal evolution of place-emotion associations +- Identification of recovery milestones via location patterns +- Discovery of unexpected semantic proximity patterns + +**Documentation**: Rich narrative descriptions with visualizations + +--- + +## 3. Ablation Study Plan + +### 3.1 Study Overview + +**Purpose**: Determine the contribution of each proximity dimension (geographic, categorical, linguistic, cultural) to overall system performance. + +**Method**: Systematically remove each dimension and measure impact on clustering quality and emotion prediction accuracy. + +### 3.2 Experimental Conditions + +| Condition | Geographic | Categorical | Linguistic | Cultural | Description | +|-----------|------------|-------------|------------|----------|-------------| +| **Full** | Yes | Yes | Yes | Yes | All dimensions (baseline) | +| **Ablate-Geo** | No | Yes | Yes | Yes | Remove geographic distance | +| **Ablate-Cat** | Yes | No | Yes | Yes | Remove categorical similarity | +| **Ablate-Ling** | Yes | Yes | No | Yes | Remove linguistic context | +| **Ablate-Cult** | Yes | Yes | Yes | No | Remove cultural tags | +| **Geo-Only** | Yes | No | No | No | Geographic distance only | +| **Cat-Only** | No | Yes | No | No | Categorical similarity only | + +### 3.3 Evaluation for Each Condition + +**Metrics Measured**: +- Silhouette Score +- Davies-Bouldin Index +- Clustering Purity +- Emotion Prediction Accuracy +- Human Interpretability Rating (qualitative) + +**Dataset**: `tests/data/locations.json` with 17 locations across 7 types + +### 3.4 Expected Outcomes + +**Hypothesis 1**: Categorical dimension contributes most to clustering quality +- **Rationale**: Place type (church, hospital) is strongest semantic signal +- **Test**: Ablate-Cat should show largest 
performance drop + +**Hypothesis 2**: Geographic dimension alone is insufficient +- **Rationale**: Two distant churches are more similar than a church and nearby hospital +- **Test**: Geo-Only should have poor clustering purity + +**Hypothesis 3**: Multi-dimensional approach outperforms single dimensions +- **Rationale**: Combined signals capture richer semantics +- **Test**: Full model should achieve best metrics + +### 3.5 Implementation + +```python +# ablation_study.py + +import json +import numpy as np +from sklearn.metrics import silhouette_score +from location_proximity.proximity_calculator import composite_proximity + +def run_ablation_study(): + """Run systematic ablation study on proximity dimensions.""" + + # Load test data + with open('tests/data/locations.json') as f: + locations = json.load(f)['locations'] + + # Define ablation conditions + conditions = { + 'Full': {'geo': 0.3, 'cat': 0.4, 'ling': 0.15, 'cult': 0.15}, + 'Ablate-Geo': {'geo': 0.0, 'cat': 0.55, 'ling': 0.225, 'cult': 0.225}, + 'Ablate-Cat': {'geo': 0.5, 'cat': 0.0, 'ling': 0.25, 'cult': 0.25}, + 'Ablate-Ling': {'geo': 0.35, 'cat': 0.47, 'ling': 0.0, 'cult': 0.18}, + 'Ablate-Cult': {'geo': 0.35, 'cat': 0.47, 'ling': 0.18, 'cult': 0.0}, + 'Geo-Only': {'geo': 1.0, 'cat': 0.0, 'ling': 0.0, 'cult': 0.0}, + 'Cat-Only': {'geo': 0.0, 'cat': 1.0, 'ling': 0.0, 'cult': 0.0} + } + + results = {} + + for condition_name, weights in conditions.items(): + # Compute proximity matrix with current weights + proximity_matrix = compute_proximity_matrix(locations, weights) + + # Cluster using DBSCAN + from location_proximity.semantic_clustering import SemanticLocationClusterer + clusterer = SemanticLocationClusterer(eps=0.4, min_samples=2) + labels = clusterer.cluster_by_proximity(proximity_matrix) + + # Compute metrics + silhouette = silhouette_score(proximity_matrix, labels) if len(set(labels)) > 1 else 0 + purity = compute_purity(labels, ground_truth_from_place_types(locations)) + + 
results[condition_name] = { + 'silhouette': silhouette, + 'purity': purity, + 'num_clusters': len(set(labels)) - (1 if -1 in labels else 0) + } + + return results +``` + +### 3.6 Results Documentation + +Results will be documented in: +- **Quantitative Table**: Metrics for each condition +- **Visualization**: Bar charts comparing conditions +- **Statistical Analysis**: ANOVA to test significance of differences +- **Interpretation**: Narrative explanation of findings + +--- + +## 4. Validation Against Expected Results + +### 4.1 Synthetic Dataset Validation + +**File**: `tests/data/expected_results.json` + +**Tests**: +1. **Proximity Scores**: Verify computed scores fall within expected ranges +```python +# Church-Church proximity should be 0.8-1.0 +assert 0.8 <= compute_proximity(church_001, church_002) <= 1.0 + +# Church-Hospital proximity should be 0.1-0.4 +assert 0.1 <= compute_proximity(church_001, hospital_001) <= 0.4 +``` + +2. **Clustering**: Verify 3 clusters detected (parks, hospitals, churches) +```python +assert num_clusters == 3 +assert set(clusters[0]) == set(["church_001", "church_002", "church_003"]) +``` + +3. **Emotion Patterns**: Verify place-type emotion distributions +```python +church_sentiment = aggregate_by_place_type('church') +assert church_sentiment['positive'] >= 0.70 # Expected mean 0.75 +``` + +### 4.2 Test Suite + +All validation tests in `tests/test_evaluation_metrics.py`: + +```python +def test_proximity_accuracy(): + """Test proximity scores against expected ranges.""" + # Implementation + +def test_clustering_quality(): + """Test clustering meets quality thresholds.""" + # Implementation + +def test_hotspot_detection(): + """Test hotspot detection precision/recall.""" + # Implementation + +def test_emotion_prediction(): + """Test emotion prediction accuracy.""" + # Implementation + +def test_performance_benchmarks(): + """Test processing times meet requirements.""" + # Implementation +``` + +--- + +## 5. 
Baseline Comparisons + +### 5.1 Baseline Methods + +**Baseline 1: Geographic Distance Only** +- Use Haversine formula only +- No semantic considerations + +**Baseline 2: K-Means Clustering (Fixed K=3)** +- Traditional clustering without proximity matrix +- Geographic features only + +**Baseline 3: Random Emotion Prediction** +- Predict emotions randomly (33% each class) +- Lower bound on performance + +### 5.2 Comparison Metrics + +| Metric | Random | Geo-Only | K-Means | Multi-Dim (Ours) | +|--------|--------|----------|---------|------------------| +| Silhouette Score | - | TBD | TBD | **Target: > 0.5** | +| Clustering Purity | 33% | TBD | TBD | **Target: > 80%** | +| Emotion Prediction | 33% | TBD | TBD | **Target: > 65%** | +| Interpretability | Low | Medium | Low | **Target: High** | + +--- + +## 6. Continuous Monitoring + +### 6.1 Production Metrics + +Once deployed, monitor: +- Average proximity calculation time +- Clustering success rate (% of users with valid clusters) +- User engagement with location analysis dashboard +- Error rates in EXIF extraction + +### 6.2 A/B Testing + +**Test**: Multi-dimensional proximity vs. Geographic-only + +**Metrics**: +- Dashboard engagement time +- User-reported usefulness +- Clinical insights discovered + +**Duration**: 4 weeks with 50 users per group + +--- + +## 7. Timeline + +| Phase | Duration | Deliverables | +|-------|----------|-------------| +| **Metric Implementation** | Week 1 | All metrics coded and tested | +| **Ablation Study** | Week 2 | Results for all conditions | +| **User Study** | Week 3-4 | Qualitative feedback collected | +| **Baseline Comparison** | Week 2 | Comparison table completed | +| **Documentation** | Week 5 | Final evaluation report | + +--- + +## 8. 
Success Criteria Summary + +- **Proximity Accuracy**: MAE < 0.15 +- **Clustering Quality**: Silhouette > 0.5, Purity > 0.80 +- **Hotspot Detection**: F1 > 0.75 +- **Emotion Prediction**: Accuracy > 0.65 +- **Performance**: Upload < 3s, Query < 100ms +- **User Study**: Usefulness > 3.5/5 +- **Ablation Study**: Multi-dimensional > single dimensions + +--- + +**Version**: 1.0 +**Last Updated**: February 3, 2026 +**Author**: Krishan (GSoC 2026 Contributor) diff --git a/docs/exif_extraction_research.md b/docs/exif_extraction_research.md new file mode 100644 index 0000000..e908ecf --- /dev/null +++ b/docs/exif_extraction_research.md @@ -0,0 +1,205 @@ +# EXIF Extraction Research + +## Overview + +**Note**: EXIF extraction has been implemented in `dreamsApp/exif_extractor.py` by PR #77 (kunal-595). This research document provided the foundation for that implementation. + +This document compares EXIF extraction libraries for photo metadata analysis in the DREAMS project, focusing on location data, timestamps, and camera information needed for recovery journey tracking. 
+ +## Library Comparison: Pillow vs exifread + +### Pillow (PIL.ExifTags) + +**Pros:** +- Built into PIL/Pillow (already used for image processing) +- Simple API with `Image._getexif()` +- Good for basic EXIF data +- Lightweight for standard use cases + +**Cons:** +- Limited EXIF tag support +- No GPS coordinate parsing helpers +- Inconsistent handling of malformed data +- Returns numeric tag IDs requiring manual mapping + +**Code Example:** +```python +from PIL import Image +from PIL.ExifTags import TAGS, GPSTAGS + +def extract_exif_pillow(image_path): + image = Image.open(image_path) + exif = image._getexif() + if not exif: + return {} + + data = {} + for tag_id, value in exif.items(): + tag = TAGS.get(tag_id, tag_id) + if tag == 'GPSInfo': + gps_data = {} + for gps_tag_id, gps_value in value.items(): + gps_tag = GPSTAGS.get(gps_tag_id, gps_tag_id) + gps_data[gps_tag] = gps_value + data[tag] = gps_data + else: + data[tag] = value + return data +``` + +### exifread + +**Pros:** +- Comprehensive EXIF tag support +- Better handling of malformed/corrupted data +- Detailed GPS parsing +- More robust for edge cases +- Returns human-readable tag names + +**Cons:** +- Additional dependency +- Slightly more complex API +- Larger memory footprint + +**Code Example:** +```python +import exifread + +def extract_exif_exifread(image_path): + with open(image_path, 'rb') as f: + tags = exifread.process_file(f) + + data = {} + for tag, value in tags.items(): + if tag.startswith('GPS'): + data[tag] = str(value) + elif tag in ['EXIF DateTime', 'Image DateTime']: + data[tag] = str(value) + elif tag == 'Image Make': + data[tag] = str(value) + return data +``` + +## Edge Cases Identified + +### 1. Missing GPS Data +- **Issue:** Many photos lack GPS coordinates +- **Impact:** Cannot determine location for proximity analysis +- **Mitigation:** Fallback to user-provided location or skip location-based features + +### 2. 
Corrupted EXIF Headers +- **Issue:** Malformed EXIF data causes parsing failures +- **Impact:** Complete metadata loss +- **Mitigation:** Use exifread's robust parsing + try/catch blocks + +### 3. Timezone Inconsistencies +- **Issue:** EXIF timestamps don't include timezone info +- **Impact:** Incorrect temporal ordering across locations +- **Mitigation:** Use GPS coordinates to infer timezone or prompt user + +### 4. Camera-Specific Formats +- **Issue:** Different manufacturers use proprietary EXIF extensions +- **Impact:** Inconsistent metadata availability +- **Mitigation:** Normalize to common subset of tags + +### 5. Privacy-Stripped Images +- **Issue:** Social media platforms remove EXIF data +- **Impact:** No metadata available for analysis +- **Mitigation:** Detect stripped images and request manual input + +### 6. Large File Handling +- **Issue:** High-resolution images may cause memory issues +- **Impact:** Processing failures on resource-constrained systems +- **Mitigation:** Stream processing or thumbnail extraction + +## Recommended Implementation + +### Primary Choice: exifread +- Better edge case handling +- More comprehensive GPS support +- Robust parsing for corrupted data + +### Fallback Strategy +```python +def extract_metadata(image_path): + try: + return extract_exif_exifread(image_path) + except Exception: + try: + return extract_exif_pillow(image_path) + except Exception: + return {} # No metadata available +``` + +## GPS Coordinate Conversion + +Both libraries require manual GPS coordinate conversion: + +```python +def convert_gps_to_decimal(gps_coord, direction): + """Convert GPS coordinates from DMS to decimal degrees.""" + if not gps_coord: + return None + + degrees = float(gps_coord[0]) + minutes = float(gps_coord[1]) + seconds = float(gps_coord[2]) + + decimal = degrees + (minutes / 60.0) + (seconds / 3600.0) + + if direction in ['S', 'W']: + decimal = -decimal + + return decimal +``` + +## Testing Strategy + +### Test Cases Required 
+1. **Standard photos** with complete EXIF +2. **GPS-enabled photos** from different devices +3. **Corrupted EXIF** data scenarios +4. **Privacy-stripped** images +5. **Various camera manufacturers** (Canon, Nikon, iPhone, Android) +6. **Different file formats** (JPEG, TIFF, RAW) + +### Performance Benchmarks +- Processing time per image +- Memory usage with large files +- Error handling robustness + +## Integration with DREAMS + +### Metadata Schema +```python +{ + "timestamp": "2024-01-15T14:30:00", + "location": { + "lat": 61.2181, + "lon": -149.9003, + "accuracy": "high" # high/medium/low/none + }, + "camera": { + "make": "Apple", + "model": "iPhone 12", + "settings": {...} + }, + "processing": { + "exif_source": "exifread", # exifread/pillow/manual + "extraction_time": "2024-01-15T14:35:00" + } +} +``` + +### Error Handling +- Log extraction failures for debugging +- Graceful degradation when metadata unavailable +- User prompts for critical missing data (location, timestamp) + +## Next Steps + +1. Implement robust EXIF extraction module +2. Create comprehensive test suite +3. Add GPS coordinate validation +4. Integrate with photo upload pipeline +5. Add user interface for manual metadata entry \ No newline at end of file diff --git a/docs/integration_guide.md b/docs/integration_guide.md new file mode 100644 index 0000000..f4f0936 --- /dev/null +++ b/docs/integration_guide.md @@ -0,0 +1,498 @@ +# DREAMS Integration Guide - Location Proximity Module + +## Overview + +This guide provides step-by-step instructions for integrating the multi-dimensional location-proximity analysis module into the existing DREAMS platform. 
This work builds upon: + +- **PR #77** (by kunal-595): EXIF GPS extraction already implemented in `dreamsApp/exif_extractor.py` +- **PR #70** (by AnvayKharb): Time-aware emotion proximity in `dreamsApp/analytics/emotion_proximity.py` + +Our contribution adds **multi-dimensional spatial proximity** (geographic + categorical + linguistic + cultural) to complement the existing time-aware emotion analysis. + +--- + +## Prerequisites + +- DREAMS platform installed and running +- MongoDB instance configured +- Python 3.8+ environment +- Required packages: `exifread`, `scikit-learn`, `numpy` + +--- + +## Integration Architecture + +``` +┌──────────────────────────────────────────────────────┐ +│ Existing DREAMS Platform │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ │ +│ │ Beehive │─────▶│ Ingestion │ │ +│ │ Frontend │ │ Pipeline │ │ +│ └─────────────┘ └──────┬───────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Sentiment Analysis │ │ +│ │ (existing) │ │ +│ └──────────────┬───────────────────────┘ │ +│ │ │ +└─────────────────┼────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────┐ +│ NEW: Location-Proximity Module │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ EXIF │─────▶│ Proximity │ │ +│ │ Extractor │ │ Calculator │ │ +│ └──────────────┘ └──────┬───────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Emotion-Location Mapper │ │ +│ └──────────────┬───────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────┐ │ +│ │ Semantic Clusterer │ │ +│ └──────────────┬───────────────────┘ │ +└─────────────────┼────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────┐ +│ MongoDB Storage │ +│ - posts (extended with location) │ +│ - location_analysis │ +│ - emotion_location_entries │ +└──────────────────────────────────────────────────────┘ +``` + +--- + +## Step 1: Extend Post Ingestion Route + +**File**: 
`dreamsApp/app/ingestion/routes.py` + +### 1.1 Import Location Modules + +Add to the top of the file: +```python +from dreamsApp.exif_extractor import EXIFExtractor # From PR #77 (kunal-595) +from dreamsApp.analytics.emotion_proximity import segment_timeline_into_windows # From PR #70 (AnvayKharb) +from dreamsApp.location_proximity import extract_location, find_nearby_locations # New multi-dimensional proximity +``` + +### 1.2 Modify Upload Route + +Extend the existing `POST /upload` endpoint: + +```python +@ingestion_bp.route('/upload', methods=['POST']) +def upload_photo(): + # Existing code for image upload and sentiment analysis... + + # Use existing EXIF extractor from PR #77 + extractor = EXIFExtractor() + metadata = extractor.extract_metadata(image_path) + location_data = metadata.get('location', {}) + + # Fallback to manual location if no GPS in EXIF + if location_data.get('accuracy') == 'none' and 'manual_location' in request.json: + manual = request.json['manual_location'] + location_data = { + 'lat': manual['lat'], + 'lon': manual['lon'], + 'accuracy': 'manual' + } + + # Store post with location + post_doc = { + 'user_id': user_id, + 'caption': caption, + 'timestamp': datetime.utcnow(), + 'image_path': image_path, + 'sentiment': sentiment_result, + 'location': location_data # NEW FIELD + } + + post_id = db.posts.insert_one(post_doc).inserted_id + + # NEW: If location available, find nearby locations and update analysis + if location_data.get('lat') and location_data.get('lon'): + from location_proximity.emotion_location_mapper import EmotionLocationMapper + + mapper = EmotionLocationMapper() + + # Add emotion-location entry + mapper.add_entry( + location_id=str(post_id), # Use post_id as location_id initially + sentiment=sentiment_result['label'], + score=sentiment_result['score'], + metadata={ + 'timestamp': post_doc['timestamp'], + 'coordinates': location_data, + 'user_id': user_id + } + ) + + # Store in emotion_location_entries collection + 
db.emotion_location_entries.insert_one({
+            'user_id': user_id,
+            'location_id': str(post_id),
+            'post_id': post_id,
+            'sentiment': sentiment_result['label'],
+            'score': sentiment_result['score'],
+            'timestamp': post_doc['timestamp'],
+            'coordinates': location_data
+        })
+
+        # Find nearby locations
+        user_locations = list(db.posts.find({
+            'user_id': user_id,
+            'location.lat': {'$exists': True}
+        }))
+
+        nearby = find_nearby_locations(
+            target_location={'lat': location_data['lat'], 'lon': location_data['lon']},
+            locations=[
+                {'lat': loc['location']['lat'], 'lon': loc['location']['lon']}
+                for loc in user_locations
+            ],
+            radius_meters=1000  # 1km radius
+        )
+
+        # Update location_analysis collection
+        db.location_analysis.update_one(
+            {'user_id': user_id},
+            {
+                '$push': {
+                    'locations': {
+                        'id': str(post_id),
+                        'coordinates': location_data,
+                        'timestamp': post_doc['timestamp'],
+                        'sentiment': sentiment_result['label'],
+                        'nearby_count': len(nearby)
+                    }
+                },
+                '$set': {'updated_at': datetime.utcnow()}
+            },
+            upsert=True
+        )
+
+    return jsonify({
+        'post_id': str(post_id),
+        'sentiment': sentiment_result,
+        'location': location_data,
+        'nearby_locations': len(nearby) if location_data.get('lat') else 0
+    })
+```
+
+---
+
+## Step 2: Create Location Analysis Dashboard Route
+
+**File**: `dreamsApp/app/dashboard/main.py`
+
+### 2.1 Add New Route
+
+```python
+from flask import render_template
+from location_proximity.semantic_clustering import SemanticLocationClusterer
+from location_proximity.emotion_location_mapper import EmotionLocationMapper
+
+@dashboard_bp.route('/location_analysis/<user_id>')
+def location_analysis(user_id):
+    """Display location-emotion analysis dashboard."""
+
+    # Get user's location data
+    analysis_doc = db.location_analysis.find_one({'user_id': user_id})
+
+    if not analysis_doc:
+        return render_template('dashboard/location_analysis.html',
+                               error="No location data available")
+
+    # Get emotion-location entries
+    entries = 
list(db.emotion_location_entries.find({'user_id': user_id})) + + # Initialize mapper and load data + mapper = EmotionLocationMapper() + for entry in entries: + mapper.add_entry( + location_id=entry['location_id'], + sentiment=entry['sentiment'], + score=entry['score'], + metadata={ + 'timestamp': entry['timestamp'], + 'coordinates': entry['coordinates'] + } + ) + + # Find hotspots + positive_hotspots = mapper.find_emotional_hotspots('positive', min_visits=3) + negative_hotspots = mapper.find_emotional_hotspots('negative', min_visits=3) + + # Perform clustering if enough locations + clusters = [] + if len(analysis_doc.get('locations', [])) >= 6: + # Build proximity matrix (simplified - use actual multi-dimensional in production) + from location_proximity.proximity_calculator import compute_proximity_matrix + + locations = analysis_doc['locations'] + proximity_matrix = compute_proximity_matrix(locations) + + clusterer = SemanticLocationClusterer(eps=0.4, min_samples=2) + cluster_labels = clusterer.cluster_by_proximity(proximity_matrix) + + # Get emotion profiles for clusters + clusters = clusterer.cluster_with_emotions(proximity_matrix, entries) + + return render_template('dashboard/location_analysis.html', + user_id=user_id, + locations=analysis_doc.get('locations', []), + positive_hotspots=positive_hotspots, + negative_hotspots=negative_hotspots, + clusters=clusters, + total_locations=len(analysis_doc.get('locations', []))) +``` + +### 2.2 Create Template + +**File**: `dreamsApp/app/templates/dashboard/location_analysis.html` + +```html +{% extends "base.html" %} + +{% block content %} +
+<div class="container">
+    <h1>Location-Emotion Analysis</h1>
+
+    {% if error %}
+    <div class="alert alert-warning">{{ error }}</div>
+    {% else %}
+
+    <!-- Summary cards -->
+    <div class="row">
+        <div class="col-md-3">
+            <div class="card">
+                <div class="card-header">Total Locations</div>
+                <div class="card-body">
+                    <h2>{{ total_locations }}</h2>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-3">
+            <div class="card">
+                <div class="card-header">Positive Hotspots</div>
+                <div class="card-body">
+                    <h2>{{ positive_hotspots|length }}</h2>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-3">
+            <div class="card">
+                <div class="card-header">Negative Hotspots</div>
+                <div class="card-body">
+                    <h2>{{ negative_hotspots|length }}</h2>
+                </div>
+            </div>
+        </div>
+        <div class="col-md-3">
+            <div class="card">
+                <div class="card-header">Clusters</div>
+                <div class="card-body">
+                    <h2>{{ clusters|length }}</h2>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- Hotspots map -->
+    <div class="row">
+        <div class="col-12">
+            <h3>Emotional Hotspots Map</h3>
+            <div id="hotspot-map" style="height: 400px;"></div>
+        </div>
+    </div>
+
+    <!-- Location clusters -->
+    <div class="row">
+        <div class="col-12">
+            <h3>Location Clusters</h3>
+            {% for cluster in clusters %}
+            <div class="card">
+                <div class="card-header">
+                    Cluster {{ cluster.cluster_id }}: {{ cluster.label }}
+                </div>
+                <div class="card-body">
+                    <p>Members: {{ cluster.members|length }}</p>
+                    <p>Emotion Profile:</p>
+                    <div class="progress">
+                        <div class="progress-bar bg-success">
+                            Positive {{ "%.0f"|format(cluster.emotion_distribution.positive * 100) }}%
+                        </div>
+                        <div class="progress-bar bg-secondary">
+                            Neutral {{ "%.0f"|format(cluster.emotion_distribution.neutral * 100) }}%
+                        </div>
+                        <div class="progress-bar bg-danger">
+                            Negative {{ "%.0f"|format(cluster.emotion_distribution.negative * 100) }}%
+                        </div>
+                    </div>
+                </div>
+            </div>
+            {% endfor %}
+        </div>
+    </div>
+    {% endif %}
+</div>
+{% endblock %} +``` + +--- + +## Step 3: Database Indexes + +Add indexes for efficient querying: + +```python +# In a migration script or app initialization +db.posts.create_index([('user_id', 1), ('location.lat', 1)]) +db.emotion_location_entries.create_index([('user_id', 1), ('timestamp', -1)]) +db.emotion_location_entries.create_index([('user_id', 1), ('location_id', 1)]) +db.location_analysis.create_index([('user_id', 1)]) +``` + +--- + +## Step 4: Configuration + +**File**: `dreamsApp/app/config.py` + +Add location-proximity settings: + +```python +class Config: + # Existing config... + + # Location-Proximity Settings + LOCATION_PROXIMITY_WEIGHTS = { + 'geographic': 0.3, + 'categorical': 0.4, + 'linguistic': 0.15, + 'cultural': 0.15 + } + + CLUSTERING_PARAMS = { + 'eps': 0.4, + 'min_samples': 2 + } + + HOTSPOT_MIN_VISITS = 3 + HOTSPOT_MIN_CONFIDENCE = 0.6 + + NEARBY_RADIUS_METERS = 1000 +``` + +--- + +## Step 5: Testing Integration + +Create integration test: + +**File**: `tests/test_location_emotion_integration.py` + +```python +import pytest +import json +from dreamsApp.app import create_app + +def test_full_pipeline(): + """Test full pipeline: upload → location extraction → emotion mapping → clustering.""" + + app = create_app('testing') + client = app.test_client() + + # Load test data + with open('tests/data/locations.json') as f: + test_locations = json.load(f)['locations'] + + # Simulate uploads for multiple locations + for loc in test_locations[:5]: + response = client.post('/upload', json={ + 'user_id': 'test_user', + 'image': 'base64_image_here', + 'caption': f'Visit to {loc["name"]}', + 'manual_location': loc['coordinates'] + }) + + assert response.status_code == 200 + data = response.json + assert 'location' in data + assert data['location']['lat'] == loc['coordinates']['lat'] + + # Check location analysis was created + response = client.get('/dashboard/test_user/location_analysis') + assert response.status_code == 200 +``` + +--- + +## Step 6: 
Deployment Checklist + +- [ ] Install required packages: `pip install exifread scikit-learn` +- [ ] Create MongoDB indexes +- [ ] Update `requirements.txt` +- [ ] Add location-proximity settings to config +- [ ] Extend ingestion route with location extraction +- [ ] Create location analysis dashboard route and template +- [ ] Run integration tests +- [ ] Update API documentation +- [ ] Deploy to staging environment +- [ ] Monitor performance and errors + +--- + +## Troubleshooting + +### Issue: No GPS data in uploaded images + +**Solution**: Ensure fallback to manual location: +```python +if 'manual_location' in request.json: + location_data = request.json['manual_location'] +``` + +### Issue: Clustering fails with too few locations + +**Solution**: Add minimum check: +```python +if len(locations) < 6: + return {'error': 'Need at least 6 locations for clustering'} +``` + +### Issue: Slow proximity calculations + +**Solution**: Implement caching: +```python +from functools import lru_cache + +@lru_cache(maxsize=1000) +def cached_proximity(loc1_id, loc2_id): + return compute_proximity(loc1, loc2) +``` + +--- + +## Next Steps + +1. **Add Place Type Inference**: Use Google Places API to automatically tag locations +2. **Implement Real-time Updates**: WebSocket support for live clustering +3. **Cross-User Analysis**: Privacy-preserving aggregation of emotion patterns +4. **Mobile Support**: Optimize for mobile dashboard viewing + +--- + +**Integration Version**: 1.0 +**Last Updated**: February 3, 2026 +**Author**: Krishan (GSoC 2026 Contributor) diff --git a/docs/project_roadmap.md b/docs/project_roadmap.md new file mode 100644 index 0000000..ba1c972 --- /dev/null +++ b/docs/project_roadmap.md @@ -0,0 +1,463 @@ +# DREAMS Project Roadmap & Future Work + +## Overview + +This document outlines the current state of the DREAMS platform, the location-proximity module implementation roadmap for GSoC 2026, and future enhancements for continued development post-GSoC. 
+ +--- + +## Current State (February 2026) + +### Completed Features + +**Core DREAMS Platform**: +- Flask backend with user authentication +- Image upload and caption processing +- BLIP-based image captioning +- RoBERTa sentiment analysis +- Keyword extraction and HDBSCAN clustering +- MongoDB storage for posts, keywords, and themes +- LLM-based thematic analysis (Gemini integration) +- Dashboard with sentiment timelines and word clouds + +**Location-Proximity Foundation** (Pre-GSoC): +- Comprehensive research documentation +- System architecture diagrams +- Integration with existing EXIF extractor (PR #77 by kunal-595) +- Integration with emotion-timeline proximity (PR #70 by AnvayKharb) +- Test plan and evaluation framework +- Synthetic dataset with 17 locations and expected results +- Function stubs and interface definitions for multi-dimensional proximity +- API design specification +- Data model extensions + +### In Progress + +- Implementation of 6 basic location functions (building on PR #77) +- Multi-dimensional proximity calculator (geographic + categorical + linguistic + cultural) +- Emotion-location mapper (integrating with PR #70's emotion proximity) +- Semantic clustering module (complementing existing emotion timeline segmentation) + +--- + + +## GSoC 2026 Roadmap (Aligned with Official Timeline) + + + +### Pre-GSoC: Community Bonding & Planning (Feb 19 – Apr 30, 2026) +**Duration**: 10 weeks | **Effort**: ~40h (prep, onboarding, planning; not counted in GSoC 350h coding period) + +- Finalize project requirements and architecture +- Deep-dive into DREAMS codebase and data models +- Refine test plans and synthetic datasets +- Mentor meetings and onboarding + + +### Phase 1: Core Implementation (May 1 – July 7, 2026) +**Duration**: 10 weeks | **Effort**: 150h + +#### Deliverables: +1. 
**Basic Location Functions** (May) + - `calculate_distance()` - Haversine formula + - `validate_coordinates()` - GPS validation + - `extract_location()` - EXIF integration + - `compute_proximity()` - Distance + threshold check + - `find_nearby_locations()` - Radius-based search + - `cluster_locations()` - Simple geographic clustering + +2. **Multi-Dimensional Proximity** (June) + - `Place` class with categorical/linguistic/cultural attributes + - Geographic proximity (normalized Haversine) + - Categorical similarity (place type matching) + - Linguistic similarity (language context) + - Cultural similarity (Jaccard index on tags) + - Composite proximity with configurable weights + +3. **Emotion-Location Integration** (late June – early July) + - `EmotionLocationMapper` class + - `add_entry()` - Store emotion-location pairs + - `get_location_sentiment_profile()` - Aggregate per location + - `find_emotional_hotspots()` - Detect consistent emotions + - `compare_place_types()` - Category-level patterns + - `temporal_emotion_trend()` - Time-series analysis + +4. **Semantic Clustering** (July) + - `SemanticLocationClusterer` with DBSCAN + - Cluster emotion profile aggregation + - Visualization support + - Parameter tuning utilities + +**Milestones**: +- All unit tests passing (90%+ coverage) +- Validated against synthetic dataset +- Performance benchmarks met + +--- + + +### Phase 2: Integration & Testing (July 8 – September 1, 2026) +**Duration**: 8 weeks | **Effort**: 110h + +#### Deliverables: +1. **Backend Integration** (July) + - Extend `app/ingestion/routes.py` with location extraction + - Implement 4 REST API endpoints: + - `POST /api/upload` (enhanced with location) + - `GET /api/location/proximity` + - `POST /api/location/cluster` + - `GET /api/location/hotspots` + - MongoDB schema extensions and indexes + - API authentication and rate limiting + +2. 
**Dashboard Visualization** (August) + - `/location_analysis/` route + - HTML/CSS template with: + - Interactive map (Leaflet.js) with hotspot markers + - Cluster visualization cards + - Place-type comparison bar charts + - Temporal emotion patterns + - JavaScript for dynamic content loading + - Mobile-responsive design + +3. **End-to-End Testing** (August) + - Integration tests for full pipeline + - Performance testing (upload < 3s, clustering < 2s) + - Load testing (100 concurrent users) + - Cross-browser compatibility testing + +**Milestones**: +- Complete backend API functional +- Dashboard displays all analyses correctly +- All integration tests passing +- Performance targets achieved + +--- + + +### Phase 3: Evaluation, User Study & Polish (September 2 – October 15, 2026) +**Duration**: 6 weeks | **Effort**: 60h + +#### Deliverables: +1. **Evaluation Metrics** (early September) + - Proximity accuracy (MAE < 0.15) + - Clustering quality (Silhouette > 0.5, Purity > 0.80) + - Hotspot detection (F1 > 0.75) + - Emotion prediction accuracy (> 0.65) + - Performance benchmarks + +2. **Ablation Study** (mid September) + - 7 experimental conditions (Full, Ablate-Geo, etc.) + - Statistical analysis of results + - Baseline comparisons (Geo-only, K-means, Random) + - Results visualization and documentation + +3. **User Study** (late September) + - Protocol design + - Recruit 10-15 mental health researchers + - Conduct interviews/surveys + - Analyze qualitative feedback + - Document findings and recommendations + +4. 
**Documentation & Demo** (early October) + - Comprehensive demo script + - Video demonstration + - Case study analyses + - Updated architecture documentation + - API reference guide + +**Milestones**: +- All evaluation metrics meet success criteria +- Ablation study confirms multi-dimensional approach +- User feedback validates usefulness +- Complete documentation ready for handoff + +--- + + +### Final Phase: Wrap-up & Submission (October 16 – November 11, 2026) +**Duration**: 4 weeks | **Effort**: 30h + +#### Deliverables: +- GSoC final report +- Final presentation to mentors/community +- Code cleanup and refactoring +- Contributor guide for future developers +- Knowledge transfer documentation + +**Milestone**: Project ready for production deployment + +--- + +**Total GSoC Coding Effort (Phases 1–4): 350 hours** + +--- + +## Post-GSoC Enhancements (Future Work) + +### Short-term (3-6 months) + +#### 1. Advanced Place Enrichment +**Description**: Integrate external APIs for automatic place type detection and tagging. + +**Features**: +- Google Places API integration for: + - Automatic place type inference from GPS + - Business name and category extraction + - Photo matching for location verification +- Nominatim (OpenStreetMap) as free alternative +- Automatic cultural tag extraction from place descriptions + +**Benefits**: +- Reduces manual input burden +- Improves proximity accuracy with rich metadata +- Enables cross-user location matching + +**Effort**: 40 hours + +--- + +#### 2. Real-time Collaborative Features +**Description**: Enable cross-user emotion pattern analysis while preserving privacy. 
+ +**Features**: +- Anonymized aggregation of emotions at public locations +- "Others felt positive here too" insights +- Heatmap of community emotional landscape +- Privacy-preserving differential privacy techniques + +**Benefits**: +- Social validation for recovery journeys +- Community-level mental health insights +- Research opportunities for population-level analysis + +**Effort**: 60 hours + +--- + +#### 3. Mobile App Integration +**Description**: Native mobile support for location-aware photo uploads. + +**Features**: +- React Native mobile app +- Automatic GPS capture on photo upload +- Offline mode with sync +- Push notifications for emotional hotspot proximity +- Map-based photo browsing + +**Benefits**: +- Improves data quality (native GPS access) +- Better user experience +- Real-time location-emotion tracking + +**Effort**: 120 hours + +--- + +### Medium-term (6-12 months) + +#### 4. Predictive Emotion Modeling +**Description**: ML models to predict emotional responses at locations. + +**Features**: +- Train models on historical location-emotion data +- Predict likely emotion at new/unvisited locations +- Personalized recommendations for emotionally beneficial places +- Transfer learning across users with similar patterns + +**Benefits**: +- Proactive mental health support +- Personalized location recommendations +- Research insights into emotion-place associations + +**Effort**: 80 hours + +--- + +#### 5. Temporal Pattern Mining +**Description**: Advanced analysis of how location-emotion associations evolve. + +**Features**: +- Change point detection in temporal trends +- Season/time-of-day emotion patterns +- Recovery milestone identification +- Longitudinal trajectory modeling + +**Benefits**: +- Identify recovery inflection points +- Understand cyclical patterns +- Support intervention timing decisions + +**Effort**: 60 hours + +--- + +#### 6. Clinician Dashboard +**Description**: Specialized interface for therapists and researchers. 
+ +**Features**: +- Multi-patient aggregate view (with consent) +- Customizable reports for clinical sessions +- Export data in standard formats (CSV, FHIR) +- Annotation tools for clinical notes +- HIPAA compliance and data security + +**Benefits**: +- Clinical research support +- Therapy integration +- Evidence-based interventions + +**Effort**: 100 hours + +--- + +### Long-term (1-2 years) + +#### 7. Multi-modal Emotion Analysis +**Description**: Integrate additional data sources beyond photos/captions. + +**Features**: +- Audio analysis (voice emotion detection) +- Video micro-expression analysis +- Wearable integration (heart rate, activity) +- Social media cross-posting analysis +- Calendar/schedule correlation + +**Benefits**: +- Richer emotional context +- Triangulation of emotion signals +- Holistic recovery tracking + +**Effort**: 200 hours + +--- + +#### 8. Intervention Recommendation Engine +**Description**: AI-powered suggestions for mental health interventions. + +**Features**: +- Pattern-based intervention recommendations +- "You felt better after visiting parks" insights +- Evidence-based coping strategies +- Connection to mental health resources +- Crisis detection and emergency protocols + +**Benefits**: +- Actionable guidance for users +- Bridge between analysis and intervention +- Potential life-saving crisis support + +**Effort**: 150 hours + +--- + +#### 9. Research Platform & Data Sharing +**Description**: Enable academic research while protecting privacy. 
+ +**Features**: +- De-identified data export for researchers +- Federated learning across institutions +- Open dataset publication (with consent) +- Replication tools for published studies +- IRB-compliant data access workflows + +**Benefits**: +- Accelerate mental health research +- Validate findings across populations +- Build evidence base for digital therapeutics + +**Effort**: 120 hours + +--- + +## Technology Evolution + +### Current Stack +- **Backend**: Python Flask +- **Database**: MongoDB +- **ML**: Hugging Face Transformers, scikit-learn +- **Frontend**: Jinja2 templates, vanilla JavaScript +- **Visualization**: Leaflet.js (maps) + +### Planned Upgrades + +**Near-term**: +- **Frontend**: Migrate to React or Vue.js for richer interactivity +- **API**: GraphQL for flexible data querying +- **Caching**: Redis for proximity score caching +- **Task Queue**: Celery for async clustering jobs + +**Long-term**: +- **Database**: TimescaleDB for better time-series performance +- **ML Ops**: MLflow for model versioning and deployment +- **Monitoring**: Prometheus + Grafana for production observability +- **Scaling**: Kubernetes for horizontal scaling + +--- + +## Community & Ecosystem + +### Open Source Growth +- **Contributors**: Attract additional GSoC students for future summers +- **Plugin System**: Allow third-party emotion analysis models +- **API Clients**: Official Python/JavaScript client libraries +- **Documentation**: Sphinx-generated API docs, video tutorials + +### Integration Partnerships +- **Beehive**: Deeper integration with photo storytelling +- **Mental Health Apps**: Partnerships with existing platforms +- **Wearable Devices**: Official integrations with Fitbit, Apple Health +- **EHR Systems**: FHIR-compliant data export for clinical records + +### Research Collaborations +- **University Partnerships**: Pilot studies with psychology departments +- **Funding**: Grants for large-scale clinical trials +- **Publications**: Research papers on 
location-emotion findings +- **Conferences**: Presentations at mental health informatics conferences + +--- + +## Timeline Overview + +``` +2026 +│ +├─ Feb-Apr: Pre-GSoC Contributions (18 PRs) +├─ May-Aug: GSoC Implementation (350h) +├─ Sep-Dec: Production Deployment + Short-term Enhancements +│ +2027 +│ +├─ Q1-Q2: Medium-term Features (Predictive Modeling, Temporal Mining) +├─ Q3-Q4: Long-term Features (Multi-modal Analysis) +│ +2028+ +│ +└─ Research Platform, Intervention Engine, Ecosystem Growth +``` + +--- + +## Success Metrics (Long-term) + +### Usage Metrics +- **Users**: 10,000+ active users by 2027 +- **Photos Analyzed**: 1M+ photos with location data +- **Research Studies**: 10+ published papers using DREAMS data + +### Impact Metrics +- **Recovery Outcomes**: Demonstrated improvement in recovery trajectories +- **Clinical Adoption**: 50+ clinicians actively using platform +- **Community Engagement**: Active open-source community (100+ stars) + +### Technical Metrics +- **Performance**: 99.9% uptime, < 2s average response time +- **Scalability**: Support 100k+ concurrent users +- **Accuracy**: Emotion prediction accuracy > 0.75 + +--- + +**Version**: 1.0 +**Last Updated**: February 3, 2026 +**Author**: Krishan (GSoC 2026 Contributor) +**Status**: Living document - updated quarterly diff --git a/docs/risk_analysis.md b/docs/risk_analysis.md new file mode 100644 index 0000000..5ec8d7b --- /dev/null +++ b/docs/risk_analysis.md @@ -0,0 +1,404 @@ +# Risk Analysis & Mitigation Strategies + +## Overview + +This document identifies potential risks to the DREAMS location-proximity GSoC project and outlines comprehensive mitigation strategies to ensure successful completion within the 350-hour timeframe. 
+ +--- + +## Risk Matrix + +| Risk ID | Risk | Probability | Impact | Severity | Mitigation Priority | +|---------|------|-------------|--------|----------|-------------------| +| R1 | EXIF data unavailable in most photos | High | High | **Critical** | 1 | +| R2 | Multi-dimensional proximity doesn't improve results | Medium | High | **High** | 2 | +| R3 | Integration conflicts with existing code | Medium | Medium | **Medium** | 3 | +| R4 | Performance issues with large datasets | Medium | High | **High** | 4 | +| R5 | Scope creep beyond 350 hours | Medium | High | **High** | 5 | +| R6 | MongoDB schema changes break existing features | Low | High | **Medium** | 6 | +| R7 | User study recruitment difficulties | Medium | Low | **Low** | 7 | +| R8 | Clustering produces meaningless results | Low | Medium | **Low** | 8 | +| R9 | Mentor availability constraints | Low | Medium | **Low** | 9 | +| R10 | Technical dependencies (libraries, APIs) fail | Low | Medium | **Low** | 10 | + +**Severity Calculation**: Probability × Impact + +--- + +## Detailed Risk Analysis & Mitigation + +### R1: EXIF Data Unavailable in Most Photos +**Probability**: High (70%) +**Impact**: High +**Description**: Users may upload photos without GPS metadata (social media downloads, privacy-stripped images, scanned photos). 
+ +#### Mitigation Strategies + +**Primary**: Fallback to manual location entry +```python +if location_data.get('accuracy') == 'none': + # Prompt user for manual location + return {"requires_manual_location": True} +``` + +**Secondary**: Place inference from caption/keywords +```python +# Extract location names from caption +location_mentions = extract_place_names(caption) +# Use geocoding API to get coordinates +coords = geocode(location_mentions[0]) +``` + +**Tertiary**: Use IP-based geolocation as rough estimate +```python +# For logged-in users, approximate from IP +approx_location = geolocate_ip(request.remote_addr) +``` + +**Validation**: Track percentage of photos with GPS in test data. If < 30%, prioritize fallback mechanisms. + +**Timeline Impact**: +5 hours for robust fallback implementation + +--- + +### R2: Multi-Dimensional Proximity Doesn't Improve Results +**Probability**: Medium (40%) +**Impact**: High +**Description**: Ablation study may show that additional dimensions (linguistic, cultural) don't significantly improve clustering or emotion prediction over geographic distance alone. + +#### Mitigation Strategies + +**Primary**: Rigorous ablation study early (Week 8) +- Run all 7 experimental conditions +- If multi-dimensional doesn't outperform, pivot to geo + categorical only +- Document findings as research contribution (negative results are valuable) + +**Secondary**: Adaptive weighting +```python +# Learn optimal weights from data +weights = optimize_weights(validation_set) +``` + +**Tertiary**: Focus on interpretability over performance +- Even if metrics are similar, multi-dimensional may be more interpretable +- User study can validate semantic meaningfulness + +**Success Criteria Adjustment**: If multi-dimensional < 5% better than geo+categorical, simplify to two dimensions. 
+ +**Timeline Impact**: None (ablation already planned) + +--- + +### R3: Integration Conflicts with Existing Code +**Probability**: Medium (50%) +**Impact**: Medium +**Description**: Extending ingestion pipeline and dashboard may conflict with ongoing development or existing functionality. + +#### Mitigation Strategies + +**Primary**: Regular communication with mentors +- Weekly check-ins on any parallel development +- Review PRs in main branch before integration + +**Secondary**: Modular design with clear interfaces +```python +# Use dependency injection for easy testing +class LocationProximityService: + def __init__(self, db_client, exif_extractor): + self.db = db_client + self.exif = exif_extractor +``` + +**Tertiary**: Feature flags for gradual rollout +```python +if app.config.get('ENABLE_LOCATION_PROXIMITY'): + # New functionality +``` + +**Validation**: Integration tests run against latest main branch weekly. + +**Timeline Impact**: +10 hours for conflict resolution (already budgeted in Phase 2) + +--- + +### R4: Performance Issues with Large Datasets +**Probability**: Medium (40%) +**Impact**: High +**Description**: Proximity calculations for 1000+ locations may exceed 3-second upload target. + +#### Mitigation Strategies + +**Primary**: Optimization techniques +- **Spatial indexing**: MongoDB geospatial queries for nearby locations +- **Caching**: Cache proximity scores between location pairs +- **Batch processing**: Compute proximity matrix in background task +- **Approximate algorithms**: Use locality-sensitive hashing for large-scale + +**Secondary**: Performance benchmarks early +```python +@pytest.mark.benchmark +def test_proximity_performance(): + """Ensure proximity calculation < 100ms for 100 locations.""" + start = time.time() + compute_proximity_matrix(100_locations) + assert time.time() - start < 0.1 +``` + +**Tertiary**: Incremental computation +```python +# Only compute proximity for new location vs. 
existing +# Don't recompute entire matrix on each upload +``` + +**Success Criteria**: If upload > 3s with 100 locations, move clustering to async background job. + +**Timeline Impact**: +8 hours for optimization (included in Week 6-7) + +--- + +### R5: Scope Creep Beyond 350 Hours +**Probability**: Medium (50%) +**Impact**: High +**Description**: Feature requests or perfectionism may expand scope beyond planned milestones. + +#### Mitigation Strategies + +**Primary**: Strict scope management +- **MVP focus**: Core features only (proximity, clustering, hotspots) +- **Future work list**: Document "nice-to-haves" for post-GSoC +- **Weekly hour tracking**: Monitor actual vs. planned hours + +**Secondary**: Ruthless prioritization +``` +P0 (Must-have): Basic proximity, clustering, integration +P1 (Should-have): Dashboard visualization, ablation study +P2 (Nice-to-have): Advanced analytics, real-time updates +P3 (Future): Cross-user analysis, ML predictions +``` + +**Tertiary**: Timeboxing +- Each task has maximum hour allocation +- If exceeded, move to "polish" phase or defer + +**Validation**: If cumulative hours > planned by 10%, cut P2 features. + +**Timeline Impact**: None (proactive management) + +--- + +### R6: MongoDB Schema Changes Break Existing Features +**Probability**: Low (20%) +**Impact**: High +**Description**: Adding new collections or fields may inadvertently break existing queries or functionality. + +#### Mitigation Strategies + +**Primary**: Backward compatibility +```python +# Add fields, don't modify existing ones +post_doc = { + # ... 
existing fields unchanged + 'location': location_data # NEW, optional +} +``` + +**Secondary**: Comprehensive testing +- Run full existing test suite before/after schema changes +- Integration tests validate old functionality still works + +**Tertiary**: Database migrations +```python +# Migration script to add new fields safely +def migrate_add_location_field(): + db.posts.update_many( + {'location': {'$exists': False}}, + {'$set': {'location': {'accuracy': 'none'}}} + ) +``` + +**Validation**: All existing tests pass after schema extension. + +**Timeline Impact**: +4 hours for careful migration (budgeted) + +--- + +### R7: User Study Recruitment Difficulties +**Probability**: Medium (40%) +**Impact**: Low +**Description**: May struggle to recruit 10-15 mental health researchers for user study in August. + +#### Mitigation Strategies + +**Primary**: Early recruitment +- Start outreach in Week 8 (2 weeks before user study) +- Leverage mentors' professional networks +- Offer small incentive (e.g., $25 Amazon gift card) + +**Secondary**: Alternative participants +- PhD students in clinical psychology +- Recovery support group facilitators +- DREAMS/Beehive existing community members + +**Tertiary**: Internal validation +- If < 5 external participants, conduct internal review with mentors +- Document as "expert evaluation" instead of "user study" + +**Success Criteria**: Minimum 5 participants provides sufficient qualitative feedback. + +**Timeline Impact**: None (user study is enhancement, not blocker) + +--- + +### R8: Clustering Produces Meaningless Results +**Probability**: Low (25%) +**Impact**: Medium +**Description**: DBSCAN may produce many outliers or fail to find coherent clusters with real data. 
+ +#### Mitigation Strategies + +**Primary**: Adaptive parameters +```python +# Automatically tune eps and min_samples +from sklearn.model_selection import GridSearchCV +best_params = grid_search_dbscan(proximity_matrix) +``` + +**Secondary**: Alternative algorithms +- Try HDBSCAN (hierarchical DBSCAN) for adaptive density +- Try Agglomerative Clustering with proximity distance matrix +- Ensemble of multiple clustering methods + +**Tertiary**: Fallback to simpler grouping +```python +# If clustering fails, fall back to place-type grouping +if silhouette_score < 0.3: + # Just group by place_type + clusters = group_by_place_type(locations) +``` + +**Validation**: Synthetic dataset should always produce 3 clean clusters. + +**Timeline Impact**: +6 hours for parameter tuning (budgeted) + +--- + +### R9: Mentor Availability Constraints +**Probability**: Low (20%) +**Impact**: Medium +**Description**: Mentors may have limited availability during summer for weekly meetings. + +#### Mitigation Strategies + +**Primary**: Asynchronous communication +- Detailed weekly progress reports via email/GitHub discussions +- Use project board (GitHub Projects) for transparency +- Record demo videos for async review + +**Secondary**: Flexible meeting schedule +- Schedule meetings 2 weeks in advance +- Offer multiple time slot options +- Accept shorter 30-min check-ins if needed + +**Tertiary**: Self-sufficiency +- Make decisions independently when appropriate +- Document rationale for mentor review later +- Escalate only blockers that require immediate input + +**Success Criteria**: Minimum 1 mentor interaction per week (meeting or detailed async feedback). + +**Timeline Impact**: None + +--- + +### R10: Technical Dependencies Fail +**Probability**: Low (15%) +**Impact**: Medium +**Description**: External libraries (scikit-learn, exifread) or services (Google Places API) may have issues. 
+ +#### Mitigation Strategies + +**Primary**: Pin dependency versions +```txt +# requirements.txt +scikit-learn==1.4.0 +exifread==3.0.0 +``` + +**Secondary**: Fallback implementations +```python +# If exifread fails, use Pillow +try: + from exifread import process_file +except ImportError: + # Use Pillow fallback + from PIL import Image +``` + +**Tertiary**: No external API dependencies for MVP +- Defer Google Places integration to future work +- Core functionality works offline with synthetic place types + +**Validation**: Test in clean environment before each phase. + +**Timeline Impact**: None (good practice) + +--- + +## Risk Monitoring & Response Plan + +### Weekly Risk Review +Every mentor meeting, review: +1. Have any risks materialized? +2. Has probability/impact changed for any risk? +3. Are mitigation strategies working? + +### Escalation Criteria +Escalate to mentors immediately if: +- Any critical risk materializes +- Cumulative hours > 10% over plan +- Core functionality blocker arises + +### Risk Log +Maintain `docs/risk_log.md` with: +- Date risk identified +- Mitigation actions taken +- Current status +- Lessons learned + +--- + +## Contingency Plans by Phase + +### Phase 1 Contingency +**If**: Core implementation takes 140h instead of 120h +**Then**: Reduce dashboard polish in Phase 2 (cut 20h) + +### Phase 2 Contingency +**If**: Integration issues consume extra time +**Then**: Defer advanced visualizations, focus on basic dashboard + +### Phase 3 Contingency +**If**: Evaluation reveals major issues +**Then**: Allocate Final Week hours to fixes instead of documentation + +--- + +## Success Probability Assessment + +Given mitigation strategies: + +| Outcome | Probability | +|---------|-------------| +| **Complete Success** (All deliverables, on time) | 70% | +| **Partial Success** (Core features, minor delays) | 25% | +| **Significant Issues** (Major delays or missing features) | 5% | + +**Overall Project Risk Level**: **LOW-MEDIUM** + +With 
proactive risk management, rigorous testing, and mentor collaboration, this project has a high likelihood of successful completion within the 350-hour GSoC timeframe. + +--- + +**Version**: 1.0 +**Last Updated**: February 3, 2026 +**Author**: Krishan (GSoC 2026 Contributor) diff --git a/dream-integration/app/templates/index.html b/dream-integration/app/templates/index.html index 6e5a54e..f111e69 100644 --- a/dream-integration/app/templates/index.html +++ b/dream-integration/app/templates/index.html @@ -70,7 +70,7 @@

Description

{% if text_scores or image_scores %}
-

📊 Enhanced Emotion Analysis Results

+

Enhanced Emotion Analysis Results

Debug Information:
@@ -82,10 +82,10 @@

📊 Enhanced Emotion Analysis Results

@@ -108,7 +108,7 @@

📊 Enhanced Emotion Analysis Results

- 💡 Tip: Use Ctrl+Scroll to zoom charts for better detail viewing + Tip: Use Ctrl+Scroll to zoom charts for better detail viewing
diff --git a/dreamsApp/app/dashboard/main.py b/dreamsApp/app/dashboard/main.py index 2231508..1897fa7 100644 --- a/dreamsApp/app/dashboard/main.py +++ b/dreamsApp/app/dashboard/main.py @@ -51,7 +51,7 @@ def profile(target): df["rolling_avg"] = df["score"].rolling(window=5, min_periods=1).mean() df["ema_score"] = df["score"].ewm(span=5, adjust=False).mean() - # 📈 Create user-friendly visual + # Create user-friendly visual plt.figure(figsize=(12, 6)) plt.plot(df["timestamp"], df["cumulative_score"], diff --git a/dreamsApp/docs/data-model.md b/dreamsApp/docs/data-model.md index b99d60f..b58dfdd 100644 --- a/dreamsApp/docs/data-model.md +++ b/dreamsApp/docs/data-model.md @@ -93,10 +93,119 @@ Each theme entry includes: --- +--- + +## 5. Collection: `location_analysis` + +Stores location-proximity analysis results, clusters, and emotional hotspots per user. + +| Field | Type | Description | +|--------------|----------------|----------------------------------------------| +| `_id` | ObjectId | Document ID | +| `user_id` | string | Associated user | +| `locations` | array | All locations with visit history and emotions| +| `clusters` | array | Semantic clusters of similar locations | +| `hotspots` | array | Emotional hotspots (consistent emotions) | +| `updated_at` | datetime | Last update timestamp | + +**Location Entry Example:** +```json +{ + "id": "loc_001", + "name": "St. 
Mary's Church", + "coordinates": {"lat": 61.2167, "lon": -149.8944}, + "place_type": "church", + "language": "english", + "cultural_tags": ["catholic", "christian", "traditional"], + "visits": [ + { + "timestamp": "2024-01-21T10:00:00Z", + "post_id": "...", + "sentiment": "positive", + "score": 0.88 + } + ], + "emotion_profile": { + "positive": 0.80, + "neutral": 0.15, + "negative": 0.05 + } +} +``` + +**Cluster Example:** +```json +{ + "cluster_id": 0, + "label": "Religious Places", + "members": ["loc_001", "loc_002", "loc_003"], + "centroid": {"lat": 61.2186, "lon": -149.8870}, + "emotion_distribution": { + "positive": 0.82, + "neutral": 0.12, + "negative": 0.06 + }, + "place_types": ["church", "church", "church"], + "created_at": "2024-02-01T00:00:00Z" +} +``` + +**Hotspot Example:** +```json +{ + "location_id": "loc_001", + "name": "St. Mary's Church", + "sentiment": "positive", + "confidence": 0.80, + "visit_count": 5, + "coordinates": {"lat": 61.2167, "lon": -149.8944} +} +``` + +--- + +## 6. Collection: `emotion_location_entries` + +Stores individual emotion-location mappings for fine-grained temporal analysis. 
+ +| Field | Type | Description | +|---------------|----------|------------------------------------------| +| `_id` | ObjectId | Document ID | +| `user_id` | string | Associated user | +| `location_id` | string | Reference to location in location_analysis | +| `post_id` | ObjectId | Reference to post | +| `sentiment` | string | Emotion label (positive/neutral/negative) | +| `score` | float | Sentiment confidence (0-1) | +| `timestamp` | datetime | When emotion was recorded | +| `place_type` | string | Type of place (church, hospital, park) | +| `coordinates` | object | GPS coordinates | + +**Example:** +```json +{ + "user_id": "user_001", + "location_id": "loc_001", + "post_id": "...", + "sentiment": "positive", + "score": 0.88, + "timestamp": "2024-01-21T10:00:00Z", + "place_type": "church", + "coordinates": {"lat": 61.2167, "lon": -149.8944} +} +``` + +This collection enables: +- Temporal emotion trend analysis at specific locations +- Cross-location emotion pattern detection +- Place-type emotion aggregation + +--- + ## Access Control - Only the `users` collection is authenticated via Flask-Login. - All other collections are accessed programmatically by the backend and admin panel. +- Location data is pseudonymized with user IDs to protect privacy. 
--- diff --git a/dreamsApp/exif_extractor.py b/dreamsApp/exif_extractor.py new file mode 100644 index 0000000..132c589 --- /dev/null +++ b/dreamsApp/exif_extractor.py @@ -0,0 +1,172 @@ +"""EXIF metadata extraction module for photo analysis.""" + +import exifread +from PIL import Image +from PIL.ExifTags import TAGS, GPSTAGS +from datetime import datetime +import logging + +logger = logging.getLogger(__name__) + +class EXIFExtractor: + """Extract and process EXIF metadata from images.""" + + def extract_metadata(self, image_path): + """Extract metadata using fallback strategy.""" + try: + return self._extract_exifread(image_path) + except Exception as e: + logger.warning(f"exifread failed: {e}, trying Pillow") + try: + return self._extract_pillow(image_path) + except Exception as e2: + logger.error(f"Both extractors failed: {e2}") + return self._empty_metadata() + + def _extract_exifread(self, image_path): + """Extract using exifread library.""" + with open(image_path, 'rb') as f: + tags = exifread.process_file(f) + + return { + "timestamp": self._parse_timestamp(tags), + "location": self._parse_gps_exifread(tags), + "camera": self._parse_camera_exifread(tags), + "processing": {"exif_source": "exifread"} + } + + def _extract_pillow(self, image_path): + """Extract using Pillow as fallback.""" + image = Image.open(image_path) + exif = image._getexif() + + if not exif: + return self._empty_metadata() + + return { + "timestamp": self._parse_timestamp_pillow(exif), + "location": self._parse_gps_pillow(exif), + "camera": self._parse_camera_pillow(exif), + "processing": {"exif_source": "pillow"} + } + + def _parse_gps_exifread(self, tags): + """Parse GPS coordinates from exifread tags.""" + lat = self._get_gps_coordinate(tags, 'GPS GPSLatitude', 'GPS GPSLatitudeRef') + lon = self._get_gps_coordinate(tags, 'GPS GPSLongitude', 'GPS GPSLongitudeRef') + + if lat and lon: + return {"lat": lat, "lon": lon, "accuracy": "high"} + return {"accuracy": "none"} + + def 
_get_gps_coordinate(self, tags, coord_key, ref_key): + """Convert GPS coordinate to decimal degrees.""" + coord = tags.get(coord_key) + ref = tags.get(ref_key) + + if not coord or not ref: + return None + + coord_str = str(coord) + ref_str = str(ref) + + # Parse coordinate string format + parts = coord_str.replace('[', '').replace(']', '').split(', ') + if len(parts) != 3: + return None + + try: + degrees = float(parts[0]) + minutes = float(parts[1]) + seconds = float(parts[2]) + + decimal = degrees + (minutes / 60.0) + (seconds / 3600.0) + + if ref_str in ['S', 'W']: + decimal = -decimal + + return decimal + except (ValueError, IndexError): + return None + + def _parse_timestamp(self, tags): + """Parse timestamp from exifread tags.""" + for key in ['EXIF DateTime', 'Image DateTime', 'EXIF DateTimeOriginal']: + if key in tags: + try: + dt_str = str(tags[key]) + return datetime.strptime(dt_str, '%Y:%m:%d %H:%M:%S').isoformat() + except ValueError: + continue + return None + + def _parse_camera_exifread(self, tags): + """Parse camera info from exifread tags.""" + return { + "make": str(tags.get('Image Make', '')), + "model": str(tags.get('Image Model', '')) + } + + def _parse_timestamp_pillow(self, exif): + """Parse timestamp from Pillow exif data.""" + for tag in [36867, 306, 36868]: # DateTimeOriginal, DateTime, DateTimeDigitized + if tag in exif: + try: + dt_str = exif[tag] + if isinstance(dt_str, bytes): + dt_str = dt_str.decode('utf-8') + return datetime.strptime(dt_str, '%Y:%m:%d %H:%M:%S').isoformat() + except (ValueError, AttributeError): + continue + return None + + def _parse_gps_pillow(self, exif): + """Parse GPS coordinates from Pillow exif data.""" + if 'GPSInfo' not in exif: + return {"accuracy": "none"} + + gps_info = exif['GPSInfo'] + + def get_coordinate(coord, ref): + if coord not in gps_info or ref not in gps_info: + return None + coord_vals = gps_info[coord] + ref_val = gps_info[ref] + + try: + degrees = coord_vals[0][0] / coord_vals[0][1] + 
minutes = coord_vals[1][0] / coord_vals[1][1] + seconds = coord_vals[2][0] / coord_vals[2][1] + + decimal = degrees + (minutes / 60.0) + (seconds / 3600.0) + + if ref_val in [b'S', b'W']: + decimal = -decimal + + return decimal + except (IndexError, TypeError, ZeroDivisionError): + return None + + lat = get_coordinate(2, 1) # GPSLatitude, GPSLatitudeRef + lon = get_coordinate(4, 3) # GPSLongitude, GPSLongitudeRef + + if lat and lon: + return {"lat": lat, "lon": lon, "accuracy": "high"} + return {"accuracy": "none"} + + def _parse_camera_pillow(self, exif): + """Parse camera info from Pillow exif data.""" + return { + "make": exif.get(271, "").decode('utf-8') if isinstance(exif.get(271), bytes) else str(exif.get(271, "")), + "model": exif.get(272, "").decode('utf-8') if isinstance(exif.get(272), bytes) else str(exif.get(272, "")) + } + + def _empty_metadata(self): + """Return empty metadata structure.""" + return { + "timestamp": None, + "location": {"accuracy": "none"}, + "camera": {"make": "", "model": ""}, + "processing": {"exif_source": "none"} + } + diff --git a/dreamsApp/location_proximity.py b/dreamsApp/location_proximity.py index 240515a..12340f3 100644 --- a/dreamsApp/location_proximity.py +++ b/dreamsApp/location_proximity.py @@ -1,6 +1,11 @@ -"""Location proximity analysis module for photo clustering.""" +"""Location proximity analysis module for photo clustering. + +Builds upon existing EXIF extraction (PR #77) and emotion proximity (PR #70) +to add multi-dimensional location-based clustering and analysis. +""" from typing import List, Dict, Optional, Tuple, TypedDict +from dreamsApp.exif_extractor import EXIFExtractor class Location(TypedDict): @@ -18,13 +23,15 @@ class ProximityResult(TypedDict): def extract_location(metadata: Dict) -> Optional[Location]: """Extract location data from photo metadata. + Integrates with existing EXIFExtractor from PR #77. 
+ Args: metadata: Photo metadata dictionary containing location information Returns: Dictionary with lat/lon coordinates and accuracy, or None if no location data """ - raise NotImplementedError + raise NotImplementedError # TODO: Use EXIFExtractor for actual implementation def compute_proximity(location1: Location, location2: Location, threshold_meters: float) -> ProximityResult: diff --git a/location_proximity/README.md b/location_proximity/README.md index 242e9f6..de24e01 100644 --- a/location_proximity/README.md +++ b/location_proximity/README.md @@ -4,6 +4,11 @@ This module extends DREAMS to analyze how semantically similar locations (not just geographically close ones) influence emotional patterns in recovery journeys. +**Building Upon**: +- **PR #77** (kunal-595): Uses existing `dreamsApp/exif_extractor.py` for GPS extraction +- **PR #70** (AnvayKharb): Integrates with `analytics/emotion_proximity.py` for emotion timelines +- **This Module**: Adds multi-dimensional spatial proximity (geographic + categorical + linguistic + cultural) + --- ## Core Concept @@ -157,8 +162,8 @@ St. 
Mary's Church ↔ Alaska Native Medical Center : 0.120 Holy Trinity Church ↔ Providence Hospital : 0.115 Alaska Native Medical Center ↔ Providence Hospital : 0.725 -✓ Notice: Two churches have high proximity despite different locations -✓ Notice: Two hospitals cluster together semantically +Notice: Two churches have high proximity despite different locations +Notice: Two hospitals cluster together semantically ``` --- diff --git a/plans/pre_gsoc_contribution_plan.md b/plans/pre_gsoc_contribution_plan.md new file mode 100644 index 0000000..b0e635c --- /dev/null +++ b/plans/pre_gsoc_contribution_plan.md @@ -0,0 +1,197 @@ +# Pre-GSoC Contribution Plan for DREAMS: Multi-Dimensional Location Proximity and Emotion Analysis + +## Overview + +This pre-GSoC contribution plan outlines 18 pull requests (PRs) spread over 7 weeks, designed to strengthen the DREAMS project's foundation in multi-dimensional location proximity and emotion analysis. The plan focuses on enhancing the Flask backend with MongoDB integration, building upon existing modules like `exif_extractor` and `location_proximity`, while ensuring alignment with the GSoC 2026 proposal. + +**Recent Contributions by Other Developers**: +- **PR #77** (kunal-595): EXIF GPS extraction implemented - we integrate with this +- **PR #70** (AnvayKharb): Time-aware emotion proximity - we complement this with spatial proximity +- **PR #79** (anish1206): CHIME mental health framework - our emotion-location work aligns with CHIME dimensions + +**Total Duration**: 7 weeks +**Total PRs**: 18 +**Focus Areas**: Architecture (4 PRs), Research (3 PRs), Interfaces (4 PRs), Testing (4 PRs), Proposal Alignment (3 PRs) +**Key Technologies**: Python Flask, MongoDB, scikit-learn, Hugging Face Transformers + +--- + +## Weekly Breakdown + +### Week 1: Research Foundation and Architecture Setup +**Focus**: Establish research base and architectural foundations. 
+ +#### PR 1: Research Literature Review Update +**Description**: Expand the research foundation in `location_proximity/RESEARCH.md` with additional literature on affective geography and semantic similarity. Add references to recent papers on emotion-location associations in mental health recovery. +**Dependencies**: None +**Deliverables**: +- Updated `location_proximity/RESEARCH.md` with 5+ new citations +- Summary of key findings in `docs/research_summary.md` + +#### PR 2: Architecture Documentation Refinement +**Description**: Refine `ARCHITECTURE.md` to include detailed MongoDB schema designs for location-emotion data storage. Update Mermaid diagrams to reflect MongoDB integration points. +**Dependencies**: PR 1 +**Deliverables**: +- Enhanced `ARCHITECTURE.md` with MongoDB-specific sections +- New schema diagrams for location and emotion collections + +#### PR 3: Database Schema Implementation +**Description**: Implement MongoDB schemas in `dreamsApp/app/models.py` for storing location proximity data and emotion-location mappings. Ensure compatibility with existing post schema. +**Dependencies**: PR 2 +**Deliverables**: +- Updated `dreamsApp/app/models.py` with new MongoDB collections +- Migration scripts for schema updates + +### Week 2: Core Location Proximity Enhancements +**Focus**: Strengthen core proximity calculation modules. + +#### PR 4: Enhanced EXIF Location Extraction +**Status**: **Completed by PR #77** (kunal-595) - EXIF extractor already implemented in `dreamsApp/exif_extractor.py` +**Description**: ~~Improve `dreamsApp/exif_extractor.py`~~ Integration with existing EXIF extractor to ensure compatibility with multi-dimensional proximity module. 
+**Dependencies**: PR 3 +**Deliverables**: +- ~~Enhanced `dreamsApp/exif_extractor.py` with better error handling~~ Already exists +- Integration tests with existing `EXIFExtractor` class + +#### PR 5: Multi-Dimensional Proximity Calculator Refinement +**Description**: Refine `location_proximity/proximity_calculator.py` to optimize weighted proximity calculations and add configurable dimension weights. +**Dependencies**: PR 4 +**Deliverables**: +- Updated `location_proximity/proximity_calculator.py` +- Performance benchmarks for proximity calculations + +#### PR 6: Emotion-Location Mapper Implementation +**Description**: Complete implementation of `location_proximity/emotion_location_mapper.py` with methods for temporal emotion trends and hotspot identification. +**Dependencies**: PR 5 +**Deliverables**: +- Functional `location_proximity/emotion_location_mapper.py` +- Integration with sentiment analysis from `dreamsApp/app/utils/sentiment.py` + +### Week 3: Semantic Clustering and Interface Development +**Focus**: Implement clustering algorithms and initial API interfaces. + +#### PR 7: Semantic Clustering Enhancements +**Description**: Enhance `location_proximity/semantic_clustering.py` with improved DBSCAN parameters and add visualization support for clusters. +**Dependencies**: PR 6 +**Deliverables**: +- Updated `location_proximity/semantic_clustering.py` +- Clustering quality metrics implementation + +#### PR 8: REST API Endpoints for Location Analysis +**Description**: Add new API endpoints in `dreamsApp/app/ingestion/routes.py` for proximity calculations and location-emotion queries. +**Dependencies**: PR 7 +**Deliverables**: +- New routes in `dreamsApp/app/ingestion/routes.py` +- API documentation updates + +#### PR 9: Dashboard UI Components for Location Proximity +**Description**: Create new dashboard templates in `dreamsApp/app/templates/dashboard/` for visualizing location proximity patterns. 
+**Dependencies**: PR 8 +**Deliverables**: +- New HTML templates for location analysis +- Basic JavaScript for map visualizations + +### Week 4: Emotion Analysis Integration and Testing +**Focus**: Integrate emotion analysis and begin comprehensive testing. + +#### PR 10: Sentiment Analysis Integration with Locations +**Description**: Integrate emotion analysis from `dreamsApp/app/utils/sentiment.py` with location data in the ingestion pipeline. +**Dependencies**: PR 9 +**Deliverables**: +- Updated ingestion routes with emotion-location mapping +- Data flow integration in `dreamsApp/app/ingestion/routes.py` + +#### PR 11: Unit Tests for Location Proximity Modules +**Description**: Create comprehensive unit tests for all location proximity components in `tests/test_location_proximity.py`. +**Dependencies**: PR 10 +**Deliverables**: +- Complete test suite in `tests/test_location_proximity.py` +- Test data fixtures for locations and emotions + +#### PR 12: Integration Tests for Location-Emotion Pipeline +**Description**: Develop integration tests covering the full pipeline from image upload to emotion-location analysis. +**Dependencies**: PR 11 +**Deliverables**: +- New integration test file `tests/test_location_emotion_integration.py` +- End-to-end test scenarios + +### Week 5: Performance Optimization and Documentation +**Focus**: Optimize performance and enhance documentation. + +#### PR 13: Performance Optimization for Proximity Calculations +**Description**: Implement caching and batch processing optimizations for proximity calculations in the Flask app. +**Dependencies**: PR 12 +**Deliverables**: +- Caching layer in `dreamsApp/app/utils/` +- Performance improvements documentation + +#### PR 14: Comprehensive Documentation Updates +**Description**: Update all README files and create user guides for location proximity features. 
+**Dependencies**: PR 13 +**Deliverables**: +- Updated `README.md`, `location_proximity/README.md` +- User guide in `docs/location_proximity_guide.md` + +#### PR 15: Demo Script and Example Improvements +**Description**: Enhance the demo script in `location_proximity/demo.py` with more comprehensive examples and better output formatting. +**Dependencies**: PR 14 +**Deliverables**: +- Improved `location_proximity/demo.py` +- Sample data for demonstrations + +### Week 6: Advanced Features and Validation +**Focus**: Implement advanced features and validation metrics. + +#### PR 16: Validation Metrics and Statistical Analysis +**Description**: Implement validation metrics from the research foundation, including clustering quality and emotion prediction accuracy. +**Dependencies**: PR 15 +**Deliverables**: +- New module `location_proximity/validation_metrics.py` +- Statistical analysis functions + +#### PR 17: Cross-User Location-Emotion Analysis +**Description**: Add features for analyzing location-emotion patterns across multiple users while maintaining privacy. +**Dependencies**: PR 16 +**Deliverables**: +- Cross-user analysis functions in `location_proximity/emotion_location_mapper.py` +- Privacy-preserving aggregation methods + +### Week 7: Final Integration and Proposal Alignment +**Focus**: Finalize integrations and ensure proposal compliance. + +#### PR 18: Final Proposal Alignment and Integration Testing +**Description**: Conduct final review to ensure all contributions align with GSoC 2026 proposal requirements. Perform comprehensive integration testing. 
+**Dependencies**: PR 17 +**Deliverables**: +- Proposal alignment checklist +- Final integration test results +- Updated project roadmap + +--- + +## Dependencies and Prerequisites + +- **Technical Prerequisites**: Python 3.8+, Flask, MongoDB, scikit-learn, Pillow +- **Project Knowledge**: Familiarity with DREAMS architecture and existing modules +- **Testing Environment**: Access to test MongoDB instance and sample image data + +## Success Metrics + +- All 18 PRs merged successfully +- 90%+ test coverage for new location proximity code +- Performance benchmarks meeting requirements +- Documentation completeness for all new features +- Alignment with GSoC proposal objectives + +## Risk Mitigation + +- Weekly code reviews to catch integration issues early +- Incremental testing to ensure stability +- Regular alignment checks with project mentors +- Backup plans for complex PRs with multiple dependencies + +--- + +**Plan Created**: December 2025 +**Total Estimated Effort**: 18 PRs across 7 weeks +**Primary Contributor**: Krishan (Pre-GSoC Contributor) \ No newline at end of file diff --git a/tests/test_exif_extraction.py b/tests/test_exif_extraction.py new file mode 100644 index 0000000..47f10e3 --- /dev/null +++ b/tests/test_exif_extraction.py @@ -0,0 +1,70 @@ +"""Tests for EXIF extraction functionality.""" + +import pytest +from unittest.mock import patch, mock_open +from dreamsApp.exif_extractor import EXIFExtractor + +class TestEXIFExtractor: + + def setup_method(self): + self.extractor = EXIFExtractor() + + def test_empty_metadata_structure(self): + """Test empty metadata has correct structure.""" + empty = self.extractor._empty_metadata() + + assert "timestamp" in empty + assert "location" in empty + assert "camera" in empty + assert "processing" in empty + assert empty["location"]["accuracy"] == "none" + + @patch('exifread.process_file') + @patch('builtins.open', new_callable=mock_open) + def test_exifread_extraction(self, mock_file, mock_process): + """Test 
 successful exifread extraction.""" + mock_process.return_value = { + 'EXIF DateTime': type('Tag', (), {'__str__': lambda self: '2024:01:15 14:30:00'})(), + 'GPS GPSLatitude': type('Tag', (), {'values': [61, 13, 4.68], '__str__': lambda self: '[61, 13, 4.68]'})(), + 'GPS GPSLatitudeRef': type('Tag', (), {'values': 'N', '__str__': lambda self: 'N'})(), + 'Image Make': type('Tag', (), {'__str__': lambda self: 'Apple'})() + } + + result = self.extractor._extract_exifread('test.jpg') + + assert result["processing"]["exif_source"] == "exifread" + assert result["timestamp"] == "2024-01-15T14:30:00" + assert result["camera"]["make"] == "Apple" + + def test_gps_coordinate_conversion(self): + """Test GPS coordinate conversion to decimal.""" + tags = { + 'GPS GPSLatitude': type('Tag', (), {'values': [61, 13, 4.68], '__str__': lambda self: '[61, 13, 4.68]'})(), + 'GPS GPSLatitudeRef': type('Tag', (), {'values': 'N', '__str__': lambda self: 'N'})() + } + + coord = self.extractor._get_gps_coordinate(tags, 'GPS GPSLatitude', 'GPS GPSLatitudeRef') + + assert coord is not None + assert abs(coord - 61.2180) < 0.001 # Approximate check + + def test_missing_gps_data(self): + """Test handling of missing GPS data.""" + tags = {} + + coord = self.extractor._get_gps_coordinate(tags, 'GPS GPSLatitude', 'GPS GPSLatitudeRef') + + assert coord is None + + @patch('dreamsApp.exif_extractor.EXIFExtractor._extract_exifread') + @patch('dreamsApp.exif_extractor.EXIFExtractor._extract_pillow') + def test_fallback_strategy(self, mock_pillow, mock_exifread): + """Test fallback from exifread to Pillow.""" + mock_exifread.side_effect = Exception("exifread failed") + mock_pillow.return_value = {"processing": {"exif_source": "pillow"}} + + result = self.extractor.extract_metadata('test.jpg') + + assert result["processing"]["exif_source"] == "pillow" + mock_exifread.assert_called_once() + mock_pillow.assert_called_once() \ No newline at end of file From 80837b1fb061c718e905aa069ace8fbbbc37a156 Mon Sep 17 00:00:00 2001 From: Krishan Yadav Date: Sun, 8 Feb 2026 12:05:44 +0530 Subject: [PATCH 2/3] Update 
exif_extractor.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- dreamsApp/exif_extractor.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/dreamsApp/exif_extractor.py b/dreamsApp/exif_extractor.py index 132c589..69d6cf2 100644 --- a/dreamsApp/exif_extractor.py +++ b/dreamsApp/exif_extractor.py @@ -67,26 +67,24 @@ def _get_gps_coordinate(self, tags, coord_key, ref_key): if not coord or not ref: return None - coord_str = str(coord) - ref_str = str(ref) - - # Parse coordinate string format - parts = coord_str.replace('[', '').replace(']', '').split(', ') - if len(parts) != 3: - return None - try: - degrees = float(parts[0]) - minutes = float(parts[1]) - seconds = float(parts[2]) - + # Directly access the 'values' attribute which contains a list of Ratio objects. + # This is more robust than parsing the string representation. + if not hasattr(coord, 'values') or len(coord.values) != 3: + return None + + degrees = float(coord.values[0]) + minutes = float(coord.values[1]) + seconds = float(coord.values[2]) + decimal = degrees + (minutes / 60.0) + (seconds / 3600.0) - - if ref_str in ['S', 'W']: + + if hasattr(ref, 'values') and str(ref.values) in ['S', 'W']: decimal = -decimal return decimal - except (ValueError, IndexError): + except (ValueError, IndexError, TypeError) as e: + logger.warning(f"Could not parse GPS coordinate: {coord}. 
Error: {e}") return None def _parse_timestamp(self, tags): From 9db0ecbb7c850893c183f8ccdde684a34798419a Mon Sep 17 00:00:00 2001 From: Krishan Yadav Date: Sun, 8 Feb 2026 12:05:54 +0530 Subject: [PATCH 3/3] Update exif_extractor.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- dreamsApp/exif_extractor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dreamsApp/exif_extractor.py b/dreamsApp/exif_extractor.py index 69d6cf2..2593d21 100644 --- a/dreamsApp/exif_extractor.py +++ b/dreamsApp/exif_extractor.py @@ -154,9 +154,11 @@ def get_coordinate(coord, ref): def _parse_camera_pillow(self, exif): """Parse camera info from Pillow exif data.""" + make_val = exif.get(271, "") + model_val = exif.get(272, "") return { - "make": exif.get(271, "").decode('utf-8') if isinstance(exif.get(271), bytes) else str(exif.get(271, "")), - "model": exif.get(272, "").decode('utf-8') if isinstance(exif.get(272), bytes) else str(exif.get(272, "")) + "make": make_val.decode('utf-8') if isinstance(make_val, bytes) else str(make_val), + "model": model_val.decode('utf-8') if isinstance(model_val, bytes) else str(model_val) } def _empty_metadata(self):