Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## 2025-12-24 - Rate Limit Bypass via IP Spoofing
**Vulnerability:** The rate-limiting middleware manually parsed the `X-Forwarded-For` and `X-Real-IP` headers to determine the client IP. Because these headers are fully controlled by the client unless a trusted proxy overwrites them, an attacker could spoof their IP address by supplying a fake `X-Forwarded-For` header, bypassing the rate limits.
**Learning:** Manual parsing of proxy headers is dangerous. Applications should rely on the web server or ASGI server (like Uvicorn/Gunicorn) to handle proxy headers securely. Uvicorn, for example, has `--proxy-headers` and `--forwarded-allow-ips` options to trust specific upstream proxies.
**Prevention:**
1. Avoid manual parsing of `X-Forwarded-For` in application code.
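2. Use `request.client.host` provided by Starlette/FastAPI, which is populated by the ASGI server and reflects forwarded headers only when the server is configured to trust the connecting proxy. Note that `request.client` can be `None`, so handle that case explicitly.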
3. Configure the deployment environment (Uvicorn/Nginx) to handle proxy headers and trust only known proxies.
4. If manual parsing is absolutely necessary (e.g., a complex multi-proxy setup that the server does not support), first validate the IP of the directly connecting peer against a strict allowlist of trusted proxies, and only then honor the forwarded headers.
28 changes: 12 additions & 16 deletions app/api/rapidapi/redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,12 @@
"""

import time

from fastapi import APIRouter

from app.models.rapidapi_schemas import (
RapidAPIRedactRequest,
RapidAPIRedactResponse,
RedactedItem
)
from app.services.redaction import redact_text, get_entity_score
from app.models.rapidapi_schemas import RapidAPIRedactRequest, RapidAPIRedactResponse, RedactedItem
from app.services.json_processor import process_json_with_mode

from app.services.redaction import redact_text

router = APIRouter(prefix="/v1", tags=["Main API"])

Expand Down Expand Up @@ -103,7 +99,7 @@
)
async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactResponse:
"""Redact PII entities in the provided text or JSON.

This endpoint is designed for RapidAPI integration and provides:
- Flexible input (text or JSON)
- Flexible redaction modes (mask/placeholder)
Expand All @@ -112,10 +108,10 @@ async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactRespo
- Processing time measurement
"""
start_time = time.perf_counter()

# Convert entities filter to list of strings if provided
entities_filter = list(request.entities) if request.entities else None

if request.is_json_mode:
# JSON mode
redacted_data, json_entities = process_json_with_mode(
Expand All @@ -124,9 +120,9 @@ async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactRespo
mode=request.mode,
entities_filter=entities_filter
)

processing_time_ms = (time.perf_counter() - start_time) * 1000

items = [
RedactedItem(
entity_type=e.type,
Expand All @@ -137,7 +133,7 @@ async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactRespo
)
for e in json_entities
]

return RapidAPIRedactResponse(
redacted_text=None,
redacted_json=redacted_data,
Expand All @@ -152,9 +148,9 @@ async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactRespo
entities_filter=entities_filter,
mode=request.mode
)

processing_time_ms = (time.perf_counter() - start_time) * 1000

items = [
RedactedItem(
entity_type=item.entity_type,
Expand All @@ -165,7 +161,7 @@ async def rapidapi_redact(request: RapidAPIRedactRequest) -> RapidAPIRedactRespo
)
for item in redacted_items
]

return RapidAPIRedactResponse(
redacted_text=redacted_text_result,
redacted_json=None,
Expand Down
28 changes: 13 additions & 15 deletions app/api/v1/detect.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
"""Detect endpoint - find PII without modifying text or JSON."""

from typing import Union

from fastapi import APIRouter
from fastapi.responses import JSONResponse

from app.models.schemas import (
TextRequest,
UnifiedRequest,
DetectResponse,
DetectJsonResponse,
DetectedEntity,
JsonFieldEntity
DetectJsonResponse,
DetectResponse,
JsonFieldEntity,
UnifiedRequest,
)
from app.services.pii_detector import get_detector
from app.services.json_processor import detect_json

from app.services.pii_detector import get_detector

router = APIRouter(tags=["PII Detection"])

Expand Down Expand Up @@ -80,22 +78,22 @@
For JSON mode, each entity includes a `path` field showing its location (e.g., `"user.name"`).
"""
)
async def detect_pii(request: UnifiedRequest) -> Union[DetectResponse, DetectJsonResponse]:
async def detect_pii(request: UnifiedRequest) -> DetectResponse | DetectJsonResponse:
"""Detect PII entities in the provided text or JSON.

Scans for:
- Email addresses
- Phone numbers (international formats)
- Credit card numbers
- Person names (via NER)

Returns the list of detected entities with their types,
values, and positions.
"""
if request.is_json_mode:
# JSON mode - detect in all string values
_, entities = detect_json(request.json, request.language, request.entities)

json_entities = [
JsonFieldEntity(
path=e.path,
Expand All @@ -106,13 +104,13 @@ async def detect_pii(request: UnifiedRequest) -> Union[DetectResponse, DetectJso
)
for e in entities
]

return DetectJsonResponse(entities=json_entities)
else:
# Text mode - standard detection
detector = get_detector()
detected = detector.detect(request.text, request.language, request.entities)

entities = [
DetectedEntity(
type=entity.type,
Expand All @@ -122,5 +120,5 @@ async def detect_pii(request: UnifiedRequest) -> Union[DetectResponse, DetectJso
)
for entity in detected
]

return DetectResponse(entities=entities)
30 changes: 15 additions & 15 deletions app/api/v1/mask.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""Mask endpoint - replace PII with asterisks in text or JSON."""

from typing import Union

from fastapi import APIRouter

from app.models.schemas import (
UnifiedRequest,
MaskResponse,
MaskJsonResponse,
JsonFieldEntity,
MaskedEntity,
JsonFieldEntity
MaskJsonResponse,
MaskResponse,
UnifiedRequest,
)
from app.services.pii_detector import get_detector
from app.services.masking import mask_text
from app.services.json_processor import mask_json

from app.services.masking import mask_text
from app.services.pii_detector import get_detector

router = APIRouter(tags=["PII Masking"])

Expand Down Expand Up @@ -89,22 +89,22 @@
**Note:** JSON structure is preserved. Only string values are modified.
"""
)
async def mask_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonResponse]:
async def mask_pii(request: UnifiedRequest) -> MaskResponse | MaskJsonResponse:
"""Mask PII entities in the provided text or JSON.

Detects and replaces PII with asterisks:
- Email addresses → ***
- Phone numbers → ***
- Credit card numbers → ***
- Person names → ***

For JSON input, only string values are processed.
The JSON structure is preserved.
"""
if request.is_json_mode:
# JSON mode - mask in all string values
masked_data, entities = mask_json(request.json, request.language, request.entities)

json_entities = [
JsonFieldEntity(
path=e.path,
Expand All @@ -115,15 +115,15 @@ async def mask_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonRespo
)
for e in entities
]

return MaskJsonResponse(json=masked_data, entities=json_entities)
else:
# Text mode - standard masking
detector = get_detector()
detected = detector.detect(request.text, request.language, request.entities)

masked_text, masked_entities = mask_text(request.text, detected)

entities = [
MaskedEntity(
type=entity.type,
Expand All @@ -134,5 +134,5 @@ async def mask_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonRespo
)
for entity in masked_entities
]

return MaskResponse(text=masked_text, entities=entities)
30 changes: 15 additions & 15 deletions app/api/v1/redact.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""Redact endpoint - replace PII with [REDACTED] in text or JSON."""

from typing import Union

from fastapi import APIRouter

from app.models.schemas import (
UnifiedRequest,
MaskResponse,
MaskJsonResponse,
JsonFieldEntity,
MaskedEntity,
JsonFieldEntity
MaskJsonResponse,
MaskResponse,
UnifiedRequest,
)
from app.services.pii_detector import get_detector
from app.services.masking import redact_text
from app.services.json_processor import redact_json

from app.services.masking import redact_text
from app.services.pii_detector import get_detector

router = APIRouter(tags=["PII Redaction"])

Expand Down Expand Up @@ -89,22 +89,22 @@
**Note:** JSON structure is preserved. Only string values are modified.
"""
)
async def redact_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonResponse]:
async def redact_pii(request: UnifiedRequest) -> MaskResponse | MaskJsonResponse:
"""Redact PII entities in the provided text or JSON.

Detects and replaces PII with [REDACTED]:
- Email addresses → [REDACTED]
- Phone numbers → [REDACTED]
- Credit card numbers → [REDACTED]
- Person names → [REDACTED]

For JSON input, only string values are processed.
The JSON structure is preserved.
"""
if request.is_json_mode:
# JSON mode - redact in all string values
redacted_data, entities = redact_json(request.json, request.language, request.entities)

json_entities = [
JsonFieldEntity(
path=e.path,
Expand All @@ -115,15 +115,15 @@ async def redact_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonRes
)
for e in entities
]

return MaskJsonResponse(json=redacted_data, entities=json_entities)
else:
# Text mode - standard redaction
detector = get_detector()
detected = detector.detect(request.text, request.language, request.entities)

redacted_text, redacted_entities = redact_text(request.text, detected)

entities = [
MaskedEntity(
type=entity.type,
Expand All @@ -134,5 +134,5 @@ async def redact_pii(request: UnifiedRequest) -> Union[MaskResponse, MaskJsonRes
)
for entity in redacted_entities
]

return MaskResponse(text=redacted_text, entities=entities)
3 changes: 1 addition & 2 deletions app/api/v1/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

from fastapi import APIRouter

from app.api.v1.detect import router as detect_router
from app.api.v1.mask import router as mask_router
from app.api.v1.redact import router as redact_router
from app.api.v1.detect import router as detect_router


router = APIRouter(tags=["PII Processing"])

Expand Down
16 changes: 8 additions & 8 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,34 @@

class Settings(BaseSettings):
"""Application settings.

All settings can be overridden via environment variables.
"""

model_config = SettingsConfigDict(env_prefix="MASKER_")

# API settings
api_title: str = "Masker API"
api_description: str = "PII Redaction & Text Anonymization API for LLMs and JSON"
api_version: str = "1.0.0"

# Server settings
host: str = "0.0.0.0"
port: int = 8000

# Request limits
max_text_size: int = 32 * 1024 # 32KB for text field
max_payload_size: int = 64 * 1024 # 64KB for entire JSON payload
request_timeout: int = 10 # 10s default timeout for intensive operations

# Supported languages for NER
supported_languages: list[str] = ["en", "ru"]
default_language: str = "en"

# Masking/redaction tokens (configurable defaults)
mask_token: str = "***"
redact_token: str = "[REDACTED]"

# Placeholder templates for typed redaction
placeholder_person: str = "<PERSON>"
placeholder_email: str = "<EMAIL>"
Expand Down
Loading
Loading