Skip to content

Commit ea934ef

Browse files
committed
Update datamodel.py, main.py - hashing is working
1 parent bca088f commit ea934ef

File tree

2 files changed

+71
-69
lines changed

2 files changed

+71
-69
lines changed

backend/datamodel.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,56 @@
1+
# datamodel.py (updated)
12

2-
from datetime import datetime
3-
from typing import Optional, Dict, Any
4-
from pydantic import BaseModel, Field, root_validator
53
import hashlib
64
import json
5+
from datetime import datetime
6+
from typing import Any # <-- Only Any is needed
7+
8+
from pydantic import BaseModel, Field, model_validator
9+
10+
11+
def compute_schema_hash(
12+
name: str | None, version: str | None, content: dict[str, Any] | None
13+
) -> str:
14+
"""
15+
Compute a deterministic SHA-256 hash from the canonical JSON of {name, version, content}.
16+
"""
17+
payload = {"name": name, "version": version, "content": content}
18+
canonical = json.dumps(
19+
payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
20+
)
21+
return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
722

823

924
class SchemaDefinition(BaseModel):
    """A schema record whose ``id`` is derived from its own content.

    ``id`` is always recomputed from ``name``, ``version`` and ``content``
    by the before-mode validator below, so any ``id`` supplied by the
    caller is overwritten when the input is a dict.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which v2 only supports as a deprecated form. ``populate_by_name``
    # allows aliased fields to be populated by field name; no field in this
    # model declares an alias, and the setting is carried over unchanged.
    model_config = {"populate_by_name": True}

    id: str = Field(..., description="Unique identifier for the schema (content hash)")
    name: str | None = Field(
        None,
        description="Human-readable name of the schema",
        min_length=3,
    )
    version: str | None = Field(None, description="Version of the schema")
    content: dict[str, Any] | None = Field(
        None, description="The actual schema content as a dictionary"
    )
    updated_at: datetime | None = Field(
        None, description="Timestamp of the last update"
    )

    @model_validator(mode="before")
    @classmethod
    def assign_id_from_content(cls, data: Any) -> Any:
        """Set ``id`` to the content hash before field validation runs.

        Args:
            data: Raw input handed to the model. Only ``dict`` input is
                processed; anything else is returned untouched.

        Returns:
            A shallow copy of ``data`` with ``id`` set from
            ``compute_schema_hash(name, version, content)``, or ``data``
            itself when it is not a dict.
        """
        if not isinstance(data, dict):
            return data
        # Work on a copy so the caller's dict is not modified as a side
        # effect of validation.
        patched = dict(data)
        patched["id"] = compute_schema_hash(
            patched.get("name"), patched.get("version"), patched.get("content")
        )
        return patched

backend/main.py

Lines changed: 34 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1+
# main.py (updated)
12

23
from datetime import datetime
3-
from typing import Any, List, Dict, Optional
4+
from typing import Any # <-- Only Any is needed
45

56
from database import schemas_collection
6-
from datamodel import SchemaDefinition
7+
from datamodel import SchemaDefinition, compute_schema_hash
78
from fastapi import FastAPI, HTTPException
89
from fastapi.encoders import jsonable_encoder
910
from fastapi.middleware.cors import CORSMiddleware
1011

11-
1212
# ---- FastAPI app & CORS ----
1313
app = FastAPI()
1414
app.add_middleware(
@@ -20,53 +20,41 @@
2020
)
2121

2222

23-
def _compute_hash_from_doc(doc: Dict[str, Any]) -> str:
24-
"""
25-
Helper to compute the same SHA-256 hash used by SchemaDefinition
26-
without instantiating a model (used for quick normalization paths).
27-
"""
28-
# Import locally to avoid circular imports and to use the same logic
29-
from datamodel import SchemaDefinition
30-
return SchemaDefinition._compute_hash(
31-
doc.get("name"),
32-
doc.get("version"),
33-
doc.get("content"),
34-
)
35-
36-
3723
# ---- Routes ----
38-
@app.get("/schemas", response_model=List[SchemaDefinition])
39-
async def get_all_schemas() -> List[Dict[str, Any]]:
24+
@app.get("/schemas", response_model=list[SchemaDefinition])
async def get_all_schemas() -> list[dict[str, Any]]:
    """Return every stored schema, repairing stale records on the way.

    For each document:
      * ``id`` is compared with the hash of {name, version, content} and
        replaced when it is missing or different.
      * ``updated_at`` is filled with the current time when it is missing.
    Any repairs are written back to the collection in a single ``$set``
    per document. The MongoDB ``_id`` is removed from the returned data.

    Returns:
        The JSON-encodable list of normalized schema documents.
    """
    normalized: list[dict[str, Any]] = []
    # Read every document up front; repairs are written back one document
    # at a time below.
    for d in list(schemas_collection.find()):
        repairs: dict[str, Any] = {}

        # Heal legacy/mismatched ids.
        computed_id = compute_schema_hash(
            d.get("name"), d.get("version"), d.get("content")
        )
        if d.get("id") != computed_id:
            repairs["id"] = computed_id

        # Server-side default for a missing timestamp. utcnow() is kept so
        # the stored value matches what the other write paths in this file
        # produce.
        if d.get("updated_at") is None:
            repairs["updated_at"] = datetime.utcnow()

        if repairs:
            d.update(repairs)
            # One write per document instead of one per repaired field.
            schemas_collection.update_one({"_id": d["_id"]}, {"$set": repairs})

        # Remove internal MongoDB _id from outward JSON.
        d.pop("_id", None)
        normalized.append(d)
    return jsonable_encoder(normalized)
6654

6755

6856
@app.post("/schemas", response_model=SchemaDefinition)
69-
async def add_schema(schema: SchemaDefinition) -> Dict[str, Any]:
57+
async def add_schema(schema: SchemaDefinition) -> dict[str, Any]:
7058
"""
7159
Add a new schema. `id` is deterministically computed from {name, version, content}.
7260
Server sets `updated_at`.
@@ -77,31 +65,30 @@ async def add_schema(schema: SchemaDefinition) -> Dict[str, Any]:
7765
version=schema.version,
7866
content=schema.content,
7967
updated_at=None,
80-
id="ignored" # ignored by validator; kept for clarity
68+
id="ignored", # ignored by model_validator; here for clarity
8169
)
8270
doc = model.dict()
8371
doc["updated_at"] = datetime.utcnow()
84-
8572
# Insert as-is (no ObjectId conversions for id)
8673
schemas_collection.insert_one(doc)
87-
8874
# Return exactly what we stored
8975
return jsonable_encoder(doc)
9076

9177

92-
@app.put("/schemas/{id}", response_model=Dict[str, str])
93-
async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
78+
@app.put("/schemas/{id}", response_model=dict[str, str])
79+
async def update_schema(id: str, update: SchemaDefinition) -> dict[str, str]:
9480
"""
9581
Update schema by `id`. Because `id` is a content hash, any change in
9682
{name, version, content} will produce a new `id`. This endpoint:
97-
1) Finds the existing document by the current `id`.
98-
2) Merges provided fields (ignores `None` and any `id` supplied).
99-
3) Recomputes `id` from merged content.
100-
4) Replaces the document and returns the (possibly new) `id`.
83+
1) Finds the existing document by the current `id`.
84+
2) Merges provided fields (ignores `None` and any `id` supplied).
85+
3) Recomputes `id` from merged content.
86+
4) Replaces the document and returns the (possibly new) `id`.
10187
"""
10288
if not isinstance(id, str) or not id.strip():
103-
raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")
104-
89+
raise HTTPException(
90+
status_code=400, detail="Invalid schema id (must be a non-empty string)"
91+
)
10592
existing = schemas_collection.find_one({"id": id})
10693
if not existing:
10794
raise HTTPException(status_code=404, detail="Schema not found")
@@ -113,9 +100,9 @@ async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
113100
"version": payload.get("version", existing.get("version")),
114101
"content": payload.get("content", existing.get("content")),
115102
}
116-
# Compute new hash-based id using SchemaDefinition logic
117-
new_model = SchemaDefinition(**merged, id="ignored", updated_at=None)
118-
new_id = new_model.id
103+
104+
# Compute new hash-based id using the same logic
105+
new_id = compute_schema_hash(merged["name"], merged["version"], merged["content"])
119106

120107
# Build final doc to store
121108
final_doc = {
@@ -135,14 +122,15 @@ async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
135122
return {"message": "Schema updated", "id": new_id}
136123

137124

138-
@app.delete("/schemas/{id}", response_model=Dict[str, str])
139-
async def delete_schema(id: str) -> Dict[str, str]:
125+
@app.delete("/schemas/{id}", response_model=dict[str, str])
126+
async def delete_schema(id: str) -> dict[str, str]:
140127
"""
141128
Delete schema by `id`.
142129
"""
143130
if not isinstance(id, str) or not id.strip():
144-
raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")
145-
131+
raise HTTPException(
132+
status_code=400, detail="Invalid schema id (must be a non-empty string)"
133+
)
146134
result = schemas_collection.delete_one({"id": id})
147135
if result.deleted_count == 0:
148136
raise HTTPException(status_code=404, detail="Schema not found")

0 commit comments

Comments
 (0)