Update datamodel.py, main.py - hashing is working

GorkiPower · GorkiPower · commit ea934eff62bc · 2025-12-10T15:10:50.000+01:00
diff --git a/backend/datamodel.py b/backend/datamodel.py
@@ -1,42 +1,56 @@
+# datamodel.py (updated)
 
-from datetime import datetime
-from typing import Optional, Dict, Any
-from pydantic import BaseModel, Field, root_validator
 import hashlib
 import json
+from datetime import datetime
+from typing import Any  # <-- Only Any is needed
+
+from pydantic import BaseModel, Field, model_validator
+
+
+def compute_schema_hash(
+    name: str | None, version: str | None, content: dict[str, Any] | None
+) -> str:
+    """
+    Compute a deterministic SHA-256 hash from the canonical JSON of {name, version, content}.
+    """
+    payload = {"name": name, "version": version, "content": content}
+    canonical = json.dumps(
+        payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
+    )
+    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
 
 
 class SchemaDefinition(BaseModel):
     id: str = Field(..., description="Unique identifier for the schema (content hash)")
-    name: Optional[str] = Field(
+    name: str | None = Field(
         None,
         description="Human-readable name of the schema",
         min_length=3,
     )
-    version: Optional[str] = Field(None, description="Version of the schema")
-    content: Optional[Dict[str, Any]] = Field(
+    version: str | None = Field(None, description="Version of the schema")
+    content: dict[str, Any] | None = Field(
         None, description="The actual schema content as a dictionary"
     )
-    updated_at: Optional[datetime] = Field(
+    updated_at: datetime | None = Field(
         None, description="Timestamp of the last update"
     )
 
-    @staticmethod
-    def _compute_hash(name: Optional[str], version: Optional[str], content: Optional[Dict[str, Any]]) -> str:
-        """
-        Compute a deterministic SHA-256 hash from the canonical JSON of {name, version, content}.
-        """
-        payload = {"name": name, "version": version, "content": content}
-        canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
-        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
-
-    @root_validator(pre=True)
-    def assign_id_from_content(cls, values):
+    @model_validator(mode="before")
+    def assign_id_from_content(cls, data: Any):
         """
+        Pydantic v2: pre-model validator. Mutates the incoming data dict:
         Always (re)compute `id` from the content so it is deterministic and content-addressed.
-        Any provided `id` is ignored to ensure correctness.
+        Any provided `id` from the caller is ignored to ensure correctness.
         """
-        name = values.get("name")
-        version = values.get("version")
-        content = values.get("content")
-        values["id"] = cls._compute_hash(name, version, content)
+        if not isinstance(data, dict):
+            return data
+        name = data.get("name")
+        version = data.get("version")
+        content = data.get("content")
+        data["id"] = compute_schema_hash(name, version, content)
+        return data
+
+    class Config:
+        # Allow populating fields by field name as well as by alias (no aliases are defined here, so this is optional)
+        populate_by_name = True
diff --git a/backend/main.py b/backend/main.py
@@ -1,14 +1,14 @@
+# main.py (updated)
 
 from datetime import datetime
-from typing import Any, List, Dict, Optional
+from typing import Any  # <-- Only Any is needed
 
 from database import schemas_collection
-from datamodel import SchemaDefinition
+from datamodel import SchemaDefinition, compute_schema_hash
 from fastapi import FastAPI, HTTPException
 from fastapi.encoders import jsonable_encoder
 from fastapi.middleware.cors import CORSMiddleware
 
-
 # ---- FastAPI app & CORS ----
 app = FastAPI()
 app.add_middleware(
@@ -20,53 +20,41 @@
 )
 
 
-def _compute_hash_from_doc(doc: Dict[str, Any]) -> str:
-    """
-    Helper to compute the same SHA-256 hash used by SchemaDefinition
-    without instantiating a model (used for quick normalization paths).
-    """
-    # Import locally to avoid circular imports and to use the same logic
-    from datamodel import SchemaDefinition
-    return SchemaDefinition._compute_hash(
-        doc.get("name"),
-        doc.get("version"),
-        doc.get("content"),
-    )
-
-
 # ---- Routes ----
-@app.get("/schemas", response_model=List[SchemaDefinition])
-async def get_all_schemas() -> List[Dict[str, Any]]:
+@app.get("/schemas", response_model=list[SchemaDefinition])
+async def get_all_schemas() -> list[dict[str, Any]]:
     """
     Retrieve all schemas. Ensures each document has `id` as the content hash
     and a valid `updated_at`. If the stored `id` is missing/mismatched,
     it will be recomputed to keep the collection consistent.
     """
     docs = list(schemas_collection.find())
-    normalized: List[Dict[str, Any]] = []
+    normalized: list[dict[str, Any]] = []
     for d in docs:
         # Compute the correct content hash
-        computed_id = _compute_hash_from_doc(d)
+        computed_id = compute_schema_hash(
+            d.get("name"), d.get("version"), d.get("content")
+        )
         if d.get("id") != computed_id:
             # Heal legacy/mismatched ids
             d["id"] = computed_id
-            # Do not modify updated_at during passive normalization
-            schemas_collection.update_one({"_id": d["_id"]}, {"$set": {"id": computed_id}})
-
+            schemas_collection.update_one(
+                {"_id": d["_id"]}, {"$set": {"id": computed_id}}
+            )
         # Ensure updated_at exists (server-side default)
         if d.get("updated_at") is None:
             d["updated_at"] = datetime.utcnow()
-            schemas_collection.update_one({"_id": d["_id"]}, {"$set": {"updated_at": d["updated_at"]}})
-
+            schemas_collection.update_one(
+                {"_id": d["_id"]}, {"$set": {"updated_at": d["updated_at"]}}
+            )
         # Remove internal MongoDB _id from outward JSON
         d.pop("_id", None)
         normalized.append(d)
-
     return jsonable_encoder(normalized)
 
 
 @app.post("/schemas", response_model=SchemaDefinition)
-async def add_schema(schema: SchemaDefinition) -> Dict[str, Any]:
+async def add_schema(schema: SchemaDefinition) -> dict[str, Any]:
     """
     Add a new schema. `id` is deterministically computed from {name, version, content}.
     Server sets `updated_at`.
@@ -77,31 +65,30 @@ async def add_schema(schema: SchemaDefinition) -> Dict[str, Any]:
         version=schema.version,
         content=schema.content,
         updated_at=None,
-        id="ignored"  # ignored by validator; kept for clarity
+        id="ignored",  # ignored by model_validator; here for clarity
     )
     doc = model.dict()
     doc["updated_at"] = datetime.utcnow()
-
     # Insert as-is (no ObjectId conversions for id)
     schemas_collection.insert_one(doc)
-
     # Return exactly what we stored
     return jsonable_encoder(doc)
 
 
-@app.put("/schemas/{id}", response_model=Dict[str, str])
-async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
+@app.put("/schemas/{id}", response_model=dict[str, str])
+async def update_schema(id: str, update: SchemaDefinition) -> dict[str, str]:
     """
     Update schema by `id`. Because `id` is a content hash, any change in
     {name, version, content} will produce a new `id`. This endpoint:
-      1) Finds the existing document by the current `id`.
-      2) Merges provided fields (ignores `None` and any `id` supplied).
-      3) Recomputes `id` from merged content.
-      4) Replaces the document and returns the (possibly new) `id`.
+    1) Finds the existing document by the current `id`.
+    2) Merges provided fields (ignores `None` and any `id` supplied).
+    3) Recomputes `id` from merged content.
+    4) Replaces the document and returns the (possibly new) `id`.
     """
     if not isinstance(id, str) or not id.strip():
-        raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")
-
+        raise HTTPException(
+            status_code=400, detail="Invalid schema id (must be a non-empty string)"
+        )
     existing = schemas_collection.find_one({"id": id})
     if not existing:
         raise HTTPException(status_code=404, detail="Schema not found")
@@ -113,9 +100,9 @@ async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
         "version": payload.get("version", existing.get("version")),
         "content": payload.get("content", existing.get("content")),
     }
-    # Compute new hash-based id using SchemaDefinition logic
-    new_model = SchemaDefinition(**merged, id="ignored", updated_at=None)
-    new_id = new_model.id
+
+    # Compute new hash-based id with the same helper SchemaDefinition's validator uses
+    new_id = compute_schema_hash(merged["name"], merged["version"], merged["content"])
 
     # Build final doc to store
     final_doc = {
@@ -135,14 +122,15 @@ async def update_schema(id: str, update: SchemaDefinition) -> Dict[str, str]:
     return {"message": "Schema updated", "id": new_id}
 
 
-@app.delete("/schemas/{id}", response_model=Dict[str, str])
-async def delete_schema(id: str) -> Dict[str, str]:
+@app.delete("/schemas/{id}", response_model=dict[str, str])
+async def delete_schema(id: str) -> dict[str, str]:
     """
     Delete schema by `id`.
     """
     if not isinstance(id, str) or not id.strip():
-        raise HTTPException(status_code=400, detail="Invalid schema id (must be a non-empty string)")
-
+        raise HTTPException(
+            status_code=400, detail="Invalid schema id (must be a non-empty string)"
+        )
     result = schemas_collection.delete_one({"id": id})
     if result.deleted_count == 0:
         raise HTTPException(status_code=404, detail="Schema not found")