From 33ec3d2a9f552ea0bd04a90bfa447a52c925c409 Mon Sep 17 00:00:00 2001
From: Victor <194116445+dodekapod@users.noreply.github.com>
Date: Wed, 29 Oct 2025 15:35:11 +0100
Subject: [PATCH] file_metadata: JSON -> JSONB, index on file_metadata

Switch File.file_metadata from the generic SQLAlchemy JSON type to the
PostgreSQL-specific JSONB type and add a GIN index
(ix_files_metadata_gin) on that column.

Use the callable default=dict instead of the literal default={} so that
every inserted row gets its own fresh dict rather than one dict object
shared by all rows.
---
Notes for reviewers (ignored by git am):
- NOTE(review): this only changes the model definition; tables created
  before this change presumably keep their JSON column and have no GIN
  index until migrated -- confirm a migration path exists.
- The blob hashes on the "index" line below were recorded before the
  default=dict / trailing-comma tweaks and are informational only.

 openrag/components/indexer/vectordb/utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/openrag/components/indexer/vectordb/utils.py b/openrag/components/indexer/vectordb/utils.py
index 99c63c40..7d0ebd0d 100644
--- a/openrag/components/indexer/vectordb/utils.py
+++ b/openrag/components/indexer/vectordb/utils.py
@@ -5,7 +5,6 @@
 from typing import Dict, Optional
 
 from sqlalchemy import (
-    JSON,
     Boolean,
     CheckConstraint,
     Column,
@@ -17,6 +16,9 @@
     UniqueConstraint,
     create_engine,
 )
+from sqlalchemy.dialects.postgresql import (
+    JSONB
+)
 from sqlalchemy.orm import (
     declarative_base,
     relationship,
@@ -45,7 +47,7 @@ class File(Base):
     partition_name = Column(
         String, ForeignKey("partitions.partition"), nullable=False, index=True
     )  # Added index
-    file_metadata = Column(JSON, nullable=True, default={})
+    file_metadata = Column(JSONB, nullable=True, default=dict)
 
     # relationship to the Partition object
     partition = relationship("Partition", back_populates="files")
@@ -55,6 +57,8 @@ class File(Base):
         UniqueConstraint("file_id", "partition_name", name="uix_file_id_partition"),
         # Additional composite index for common query patterns (partition first for better selectivity)
         Index("ix_partition_file", "partition_name", "file_id"),
+        # Metadata GIN index
+        Index("ix_files_metadata_gin", "file_metadata", postgresql_using="gin"),
     )
 
     def to_dict(self):