From 2be1f6513d709b6cd7b1b5f1cb1d0116e4445f28 Mon Sep 17 00:00:00 2001 From: Rutvik Date: Fri, 5 Sep 2025 16:51:37 +0530 Subject: [PATCH 1/2] Update Qdrant: set field_schema to KEYWORD & add encoding="utf-8" for JSON --- wren-ai-service/eval/preparation.py | 4 ++-- wren-ai-service/src/pipelines/generation/chart_adjustment.py | 2 +- wren-ai-service/src/pipelines/generation/chart_generation.py | 2 +- wren-ai-service/src/providers/document_store/qdrant.py | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/wren-ai-service/eval/preparation.py b/wren-ai-service/eval/preparation.py index a139a2a30a..a5ef69d608 100644 --- a/wren-ai-service/eval/preparation.py +++ b/wren-ai-service/eval/preparation.py @@ -89,7 +89,7 @@ def get_database_names(path: Path): def get_tables_by_db(path: Path, key: str): - with open(path, "rb") as f: + with open(path, "rb",encoding="utf-8") as f: json_data = orjson.loads(f.read()) return {item[key]: item for item in json_data} @@ -210,7 +210,7 @@ def build_mdl_relationships(tables_info): def get_ground_truths_by_db(path: Path, key: str): - with open(path, "rb") as f: + with open(path, "rb",encoding="utf-8") as f: json_data = orjson.loads(f.read()) results = defaultdict(list) diff --git a/wren-ai-service/src/pipelines/generation/chart_adjustment.py b/wren-ai-service/src/pipelines/generation/chart_adjustment.py index 2e4e7ba8a3..ef6ce61701 100644 --- a/wren-ai-service/src/pipelines/generation/chart_adjustment.py +++ b/wren-ai-service/src/pipelines/generation/chart_adjustment.py @@ -165,7 +165,7 @@ def __init__( "post_processor": ChartGenerationPostProcessor(), } - with open("src/pipelines/generation/utils/vega-lite-schema-v5.json", "r") as f: + with open("src/pipelines/generation/utils/vega-lite-schema-v5.json", "r",encoding="utf-8") as f: _vega_schema = orjson.loads(f.read()) self._configs = { diff --git a/wren-ai-service/src/pipelines/generation/chart_generation.py b/wren-ai-service/src/pipelines/generation/chart_generation.py 
index 6daca5ec17..f744ad39d9 100644 --- a/wren-ai-service/src/pipelines/generation/chart_generation.py +++ b/wren-ai-service/src/pipelines/generation/chart_generation.py @@ -138,7 +138,7 @@ def __init__( "post_processor": ChartGenerationPostProcessor(), } - with open("src/pipelines/generation/utils/vega-lite-schema-v5.json", "r") as f: + with open("src/pipelines/generation/utils/vega-lite-schema-v5.json", "r",encoding="utf-8") as f: _vega_schema = orjson.loads(f.read()) self._configs = { diff --git a/wren-ai-service/src/providers/document_store/qdrant.py b/wren-ai-service/src/providers/document_store/qdrant.py index b90961c456..9eeb45e8d1 100644 --- a/wren-ai-service/src/providers/document_store/qdrant.py +++ b/wren-ai-service/src/providers/document_store/qdrant.py @@ -1,7 +1,7 @@ import logging import os from typing import Any, Dict, List, Optional - +from qdrant_client.http import models import numpy as np import qdrant_client from haystack import Document, component @@ -159,7 +159,8 @@ def __init__( # to improve the indexing performance # see https://qdrant.tech/documentation/guides/multiple-partitions/?q=mul#calibrate-performance self.client.create_payload_index( - collection_name=index, field_name="project_id", field_schema="keyword" + collection_name=index, field_name="project_id", + field_schema=models.PayloadSchemaType.KEYWORD, ) async def _query_by_embedding( From c10fed52bb570ef12f1acd03f1a7f841ccb37804 Mon Sep 17 00:00:00 2001 From: Rutvik Date: Fri, 5 Sep 2025 17:29:21 +0530 Subject: [PATCH 2/2] Revert file opening changes for orjson compatibility Remove the encoding="utf-8" parameter from the open() calls at lines 92-93 and 213-214. A file opened in binary mode ("rb") cannot take an encoding argument: open() raises a ValueError when both are given. Binary mode is kept because orjson.loads parses the raw bytes directly. An alternative would be to open the file in text mode ("r") with encoding="utf-8", since orjson.loads also accepts a str.
--- wren-ai-service/eval/preparation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wren-ai-service/eval/preparation.py b/wren-ai-service/eval/preparation.py index a5ef69d608..ceb6f0433a 100644 --- a/wren-ai-service/eval/preparation.py +++ b/wren-ai-service/eval/preparation.py @@ -89,7 +89,7 @@ def get_database_names(path: Path): def get_tables_by_db(path: Path, key: str): - with open(path, "rb",encoding="utf-8") as f: + with open(path, "rb")as f: json_data = orjson.loads(f.read()) return {item[key]: item for item in json_data} @@ -210,7 +210,7 @@ def build_mdl_relationships(tables_info): def get_ground_truths_by_db(path: Path, key: str): - with open(path, "rb",encoding="utf-8") as f: + with open(path, "rb")as f: json_data = orjson.loads(f.read()) results = defaultdict(list)