diff --git a/pyproject.toml b/pyproject.toml index 39150229d..4ada99e0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ dependencies = [ "chromadb>=0.4.24", "qdrant-client (>=1.15.1,<2.0.0)", "pyseekdb==1.0.0b7", - "langbot-plugin==0.2.5", + "langbot-plugin>=0.2.5", "asyncpg>=0.30.0", "line-bot-sdk>=3.19.0", "tboxsdk>=0.0.10", diff --git a/src/langbot/pkg/api/http/controller/groups/knowledge/base.py b/src/langbot/pkg/api/http/controller/groups/knowledge/base.py index 96ed001c1..8fa8a8c18 100644 --- a/src/langbot/pkg/api/http/controller/groups/knowledge/base.py +++ b/src/langbot/pkg/api/http/controller/groups/knowledge/base.py @@ -39,7 +39,7 @@ async def handle_specific_knowledge_base(knowledge_base_uuid: str) -> quart.Resp elif quart.request.method == 'PUT': json_data = await quart.request.json await self.ap.knowledge_service.update_knowledge_base(knowledge_base_uuid, json_data) - return self.success({}) + return self.success(data={'uuid': knowledge_base_uuid}) elif quart.request.method == 'DELETE': await self.ap.knowledge_service.delete_knowledge_base(knowledge_base_uuid) @@ -90,5 +90,14 @@ async def delete_specific_file_in_kb(file_id: str, knowledge_base_uuid: str) -> async def retrieve_knowledge_base(knowledge_base_uuid: str) -> str: json_data = await quart.request.json query = json_data.get('query') - results = await self.ap.knowledge_service.retrieve_knowledge_base(knowledge_base_uuid, query) + + if not query or not query.strip(): + return self.http_status(400, -1, 'Query is required and cannot be empty') + + # Extract retrieval_settings to allow dynamic control over RAG engine behavior (e.g. 
top_k, filters) + retrieval_settings = json_data.get('retrieval_settings', {}) + results = await self.ap.knowledge_service.retrieve_knowledge_base( + knowledge_base_uuid, query, retrieval_settings + ) return self.success(data={'results': results}) + diff --git a/src/langbot/pkg/api/http/controller/groups/knowledge/engines.py b/src/langbot/pkg/api/http/controller/groups/knowledge/engines.py new file mode 100644 index 000000000..6a85217b3 --- /dev/null +++ b/src/langbot/pkg/api/http/controller/groups/knowledge/engines.py @@ -0,0 +1,37 @@ +import quart +from urllib.parse import unquote +from ... import group + + +@group.group_class('rag_engines', '/api/v1/knowledge/engines') +class RAGEnginesRouterGroup(group.RouterGroup): + async def initialize(self) -> None: + @self.route('', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def list_rag_engines() -> quart.Response: + """List all available RAG engines from plugins. + + Returns a list of RAG engines with their capabilities and configuration schemas. + This is used by the frontend to render the knowledge base creation wizard. + """ + engines = await self.ap.knowledge_service.list_rag_engines() + return self.success(data={'engines': engines}) + + @self.route('/<path:plugin_id>/creation-schema', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def get_engine_creation_schema(plugin_id: str) -> quart.Response: + """Get creation settings schema for a specific RAG engine. + + plugin_id is in 'author/name' format, captured via converter. + """ + plugin_id = unquote(plugin_id) + schema = await self.ap.knowledge_service.get_engine_creation_schema(plugin_id) + return self.success(data={'schema': schema}) + + @self.route('/<path:plugin_id>/retrieval-schema', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY) + async def get_engine_retrieval_schema(plugin_id: str) -> quart.Response: + """Get retrieval settings schema for a specific RAG engine. 
+ + plugin_id is in 'author/name' format, captured via converter. + """ + plugin_id = unquote(plugin_id) + schema = await self.ap.knowledge_service.get_engine_retrieval_schema(plugin_id) + return self.success(data={'schema': schema}) diff --git a/src/langbot/pkg/api/http/controller/groups/knowledge/external.py b/src/langbot/pkg/api/http/controller/groups/knowledge/external.py deleted file mode 100644 index 324889e73..000000000 --- a/src/langbot/pkg/api/http/controller/groups/knowledge/external.py +++ /dev/null @@ -1,61 +0,0 @@ -import quart -from ... import group - - -@group.group_class('external_knowledge_base', '/api/v1/knowledge/external-bases') -class ExternalKnowledgeBaseRouterGroup(group.RouterGroup): - async def initialize(self) -> None: - @self.route('/retrievers', methods=['GET']) - async def list_knowledge_retrievers() -> quart.Response: - """List all available knowledge retrievers from plugins.""" - retrievers = await self.ap.plugin_connector.list_knowledge_retrievers() - return self.success(data={'retrievers': retrievers}) - - @self.route('', methods=['POST', 'GET']) - async def handle_external_knowledge_bases() -> quart.Response: - if quart.request.method == 'GET': - external_kbs = await self.ap.external_kb_service.get_external_knowledge_bases() - return self.success(data={'bases': external_kbs}) - - elif quart.request.method == 'POST': - json_data = await quart.request.json - kb_uuid = await self.ap.external_kb_service.create_external_knowledge_base(json_data) - return self.success(data={'uuid': kb_uuid}) - - return self.http_status(405, -1, 'Method not allowed') - - @self.route( - '/', - methods=['GET', 'DELETE', 'PUT'], - ) - async def handle_specific_external_knowledge_base(kb_uuid: str) -> quart.Response: - if quart.request.method == 'GET': - external_kb = await self.ap.external_kb_service.get_external_knowledge_base(kb_uuid) - - if external_kb is None: - return self.http_status(404, -1, 'external knowledge base not found') - - return 
self.success( - data={ - 'base': external_kb, - } - ) - - elif quart.request.method == 'PUT': - json_data = await quart.request.json - await self.ap.external_kb_service.update_external_knowledge_base(kb_uuid, json_data) - return self.success({}) - - elif quart.request.method == 'DELETE': - await self.ap.external_kb_service.delete_external_knowledge_base(kb_uuid) - return self.success({}) - - @self.route( - '//retrieve', - methods=['POST'], - ) - async def retrieve_external_knowledge_base(kb_uuid: str) -> str: - json_data = await quart.request.json - query = json_data.get('query') - results = await self.ap.external_kb_service.retrieve_external_knowledge_base(kb_uuid, query) - return self.success(data={'results': results}) diff --git a/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py b/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py index 1828fb2b9..4493d2e72 100644 --- a/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py +++ b/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py @@ -68,7 +68,7 @@ async def _(pipeline_uuid: str) -> str: return self.http_status(404, -1, 'pipeline not found') # Only include plugins with pipeline-related components (Command, EventListener, Tool) - # Plugins that only have KnowledgeRetriever components are not suitable for pipeline extensions + # Plugins that only have RAGEngine components are not suitable for pipeline extensions pipeline_component_kinds = ['Command', 'EventListener', 'Tool'] plugins = await self.ap.plugin_connector.list_plugins(component_kinds=pipeline_component_kinds) mcp_servers = await self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True) diff --git a/src/langbot/pkg/api/http/service/external_kb.py b/src/langbot/pkg/api/http/service/external_kb.py deleted file mode 100644 index 4ac5d0fcc..000000000 --- a/src/langbot/pkg/api/http/service/external_kb.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import annotations - -from ....core import app 
-import sqlalchemy -from langbot.pkg.entity.persistence import rag as persistence_rag -import uuid - - -class ExternalKBService: - """External KB service""" - - ap: app.Application - - def __init__(self, ap: app.Application) -> None: - self.ap = ap - - # External Knowledge Base methods - async def get_external_knowledge_bases(self) -> list[dict]: - result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_rag.ExternalKnowledgeBase)) - external_kbs = result.all() - return [ - self.ap.persistence_mgr.serialize_model(persistence_rag.ExternalKnowledgeBase, external_kb) - for external_kb in external_kbs - ] - - async def get_external_knowledge_base(self, kb_uuid: str) -> dict | None: - result = await self.ap.persistence_mgr.execute_async( - sqlalchemy.select(persistence_rag.ExternalKnowledgeBase).where( - persistence_rag.ExternalKnowledgeBase.uuid == kb_uuid - ) - ) - external_kb = result.first() - if external_kb is None: - return None - return self.ap.persistence_mgr.serialize_model(persistence_rag.ExternalKnowledgeBase, external_kb) - - async def create_external_knowledge_base(self, kb_data: dict) -> str: - kb_data['uuid'] = str(uuid.uuid4()) - await self.ap.persistence_mgr.execute_async( - sqlalchemy.insert(persistence_rag.ExternalKnowledgeBase).values(kb_data) - ) - - kb = await self.get_external_knowledge_base(kb_data['uuid']) - - await self.ap.rag_mgr.load_external_knowledge_base(kb) - - return kb_data['uuid'] - - async def retrieve_external_knowledge_base(self, kb_uuid: str, query: str) -> list[dict]: - """Retrieve external knowledge base""" - runtime_kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid) - if runtime_kb is None: - raise Exception('Knowledge base not found') - return [ - result.model_dump() for result in await runtime_kb.retrieve(query, 5) - ] # top_k is just a placeholder for external knowledge base - - async def update_external_knowledge_base(self, kb_uuid: str, kb_data: dict) -> None: - if 'uuid' in kb_data: - 
del kb_data['uuid'] - - await self.ap.persistence_mgr.execute_async( - sqlalchemy.update(persistence_rag.ExternalKnowledgeBase) - .values(kb_data) - .where(persistence_rag.ExternalKnowledgeBase.uuid == kb_uuid) - ) - await self.ap.rag_mgr.remove_knowledge_base_from_runtime(kb_uuid) - - kb = await self.get_external_knowledge_base(kb_uuid) - - await self.ap.rag_mgr.load_external_knowledge_base(kb) - - async def delete_external_knowledge_base(self, kb_uuid: str) -> None: - await self.ap.persistence_mgr.execute_async( - sqlalchemy.delete(persistence_rag.ExternalKnowledgeBase).where( - persistence_rag.ExternalKnowledgeBase.uuid == kb_uuid - ) - ) - - await self.ap.rag_mgr.delete_knowledge_base(kb_uuid) diff --git a/src/langbot/pkg/api/http/service/knowledge.py b/src/langbot/pkg/api/http/service/knowledge.py index b753ce5ac..8b55f56eb 100644 --- a/src/langbot/pkg/api/http/service/knowledge.py +++ b/src/langbot/pkg/api/http/service/knowledge.py @@ -17,45 +17,41 @@ def __init__(self, ap: app.Application) -> None: async def get_knowledge_bases(self) -> list[dict]: """获取所有知识库""" - result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_rag.KnowledgeBase)) - knowledge_bases = result.all() - return [ - self.ap.persistence_mgr.serialize_model(persistence_rag.KnowledgeBase, knowledge_base) - for knowledge_base in knowledge_bases - ] + return await self.ap.rag_mgr.get_all_knowledge_base_details() async def get_knowledge_base(self, kb_uuid: str) -> dict | None: """获取知识库""" - result = await self.ap.persistence_mgr.execute_async( - sqlalchemy.select(persistence_rag.KnowledgeBase).where(persistence_rag.KnowledgeBase.uuid == kb_uuid) - ) - knowledge_base = result.first() - if knowledge_base is None: - return None - return self.ap.persistence_mgr.serialize_model(persistence_rag.KnowledgeBase, knowledge_base) + return await self.ap.rag_mgr.get_knowledge_base_details(kb_uuid) async def create_knowledge_base(self, kb_data: dict) -> str: """创建知识库""" - 
kb_data['uuid'] = str(uuid.uuid4()) - await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_rag.KnowledgeBase).values(kb_data)) - - kb = await self.get_knowledge_base(kb_data['uuid']) - - await self.ap.rag_mgr.load_knowledge_base(kb) - - return kb_data['uuid'] + # In new architecture, we delegate entirely to RAGManager which uses plugins. + # Legacy internal KB creation is removed. + + rag_engine_plugin_id = kb_data.get('rag_engine_plugin_id') + if not rag_engine_plugin_id: + raise ValueError("rag_engine_plugin_id is required") + + kb = await self.ap.rag_mgr.create_knowledge_base( + name=kb_data.get('name', 'Untitled'), + rag_engine_plugin_id=rag_engine_plugin_id, + creation_settings=kb_data.get('creation_settings', {}), + description=kb_data.get('description', ''), + embedding_model_uuid=kb_data.get('embedding_model_uuid', '') + ) + return kb.uuid async def update_knowledge_base(self, kb_uuid: str, kb_data: dict) -> None: """更新知识库""" - if 'uuid' in kb_data: - del kb_data['uuid'] + # Filter to only mutable fields + filtered_data = {k: v for k, v in kb_data.items() if k in persistence_rag.KnowledgeBase.MUTABLE_FIELDS} - if 'embedding_model_uuid' in kb_data: - del kb_data['embedding_model_uuid'] + if not filtered_data: + return await self.ap.persistence_mgr.execute_async( sqlalchemy.update(persistence_rag.KnowledgeBase) - .values(kb_data) + .values(filtered_data) .where(persistence_rag.KnowledgeBase.uuid == kb_uuid) ) await self.ap.rag_mgr.remove_knowledge_base_from_runtime(kb_uuid) @@ -64,16 +60,31 @@ async def update_knowledge_base(self, kb_uuid: str, kb_data: dict) -> None: await self.ap.rag_mgr.load_knowledge_base(kb) + async def _check_doc_capability(self, kb_uuid: str, operation: str) -> None: + """Check if the KB's RAG engine supports document operations. + + Args: + kb_uuid: Knowledge base UUID. + operation: Human-readable operation name for error messages. + + Raises: + Exception: If the KB does not support doc_ingestion. 
+ """ + kb_info = await self.ap.rag_mgr.get_knowledge_base_details(kb_uuid) + if not kb_info: + raise Exception('Knowledge base not found') + capabilities = kb_info.get('rag_engine', {}).get('capabilities', []) + if 'doc_ingestion' not in capabilities: + raise Exception(f'This knowledge base does not support {operation}') + async def store_file(self, kb_uuid: str, file_id: str) -> int: """存储文件""" - # await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_rag.File).values(kb_id=kb_uuid, file_id=file_id)) - # await self.ap.rag_mgr.store_file(file_id) runtime_kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid) if runtime_kb is None: raise Exception('Knowledge base not found') - # Only internal KBs support file storage - if runtime_kb.get_type() != 'internal': - raise Exception('Only internal knowledge bases support file storage') + + await self._check_doc_capability(kb_uuid, 'document upload') + result = await runtime_kb.store_file(file_id) # Update the KB's updated_at timestamp @@ -85,14 +96,23 @@ async def store_file(self, kb_uuid: str, file_id: str) -> int: return result - async def retrieve_knowledge_base(self, kb_uuid: str, query: str) -> list[dict]: + async def retrieve_knowledge_base( + self, kb_uuid: str, query: str, retrieval_settings: dict | None = None + ) -> list[dict]: """检索知识库""" runtime_kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid) if runtime_kb is None: raise Exception('Knowledge base not found') - return [ - result.model_dump() for result in await runtime_kb.retrieve(query, runtime_kb.knowledge_base_entity.top_k) - ] + + # Pass retrieval_settings + results = await runtime_kb.retrieve( + query, + runtime_kb.knowledge_base_entity.top_k, + settings=retrieval_settings + ) + + return [result.model_dump() for result in results] + async def get_files_by_knowledge_base(self, kb_uuid: str) -> list[dict]: """获取知识库文件""" @@ -107,9 +127,9 @@ async def delete_file(self, kb_uuid: str, file_id: str) -> None: runtime_kb = 
await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid) if runtime_kb is None: raise Exception('Knowledge base not found') - # Only internal KBs support file deletion - if runtime_kb.get_type() != 'internal': - raise Exception('Only internal knowledge bases support file deletion') + + await self._check_doc_capability(kb_uuid, 'document deletion') + await runtime_kb.delete_file(file_id) # Update the KB's updated_at timestamp @@ -140,3 +160,37 @@ async def delete_knowledge_base(self, kb_uuid: str) -> None: await self.ap.persistence_mgr.execute_async( sqlalchemy.delete(persistence_rag.File).where(persistence_rag.File.uuid == file.uuid) ) + + # ================= RAG Engine Discovery ================= + + async def list_rag_engines(self) -> list[dict]: + """List all available RAG engines from plugins.""" + engines = [] + + if not self.ap.plugin_connector.is_enable_plugin: + return engines + + # Get RAGEngine plugins + try: + rag_engines = await self.ap.plugin_connector.list_rag_engines() + engines.extend(rag_engines) + except Exception as e: + self.ap.logger.warning(f"Failed to list RAG engines from plugins: {e}") + + return engines + + async def get_engine_creation_schema(self, plugin_id: str) -> dict: + """Get creation settings schema for a specific RAG engine.""" + try: + return await self.ap.plugin_connector.get_rag_creation_schema(plugin_id) + except Exception as e: + self.ap.logger.warning(f"Failed to get creation schema for {plugin_id}: {e}") + return {} + + async def get_engine_retrieval_schema(self, plugin_id: str) -> dict: + """Get retrieval settings schema for a specific RAG engine.""" + try: + return await self.ap.plugin_connector.get_rag_retrieval_schema(plugin_id) + except Exception as e: + self.ap.logger.warning(f"Failed to get retrieval schema for {plugin_id}: {e}") + return {} diff --git a/src/langbot/pkg/core/app.py b/src/langbot/pkg/core/app.py index 62f8e7aee..f181623b8 100644 --- a/src/langbot/pkg/core/app.py +++ b/src/langbot/pkg/core/app.py @@ 
-28,7 +28,6 @@ from ..api.http.service import mcp as mcp_service from ..api.http.service import apikey as apikey_service from ..api.http.service import webhook as webhook_service -from ..api.http.service import external_kb as external_kb_service from ..api.http.service import monitoring as monitoring_service from ..discover import engine as discover_engine from ..storage import mgr as storagemgr @@ -36,6 +35,7 @@ from . import taskmgr from . import entities as core_entities from ..rag.knowledge import kbmgr as rag_mgr +from ..rag.service import RAGRuntimeService from ..vector import mgr as vectordb_mgr from ..telemetry import telemetry as telemetry_module @@ -61,6 +61,7 @@ class Application: model_mgr: llm_model_mgr.ModelManager = None rag_mgr: rag_mgr.RAGManager = None + rag_runtime_service: RAGRuntimeService = None # TODO move to pipeline tool_mgr: llm_tool_mgr.ToolManager = None @@ -134,8 +135,6 @@ class Application: knowledge_service: knowledge_service.KnowledgeService = None - external_kb_service: external_kb_service.ExternalKBService = None - mcp_service: mcp_service.MCPService = None apikey_service: apikey_service.ApiKeyService = None diff --git a/src/langbot/pkg/core/stages/build_app.py b/src/langbot/pkg/core/stages/build_app.py index 6f84f299e..d733bf4ac 100644 --- a/src/langbot/pkg/core/stages/build_app.py +++ b/src/langbot/pkg/core/stages/build_app.py @@ -11,6 +11,7 @@ from ...provider.modelmgr import modelmgr as llm_model_mgr from ...provider.tools import toolmgr as llm_tool_mgr from ...rag.knowledge import kbmgr as rag_mgr +from ...rag.service import RAGRuntimeService from ...platform import botmgr as im_mgr from ...platform.webhook_pusher import WebhookPusher from ...persistence import mgr as persistencemgr @@ -25,7 +26,6 @@ from ...api.http.service import mcp as mcp_service from ...api.http.service import apikey as apikey_service from ...api.http.service import webhook as webhook_service -from ...api.http.service import external_kb as 
external_kb_service from ...api.http.service import monitoring as monitoring_service from ...discover import engine as discover_engine from ...storage import mgr as storagemgr @@ -71,9 +71,6 @@ async def run(self, ap: app.Application): knowledge_service_inst = knowledge_service.KnowledgeService(ap) ap.knowledge_service = knowledge_service_inst - external_kb_service_inst = external_kb_service.ExternalKBService(ap) - ap.external_kb_service = external_kb_service_inst - mcp_service_inst = mcp_service.MCPService(ap) ap.mcp_service = mcp_service_inst @@ -141,6 +138,9 @@ async def run(self, ap: app.Application): await rag_mgr_inst.initialize() ap.rag_mgr = rag_mgr_inst + # Initialize RAG Runtime Service for plugins + ap.rag_runtime_service = RAGRuntimeService(ap) + # 初始化向量数据库管理器 vectordb_mgr_inst = vectordb_mgr.VectorDBManager(ap) await vectordb_mgr_inst.initialize() diff --git a/src/langbot/pkg/entity/persistence/rag.py b/src/langbot/pkg/entity/persistence/rag.py index 5abd6c1a5..75fde91d8 100644 --- a/src/langbot/pkg/entity/persistence/rag.py +++ b/src/langbot/pkg/entity/persistence/rag.py @@ -13,6 +13,21 @@ class KnowledgeBase(Base): embedding_model_uuid = sqlalchemy.Column(sqlalchemy.String, default='') top_k = sqlalchemy.Column(sqlalchemy.Integer, default=5) + # New fields for plugin-based RAG + rag_engine_plugin_id = sqlalchemy.Column(sqlalchemy.String, nullable=True) + collection_id = sqlalchemy.Column(sqlalchemy.String, nullable=True) + creation_settings = sqlalchemy.Column(sqlalchemy.JSON, nullable=True, default=None) + + # Field sets for different operations + MUTABLE_FIELDS = {'name', 'description', 'top_k', 'creation_settings', 'embedding_model_uuid'} + """Fields that can be updated after creation.""" + + CREATE_FIELDS = MUTABLE_FIELDS | {'uuid', 'rag_engine_plugin_id', 'collection_id'} + """Fields used when creating a new knowledge base.""" + + ALL_DB_FIELDS = CREATE_FIELDS | {'emoji', 'created_at', 'updated_at'} + """All fields stored in database (for 
loading from DB row).""" + class File(Base): __tablename__ = 'knowledge_base_files' @@ -29,16 +44,3 @@ class Chunk(Base): uuid = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True, unique=True) file_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True) text = sqlalchemy.Column(sqlalchemy.Text) - - -class ExternalKnowledgeBase(Base): - __tablename__ = 'external_knowledge_bases' - uuid = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True, unique=True) - name = sqlalchemy.Column(sqlalchemy.String, index=True) - description = sqlalchemy.Column(sqlalchemy.Text) - emoji = sqlalchemy.Column(sqlalchemy.String(10), nullable=True, default='🔗') - plugin_author = sqlalchemy.Column(sqlalchemy.String, nullable=False) - plugin_name = sqlalchemy.Column(sqlalchemy.String, nullable=False) - retriever_name = sqlalchemy.Column(sqlalchemy.String, nullable=False) - retriever_config = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={}) - created_at = sqlalchemy.Column(sqlalchemy.DateTime, default=sqlalchemy.func.now()) diff --git a/src/langbot/pkg/persistence/migrations/dbm019_rag_engine_plugin_architecture.py b/src/langbot/pkg/persistence/migrations/dbm019_rag_engine_plugin_architecture.py new file mode 100644 index 000000000..9a26003e3 --- /dev/null +++ b/src/langbot/pkg/persistence/migrations/dbm019_rag_engine_plugin_architecture.py @@ -0,0 +1,93 @@ +import sqlalchemy +from .. import migration + + +@migration.migration_class(19) +class DBMigrateRAGEnginePluginArchitecture(migration.DBMigration): + """Migrate to unified RAG Engine plugin architecture. 
+ + Changes: + - Add rag_engine_plugin_id, collection_id, creation_settings columns to knowledge_bases + - Drop external_knowledge_bases table (no longer needed; external KB data is not migrated) + """ + + async def upgrade(self): + """Upgrade""" + await self._add_columns_to_knowledge_bases() + await self._drop_external_knowledge_bases_table() + + async def _get_table_columns(self, table_name: str) -> list[str]: + """Get column names from a table (works for both SQLite and PostgreSQL).""" + if self.ap.persistence_mgr.db.name == 'postgresql': + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + f"SELECT column_name FROM information_schema.columns WHERE table_name = '{table_name}';" + ) + ) + return [row[0] for row in result.fetchall()] + else: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text(f'PRAGMA table_info({table_name});') + ) + return [row[1] for row in result.fetchall()] + + async def _table_exists(self, table_name: str) -> bool: + """Check if a table exists.""" + if self.ap.persistence_mgr.db.name == 'postgresql': + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + f"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '{table_name}');" + ) + ) + return result.scalar() + else: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';" + ) + ) + return result.first() is not None + + async def _add_columns_to_knowledge_bases(self): + """Add new RAG plugin architecture columns to knowledge_bases table.""" + columns = await self._get_table_columns('knowledge_bases') + + new_columns = { + 'rag_engine_plugin_id': 'VARCHAR', + 'collection_id': 'VARCHAR', + 'creation_settings': 'TEXT', # JSON stored as TEXT for SQLite compatibility + } + + for col_name, col_type in new_columns.items(): + if col_name not in columns: + if self.ap.persistence_mgr.db.name == 'postgresql': + await 
self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + f"ALTER TABLE knowledge_bases ADD COLUMN {col_name} {col_type};" + ) + ) + else: + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + f"ALTER TABLE knowledge_bases ADD COLUMN {col_name} {col_type};" + ) + ) + + # For existing knowledge bases without rag_engine_plugin_id, + # set collection_id = uuid (same default as new KBs) + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + "UPDATE knowledge_bases SET collection_id = uuid WHERE collection_id IS NULL;" + ) + ) + + async def _drop_external_knowledge_bases_table(self): + """Drop the external_knowledge_bases table if it exists.""" + if await self._table_exists('external_knowledge_bases'): + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text("DROP TABLE external_knowledge_bases;") + ) + + async def downgrade(self): + """Downgrade""" + pass diff --git a/src/langbot/pkg/plugin/connector.py b/src/langbot/pkg/plugin/connector.py index 1ae543750..5e3bf60a5 100644 --- a/src/langbot/pkg/plugin/connector.py +++ b/src/langbot/pkg/plugin/connector.py @@ -463,15 +463,6 @@ async def execute_command( yield cmd_ret - # KnowledgeRetriever methods - async def list_knowledge_retrievers(self, bound_plugins: list[str] | None = None) -> list[dict[str, Any]]: - """List all available KnowledgeRetriever components.""" - if not self.is_enable_plugin: - return [] - - retrievers_data = await self.handler.list_knowledge_retrievers(include_plugins=bound_plugins) - return retrievers_data - async def retrieve_knowledge( self, plugin_author: str, @@ -479,8 +470,8 @@ async def retrieve_knowledge( retriever_name: str, instance_id: str, retrieval_context: dict[str, Any], - ) -> list[dict[str, Any]]: - """Retrieve knowledge using a KnowledgeRetriever instance.""" + ) -> dict[str, Any]: + """Retrieve knowledge using a RAGEngine instance.""" if not self.is_enable_plugin: return [] @@ -503,38 +494,74 @@ def dispose(self): async def 
sync_polymorphic_component_instances(self) -> dict[str, Any]: """Sync polymorphic component instances with runtime. - This collects all external knowledge bases from database and sends to runtime - to ensure instance integrity across restarts. + Currently no polymorphic components need syncing. + This method is kept for future extensibility. """ if not self.is_enable_plugin: return {} - # ===== external knowledge bases ===== - - external_kbs = await self.ap.external_kb_service.get_external_knowledge_bases() - - # Build required_instances list - required_instances = [] - for kb in external_kbs: - required_instances.append( - { - 'instance_id': kb['uuid'], - 'plugin_author': kb['plugin_author'], - 'plugin_name': kb['plugin_name'], - 'component_kind': 'KnowledgeRetriever', - 'component_name': kb['retriever_name'], - 'config': kb['retriever_config'], - } + return await self.handler.sync_polymorphic_component_instances([]) + + # ================= RAG Capability Callers ================= + + @staticmethod + def _parse_plugin_id(plugin_id: str) -> tuple[str, str]: + """Parse a plugin ID string into (author, name). + + Args: + plugin_id: Plugin ID in 'author/name' format. + + Returns: + Tuple of (plugin_author, plugin_name). + + Raises: + ValueError: If plugin_id is not in the expected 'author/name' format. + """ + if '/' not in plugin_id: + raise ValueError( + f"Invalid plugin_id format: '{plugin_id}'. " + "Expected 'author/name' format (e.g. 'langbot/rag-engine')." ) + return plugin_id.split('/', 1) - self.ap.logger.info(f'Syncing {len(required_instances)} polymorphic component instances to runtime') + async def call_rag_ingest(self, plugin_id: str, context_data: dict[str, Any]) -> dict[str, Any]: + """Call plugin to ingest document. - # Send to runtime - sync_result = await self.handler.sync_polymorphic_component_instances(required_instances) + Args: + plugin_id: Target plugin ID (author/name). + context_data: IngestionContext data. 
+ """ + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.rag_ingest_document(plugin_author, plugin_name, context_data) - self.ap.logger.info( - f'Sync complete: {len(sync_result.get("success_instances", []))} succeeded, ' - f'{len(sync_result.get("failed_instances", []))} failed' - ) + async def call_rag_delete_document(self, plugin_id: str, document_id: str, kb_id: str) -> bool: + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.rag_delete_document(plugin_author, plugin_name, document_id, kb_id) + + async def get_rag_creation_schema(self, plugin_id: str) -> dict[str, Any]: + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.get_rag_creation_schema(plugin_author, plugin_name) + + async def get_rag_retrieval_schema(self, plugin_id: str) -> dict[str, Any]: + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.get_rag_retrieval_schema(plugin_author, plugin_name) + + async def rag_on_kb_create(self, plugin_id: str, kb_id: str, config: dict[str, Any]) -> dict[str, Any]: + """Notify plugin about KB creation.""" + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.rag_on_kb_create(plugin_author, plugin_name, kb_id, config) + + async def rag_on_kb_delete(self, plugin_id: str, kb_id: str) -> dict[str, Any]: + """Notify plugin about KB deletion.""" + plugin_author, plugin_name = self._parse_plugin_id(plugin_id) + return await self.handler.rag_on_kb_delete(plugin_author, plugin_name, kb_id) + + async def list_rag_engines(self) -> list[dict[str, Any]]: + """List all available RAG engines from plugins. + + Returns a list of RAG engines with their capabilities and configuration schemas. 
+ """ + if not self.is_enable_plugin: + return [] - return sync_result + return await self.handler.list_rag_engines() diff --git a/src/langbot/pkg/plugin/handler.py b/src/langbot/pkg/plugin/handler.py index 39f50c32b..d33342abc 100644 --- a/src/langbot/pkg/plugin/handler.py +++ b/src/langbot/pkg/plugin/handler.py @@ -26,6 +26,20 @@ from ..utils import constants +def _make_rag_error_response(error: Exception, error_type: str, **extra_context) -> handler.ActionResponse: + """Create a clean error response for RAG operations. + + Args: + error: The caught exception. + error_type: A category string like 'EmbeddingError', 'VectorStoreError'. + **extra_context: Additional context fields for the error message. + """ + context_parts = [f"{k}={v}" for k, v in extra_context.items()] + context_str = f" [{', '.join(context_parts)}]" if context_parts else "" + message = f"[{error_type}/{type(error).__name__}]{context_str} {str(error)}" + return handler.ActionResponse.error(message=message) + + class RuntimeConnectionHandler(handler.Handler): """Runtime connection handler""" @@ -438,7 +452,7 @@ async def get_binary_storage_keys(data: dict[str, Any]) -> handler.ActionRespons }, ) - @self.action(RuntimeToLangBotAction.GET_CONFIG_FILE) + @self.action(PluginToRuntimeAction.GET_CONFIG_FILE) async def get_config_file(data: dict[str, Any]) -> handler.ActionResponse: """Get a config file by file key""" file_key = data['file_key'] @@ -457,6 +471,83 @@ async def get_config_file(data: dict[str, Any]) -> handler.ActionResponse: message=f'Failed to load config file {file_key}: {e}', ) + # ================= RAG Capability Handlers ================= + + @self.action(PluginToRuntimeAction.RAG_EMBED_DOCUMENTS) + async def rag_embed_documents(data: dict[str, Any]) -> handler.ActionResponse: + kb_id = data['kb_id'] + texts = data['texts'] + try: + vectors = await self.ap.rag_runtime_service.embed_documents(kb_id, texts) + return handler.ActionResponse.success(data={'vectors': vectors}) + except 
Exception as e: + return _make_rag_error_response(e, 'EmbeddingError', kb_id=kb_id) + + @self.action(PluginToRuntimeAction.RAG_EMBED_QUERY) + async def rag_embed_query(data: dict[str, Any]) -> handler.ActionResponse: + kb_id = data['kb_id'] + text = data['text'] + try: + vector = await self.ap.rag_runtime_service.embed_query(kb_id, text) + return handler.ActionResponse.success(data={'vector': vector}) + except Exception as e: + return _make_rag_error_response(e, 'EmbeddingError', kb_id=kb_id) + + @self.action(PluginToRuntimeAction.RAG_VECTOR_UPSERT) + async def rag_vector_upsert(data: dict[str, Any]) -> handler.ActionResponse: + collection_id = data['collection_id'] + vectors = data['vectors'] + ids = data['ids'] + metadata = data.get('metadata') + try: + await self.ap.rag_runtime_service.vector_upsert(collection_id, vectors, ids, metadata) + return handler.ActionResponse.success(data={}) + except Exception as e: + return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id) + + @self.action(PluginToRuntimeAction.RAG_VECTOR_SEARCH) + async def rag_vector_search(data: dict[str, Any]) -> handler.ActionResponse: + collection_id = data['collection_id'] + query_vector = data['query_vector'] + top_k = data['top_k'] + filters = data.get('filters') + try: + results = await self.ap.rag_runtime_service.vector_search(collection_id, query_vector, top_k, filters) + return handler.ActionResponse.success(data={'results': results}) + except Exception as e: + return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id) + + @self.action(PluginToRuntimeAction.RAG_VECTOR_DELETE) + async def rag_vector_delete(data: dict[str, Any]) -> handler.ActionResponse: + collection_id = data['collection_id'] + # Support both 'file_ids' (preferred) and 'ids' (legacy) for backward compatibility + file_ids = data.get('file_ids') or data.get('ids') + filters = data.get('filters') + try: + count = await 
self.ap.rag_runtime_service.vector_delete(collection_id, file_ids, filters) + return handler.ActionResponse.success(data={'count': count}) + except Exception as e: + return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id) + + @self.action(PluginToRuntimeAction.RAG_GET_FILE_STREAM) + async def rag_get_file_stream(data: dict[str, Any]) -> handler.ActionResponse: + storage_path = data['storage_path'] + try: + content_bytes = await self.ap.rag_runtime_service.get_file_stream(storage_path) + content_base64 = base64.b64encode(content_bytes).decode('utf-8') + return handler.ActionResponse.success(data={'content_base64': content_base64}) + except Exception as e: + return _make_rag_error_response(e, 'FileServiceError', storage_path=storage_path) + + @self.action(CommonAction.PING) + async def ping(data: dict[str, Any]) -> handler.ActionResponse: + """Ping""" + return handler.ActionResponse.success( + data={ + "pong": "pong", + }, + ) + async def ping(self) -> dict[str, Any]: """Ping the runtime""" return await self.call_action( @@ -716,18 +807,6 @@ async def execute_command( async for ret in gen: yield ret - # KnowledgeRetriever methods - async def list_knowledge_retrievers(self, include_plugins: list[str] | None = None) -> list[dict[str, Any]]: - """List knowledge retrievers""" - result = await self.call_action( - LangBotToRuntimeAction.LIST_KNOWLEDGE_RETRIEVERS, - { - 'include_plugins': include_plugins, - }, - timeout=10, - ) - return result['retrievers'] - async def retrieve_knowledge( self, plugin_author: str, @@ -735,7 +814,7 @@ async def retrieve_knowledge( retriever_name: str, instance_id: str, retrieval_context: dict[str, Any], - ) -> list[dict[str, Any]]: + ) -> dict[str, Any]: """Retrieve knowledge""" result = await self.call_action( LangBotToRuntimeAction.RETRIEVE_KNOWLEDGE, @@ -748,7 +827,7 @@ async def retrieve_knowledge( }, timeout=30, ) - return result['retrieval_results'] + return result async def 
sync_polymorphic_component_instances(self, required_instances: list[dict[str, Any]]) -> dict[str, Any]: """Sync polymorphic component instances with runtime""" @@ -769,3 +848,87 @@ async def get_debug_info(self) -> dict[str, Any]: timeout=10, ) return result + + # ================= RAG Capability Callers (LangBot -> Runtime) ================= + + async def rag_ingest_document(self, plugin_author: str, plugin_name: str, context_data: dict[str, Any]) -> dict[str, Any]: + """Send INGEST_DOCUMENT action to runtime.""" + result = await self.call_action( + LangBotToRuntimeAction.RAG_INGEST_DOCUMENT, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name, + "context": context_data + }, + timeout=300 # Ingestion can be slow + ) + return result + + async def rag_delete_document(self, plugin_author: str, plugin_name: str, document_id: str, kb_id: str) -> bool: + result = await self.call_action( + LangBotToRuntimeAction.RAG_DELETE_DOCUMENT, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name, + "document_id": document_id, + "kb_id": kb_id + }, + timeout=30 + ) + return result.get("success", False) + + async def rag_on_kb_create(self, plugin_author: str, plugin_name: str, kb_id: str, config: dict[str, Any]) -> dict[str, Any]: + """Notify plugin about KB creation.""" + result = await self.call_action( + LangBotToRuntimeAction.RAG_ON_KB_CREATE, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name, + "kb_id": kb_id, + "config": config + }, + timeout=30 + ) + return result + + async def rag_on_kb_delete(self, plugin_author: str, plugin_name: str, kb_id: str) -> dict[str, Any]: + """Notify plugin about KB deletion.""" + result = await self.call_action( + LangBotToRuntimeAction.RAG_ON_KB_DELETE, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name, + "kb_id": kb_id + }, + timeout=30 + ) + return result + + async def get_rag_creation_schema(self, plugin_author: str, plugin_name: str) -> dict[str, Any]: + return await 
self.call_action( + LangBotToRuntimeAction.GET_RAG_CREATION_SETTINGS_SCHEMA, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name + }, + timeout=10 + ) + + async def get_rag_retrieval_schema(self, plugin_author: str, plugin_name: str) -> dict[str, Any]: + return await self.call_action( + LangBotToRuntimeAction.GET_RAG_RETRIEVAL_SETTINGS_SCHEMA, + { + "plugin_author": plugin_author, + "plugin_name": plugin_name + }, + timeout=10 + ) + + async def list_rag_engines(self) -> list[dict[str, Any]]: + """List all available RAG engines from plugins.""" + result = await self.call_action( + LangBotToRuntimeAction.LIST_RAG_ENGINES, + {}, + timeout=60 + ) + return result.get("engines", []) diff --git a/src/langbot/pkg/provider/modelmgr/requester.py b/src/langbot/pkg/provider/modelmgr/requester.py index c281d8aec..3a55cef4b 100644 --- a/src/langbot/pkg/provider/modelmgr/requester.py +++ b/src/langbot/pkg/provider/modelmgr/requester.py @@ -283,6 +283,15 @@ def __init__( self.model_entity = model_entity self.provider = provider + async def embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a list of documents""" + return await self.provider.invoke_embedding(self, texts) + + async def embed_query(self, text: str) -> list[float]: + """Embed a query string""" + vectors = await self.provider.invoke_embedding(self, [text]) + return vectors[0] if vectors else [] + class ProviderAPIRequester(metaclass=abc.ABCMeta): """Provider API请求器""" diff --git a/src/langbot/pkg/provider/runners/localagent.py b/src/langbot/pkg/provider/runners/localagent.py index dbda66229..a0b111e73 100644 --- a/src/langbot/pkg/provider/runners/localagent.py +++ b/src/langbot/pkg/provider/runners/localagent.py @@ -74,13 +74,8 @@ async def run( self.ap.logger.warning(f'Knowledge base {kb_uuid} not found, skipping') continue - # Get top_k based on KB type - if kb.get_type() == 'internal': - top_k = kb.knowledge_base_entity.top_k - elif kb.get_type() == 'external': - top_k = 5 # 
external kb's top_k is managed by plugin config - else: - top_k = 5 # default fallback + # Get top_k from KB entity, default to 5 if not set + top_k = kb.knowledge_base_entity.top_k or 5 result = await kb.retrieve(user_message_text, top_k) diff --git a/src/langbot/pkg/rag/knowledge/base.py b/src/langbot/pkg/rag/knowledge/base.py index 4b183eae2..749404066 100644 --- a/src/langbot/pkg/rag/knowledge/base.py +++ b/src/langbot/pkg/rag/knowledge/base.py @@ -22,18 +22,20 @@ async def initialize(self): pass @abc.abstractmethod - async def retrieve(self, query: str, top_k: int) -> list[rag_context.RetrievalResultEntry]: + async def retrieve(self, query: str, top_k: int, settings: dict | None = None) -> list[rag_context.RetrievalResultEntry]: """Retrieve relevant documents from the knowledge base Args: query: The query string top_k: Number of top results to return + settings: Optional retrieval settings/configuration Returns: List of retrieve result entries """ pass + @abc.abstractmethod def get_uuid(self) -> str: """Get the UUID of the knowledge base""" @@ -45,8 +47,8 @@ def get_name(self) -> str: pass @abc.abstractmethod - def get_type(self) -> str: - """Get the type of knowledge base (internal/external)""" + def get_rag_engine_plugin_id(self) -> str: + """Get the RAG engine plugin ID""" pass @abc.abstractmethod diff --git a/src/langbot/pkg/rag/knowledge/external.py b/src/langbot/pkg/rag/knowledge/external.py deleted file mode 100644 index f1a5fed3b..000000000 --- a/src/langbot/pkg/rag/knowledge/external.py +++ /dev/null @@ -1,85 +0,0 @@ -"""External knowledge base implementation""" - -from __future__ import annotations - -from langbot.pkg.core import app -from langbot.pkg.entity.persistence import rag as persistence_rag -from langbot_plugin.api.entities.builtin.rag import context as rag_context -from .base import KnowledgeBaseInterface - - -class ExternalKnowledgeBase(KnowledgeBaseInterface): - """External knowledge base that queries via HTTP API or plugin retriever""" - 
- external_kb_entity: persistence_rag.ExternalKnowledgeBase - - # Plugin retriever instance ID - retriever_instance_id: str | None - - def __init__(self, ap: app.Application, external_kb_entity: persistence_rag.ExternalKnowledgeBase): - super().__init__(ap) - self.external_kb_entity = external_kb_entity - self.retriever_instance_id = None - - async def initialize(self): - """Initialize the external knowledge base""" - # Use KB UUID as instance ID - # Instance creation is now handled by the unified sync mechanism - # when LangBot connects to runtime - self.retriever_instance_id = self.external_kb_entity.uuid - - self.ap.logger.info( - f'Initialized external KB {self.external_kb_entity.uuid}, instance will be created by sync mechanism' - ) - - async def retrieve(self, query: str, top_k: int = 5) -> list[rag_context.RetrievalResultEntry]: - """Retrieve documents from external knowledge base via plugin retriever""" - if not self.retriever_instance_id: - self.ap.logger.error(f'No retriever instance for KB {self.external_kb_entity.uuid}') - return [] - - try: - results = await self.ap.plugin_connector.retrieve_knowledge( - self.external_kb_entity.plugin_author, - self.external_kb_entity.plugin_name, - self.external_kb_entity.retriever_name, - self.retriever_instance_id, - {'query': query}, - ) - - # Convert plugin results to RetrievalResultEntry - retrieval_entries = [] - for result in results: - retrieval_entries.append(rag_context.RetrievalResultEntry(**result)) - - return retrieval_entries - except Exception as e: - self.ap.logger.error(f'Plugin retriever error: {e}') - import traceback - - traceback.print_exc() - return [] - - def get_uuid(self) -> str: - """Get the UUID of the external knowledge base""" - return self.external_kb_entity.uuid - - def get_name(self) -> str: - """Get the name of the external knowledge base""" - return self.external_kb_entity.name - - def get_type(self) -> str: - """Get the type of knowledge base""" - return 'external' - - async def 
dispose(self): - """Clean up resources""" - # Trigger sync to immediately delete the instance from plugin process - # This ensures instance is cleaned up without waiting for next LangBot restart - try: - await self.ap.plugin_connector.sync_polymorphic_component_instances() - self.ap.logger.info( - f'Disposed external KB {self.external_kb_entity.uuid}, triggered sync to delete instance' - ) - except Exception as e: - self.ap.logger.error(f'Failed to sync after disposing KB: {e}') diff --git a/src/langbot/pkg/rag/knowledge/kbmgr.py b/src/langbot/pkg/rag/knowledge/kbmgr.py index 5fd44854e..a8fe72618 100644 --- a/src/langbot/pkg/rag/knowledge/kbmgr.py +++ b/src/langbot/pkg/rag/knowledge/kbmgr.py @@ -1,18 +1,21 @@ from __future__ import annotations +import logging +import mimetypes import traceback import uuid import zipfile import io -from .services import parser, chunker +from typing import Any, Dict from langbot.pkg.core import app -from langbot.pkg.rag.knowledge.services.embedder import Embedder -from langbot.pkg.rag.knowledge.services.retriever import Retriever import sqlalchemy + + from langbot.pkg.entity.persistence import rag as persistence_rag from langbot.pkg.core import taskmgr from langbot_plugin.api.entities.builtin.rag import context as rag_context from .base import KnowledgeBaseInterface -from .external import ExternalKnowledgeBase + + class RuntimeKnowledgeBase(KnowledgeBaseInterface): @@ -20,23 +23,9 @@ class RuntimeKnowledgeBase(KnowledgeBaseInterface): knowledge_base_entity: persistence_rag.KnowledgeBase - parser: parser.FileParser - - chunker: chunker.Chunker - - embedder: Embedder - - retriever: Retriever - def __init__(self, ap: app.Application, knowledge_base_entity: persistence_rag.KnowledgeBase): super().__init__(ap) self.knowledge_base_entity = knowledge_base_entity - self.parser = parser.FileParser(ap=self.ap) - self.chunker = chunker.Chunker(ap=self.ap) - self.embedder = Embedder(ap=self.ap) - self.retriever = Retriever(ap=self.ap) - # 
传递kb_id给retriever - self.retriever.kb_id = knowledge_base_entity.uuid async def initialize(self): pass @@ -50,29 +39,27 @@ async def _store_file_task(self, file: persistence_rag.File, task_context: taskm .values(status='processing') ) - task_context.set_current_action('Parsing file') - # parse file - text = await self.parser.parse(file.file_name, file.extension) - if not text: - raise Exception(f'No text extracted from file {file.file_name}') - - task_context.set_current_action('Chunking file') - # chunk file - chunks_texts = await self.chunker.chunk(text) - if not chunks_texts: - raise Exception(f'No chunks extracted from file {file.file_name}') - - task_context.set_current_action('Embedding chunks') - - embedding_model = await self.ap.model_mgr.get_embedding_model_by_uuid( - self.knowledge_base_entity.embedding_model_uuid - ) - # embed chunks - await self.embedder.embed_and_store( - kb_id=self.knowledge_base_entity.uuid, - file_id=file.uuid, - chunks=chunks_texts, - embedding_model=embedding_model, + task_context.set_current_action('Processing file') + + # Get file size from storage + file_bytes = await self.ap.storage_mgr.storage_provider.load(file.file_name) + file_size = len(file_bytes) if file_bytes else 0 + + # Detect MIME type from extension + mime_type, _ = mimetypes.guess_type(file.file_name) + if mime_type is None: + mime_type = "application/octet-stream" + + # Call plugin to ingest document + await self._ingest_document( + { + "document_id": file.uuid, + "filename": file.file_name, + "extension": file.extension, + "file_size": file_size, + "mime_type": mime_type + }, + file.file_name # storage path ) # set file status to completed @@ -189,21 +176,27 @@ async def _store_zip_file(self, zip_file_id: str) -> str: return stored_file_tasks[0] if stored_file_tasks else '' - async def retrieve(self, query: str, top_k: int) -> list[rag_context.RetrievalResultEntry]: - embedding_model = await self.ap.model_mgr.get_embedding_model_by_uuid( - 
self.knowledge_base_entity.embedding_model_uuid - ) - return await self.retriever.retrieve(self.knowledge_base_entity.uuid, query, embedding_model, top_k) + async def retrieve(self, query: str, top_k: int, settings: dict | None = None) -> list[rag_context.RetrievalResultEntry]: + # Merge top_k into settings or use as default + retrieve_settings = {"top_k": top_k} + if settings: + retrieve_settings.update(settings) - async def delete_file(self, file_id: str): - # delete vector - await self.ap.vector_db_mgr.vector_db.delete_by_file_id(self.knowledge_base_entity.uuid, file_id) + response = await self._retrieve(query, retrieve_settings) - # delete chunk - await self.ap.persistence_mgr.execute_async( - sqlalchemy.delete(persistence_rag.Chunk).where(persistence_rag.Chunk.file_id == file_id) - ) + results_data = response.get("results", []) + entries = [] + for r in results_data: + if isinstance(r, dict): + entries.append(rag_context.RetrievalResultEntry(**r)) + elif isinstance(r, rag_context.RetrievalResultEntry): + entries.append(r) + return entries + + async def delete_file(self, file_id: str): + await self._delete_document(file_id) + # Also cleanup DB record await self.ap.persistence_mgr.execute_async( sqlalchemy.delete(persistence_rag.File).where(persistence_rag.File.uuid == file_id) ) @@ -216,12 +209,134 @@ def get_name(self) -> str: """Get the name of the knowledge base""" return self.knowledge_base_entity.name - def get_type(self) -> str: - """Get the type of knowledge base""" - return 'internal' + def get_rag_engine_plugin_id(self) -> str: + """Get the RAG engine plugin ID""" + return self.knowledge_base_entity.rag_engine_plugin_id or "" async def dispose(self): - await self.ap.vector_db_mgr.vector_db.delete_collection(self.knowledge_base_entity.uuid) + """Dispose the knowledge base, notifying the plugin to cleanup.""" + await self._on_kb_delete() + + # ========== Plugin Communication Methods ========== + + async def _on_kb_create(self) -> None: + """Notify plugin 
about KB creation.""" + plugin_id = self.knowledge_base_entity.rag_engine_plugin_id + if not plugin_id: + return + + try: + config = self.knowledge_base_entity.creation_settings or {} + self.ap.logger.info( + f"Calling RAG plugin {plugin_id}: on_knowledge_base_create(kb_id={self.knowledge_base_entity.uuid})" + ) + await self.ap.plugin_connector.rag_on_kb_create(plugin_id, self.knowledge_base_entity.uuid, config) + except Exception as e: + self.ap.logger.error(f"Failed to notify plugin {plugin_id} on KB create: {e}") + + async def _on_kb_delete(self) -> None: + """Notify plugin about KB deletion.""" + plugin_id = self.knowledge_base_entity.rag_engine_plugin_id + if not plugin_id: + return + + try: + self.ap.logger.info( + f"Calling RAG plugin {plugin_id}: on_knowledge_base_delete(kb_id={self.knowledge_base_entity.uuid})" + ) + await self.ap.plugin_connector.rag_on_kb_delete(plugin_id, self.knowledge_base_entity.uuid) + except Exception as e: + self.ap.logger.error(f"Failed to notify plugin {plugin_id} on KB delete: {e}") + + async def _ingest_document( + self, + file_metadata: Dict[str, Any], + storage_path: str, + ) -> Dict[str, Any]: + """Call plugin to ingest document.""" + kb = self.knowledge_base_entity + plugin_id = kb.rag_engine_plugin_id + if not plugin_id: + self.ap.logger.error(f"No RAG plugin ID configured for KB {kb.uuid}. 
Ingestion failed.") + raise ValueError("RAG Plugin ID required") + + self.ap.logger.info(f"Calling RAG plugin {plugin_id}: ingest(doc={file_metadata.get('filename')})") + + # Inject knowledge_base_id into file metadata as required by SDK schema + file_metadata["knowledge_base_id"] = kb.uuid + + context_data = { + "file_object": { + "metadata": file_metadata, + "storage_path": storage_path, + }, + "knowledge_base_id": kb.uuid, + "collection_id": kb.collection_id or kb.uuid, + "chunking_strategy": kb.creation_settings.get("chunking_strategy", "fixed_size") if kb.creation_settings else "fixed_size", + "custom_settings": kb.creation_settings or {}, + } + + try: + result = await self.ap.plugin_connector.call_rag_ingest(plugin_id, context_data) + return result + except Exception as e: + self.ap.logger.error(f"Plugin ingestion failed: {e}") + raise + + async def _retrieve( + self, + query: str, + settings: Dict[str, Any], + ) -> Dict[str, Any]: + """Call plugin to retrieve documents. + + Raises: + ValueError: If no RAG plugin is configured for this KB. + Exception: If the plugin retrieval call fails. + """ + kb = self.knowledge_base_entity + plugin_id = kb.rag_engine_plugin_id + if not plugin_id: + raise ValueError(f"No RAG plugin ID configured for KB {kb.uuid}. 
Retrieval failed.") + + if '/' not in plugin_id: + raise ValueError(f"Invalid plugin_id format: '{plugin_id}' for KB {kb.uuid}") + + plugin_author, plugin_name = plugin_id.split('/', 1) + + retrieval_context = { + "query": query, + "knowledge_base_id": kb.uuid, + "collection_id": kb.collection_id or kb.uuid, + "top_k": settings.get("top_k") or kb.top_k or 5, + "retrieval_settings": settings, + "creation_settings": kb.creation_settings or {}, + } + + result = await self.ap.plugin_connector.retrieve_knowledge( + plugin_author, + plugin_name, + "", # retriever_name - runtime will find the RAGEngine component + kb.uuid, # instance_id + retrieval_context + ) + return result + + async def _delete_document(self, document_id: str) -> bool: + """Call plugin to delete document.""" + kb = self.knowledge_base_entity + plugin_id = kb.rag_engine_plugin_id + if not plugin_id: + return False + + self.ap.logger.info(f"Calling RAG plugin {plugin_id}: delete_document(doc_id={document_id})") + + try: + return await self.ap.plugin_connector.call_rag_delete_document(plugin_id, document_id, kb.uuid) + except Exception as e: + self.ap.logger.error(f"Plugin document deletion failed: {e}") + return False + class RAGManager: @@ -236,12 +351,135 @@ def __init__(self, ap: app.Application): async def initialize(self): await self.load_knowledge_bases_from_db() + async def get_all_knowledge_base_details(self) -> list[dict]: + """Get all knowledge bases with enriched RAG engine details.""" + # 1. Get raw KBs from DB + result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_rag.KnowledgeBase)) + knowledge_bases = result.all() + + # 2. Get all available RAG engines for enrichment + engine_map = {} + if self.ap.plugin_connector.is_enable_plugin: + try: + engines = await self.ap.plugin_connector.list_rag_engines() + engine_map = {e["plugin_id"]: e for e in engines} + except Exception as e: + self.ap.logger.warning(f"Failed to list RAG engines: {e}") + + # 3. 
Serialize and enrich + kb_list = [] + for kb in knowledge_bases: + kb_dict = self.ap.persistence_mgr.serialize_model(persistence_rag.KnowledgeBase, kb) + self._enrich_kb_dict(kb_dict, engine_map) + kb_list.append(kb_dict) + + return kb_list + + async def get_knowledge_base_details(self, kb_uuid: str) -> dict | None: + """Get specific knowledge base with enriched RAG engine details.""" + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_rag.KnowledgeBase).where(persistence_rag.KnowledgeBase.uuid == kb_uuid) + ) + kb = result.first() + if not kb: + return None + + kb_dict = self.ap.persistence_mgr.serialize_model(persistence_rag.KnowledgeBase, kb) + + # Fetch engines + engine_map = {} + if self.ap.plugin_connector.is_enable_plugin: + try: + engines = await self.ap.plugin_connector.list_rag_engines() + engine_map = {e["plugin_id"]: e for e in engines} + except Exception as e: + self.ap.logger.warning(f"Failed to list RAG engines: {e}") + + self._enrich_kb_dict(kb_dict, engine_map) + return kb_dict + + @staticmethod + def _to_i18n_name(name) -> dict: + """Ensure name is always an I18nObject-compatible dict. + + If *name* is already a dict (with ``en_US`` / ``zh_Hans`` keys) it is + returned as-is. A plain string is wrapped into an I18nObject so the + frontend ``extractI18nObject`` helper never receives an unexpected type. 
+ """ + if isinstance(name, dict): + return name + return {"en_US": str(name), "zh_Hans": str(name)} + + def _enrich_kb_dict(self, kb_dict: dict, engine_map: dict) -> None: + """Helper to inject engine info into KB dict.""" + plugin_id = kb_dict.get("rag_engine_plugin_id") + + # Default fallback structure — name must be I18nObject for frontend compatibility + fallback_name = self._to_i18n_name(plugin_id or "Internal (Legacy)") + fallback_info = { + "plugin_id": plugin_id, + "name": fallback_name, + "capabilities": ["doc_ingestion"], + } + + if not plugin_id: + kb_dict["rag_engine"] = fallback_info + return + + engine_info = engine_map.get(plugin_id) + if engine_info: + kb_dict["rag_engine"] = { + "plugin_id": plugin_id, + "name": self._to_i18n_name(engine_info.get("name", plugin_id)), + "capabilities": engine_info.get("capabilities", []), + } + else: + kb_dict["rag_engine"] = fallback_info + + async def create_knowledge_base( + self, + name: str, + rag_engine_plugin_id: str, + creation_settings: dict, + description: str = "", + embedding_model_uuid: str = "" + ) -> persistence_rag.KnowledgeBase: + """Create a new knowledge base using a RAG plugin.""" + kb_uuid = str(uuid.uuid4()) + # Use UUID as collection ID by default for isolation + collection_id = kb_uuid + + kb_data = { + "uuid": kb_uuid, + "name": name, + "description": description, + "rag_engine_plugin_id": rag_engine_plugin_id, + "collection_id": collection_id, + "creation_settings": creation_settings, + "embedding_model_uuid": embedding_model_uuid + } + + # Create Entity + kb = persistence_rag.KnowledgeBase(**kb_data) + + # Persist + await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_rag.KnowledgeBase).values(kb_data)) + + # Load into Runtime + runtime_kb = await self.load_knowledge_base(kb) + + # Notify Plugin + await runtime_kb._on_kb_create() + + self.ap.logger.info(f"Created new Knowledge Base {name} ({kb_uuid}) using plugin {rag_engine_plugin_id}") + return kb + async def 
load_knowledge_bases_from_db(self): self.ap.logger.info('Loading knowledge bases from db...') self.knowledge_bases = [] - # Load internal knowledge bases + # Load knowledge bases result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_rag.KnowledgeBase)) knowledge_bases = result.all() @@ -253,29 +491,17 @@ async def load_knowledge_bases_from_db(self): f'Error loading knowledge base {knowledge_base.uuid}: {e}\n{traceback.format_exc()}' ) - # Load external knowledge bases - external_result = await self.ap.persistence_mgr.execute_async( - sqlalchemy.select(persistence_rag.ExternalKnowledgeBase) - ) - external_kbs = external_result.all() - - for external_kb in external_kbs: - try: - # Don't trigger sync during batch loading - will sync once after LangBot connects to runtime - await self.load_external_knowledge_base(external_kb, trigger_sync=False) - except Exception as e: - self.ap.logger.error( - f'Error loading external knowledge base {external_kb.uuid}: {e}\n{traceback.format_exc()}' - ) - async def load_knowledge_base( self, knowledge_base_entity: persistence_rag.KnowledgeBase | sqlalchemy.Row | dict, ) -> RuntimeKnowledgeBase: if isinstance(knowledge_base_entity, sqlalchemy.Row): + # Safe access to _mapping for SQLAlchemy 1.4+ knowledge_base_entity = persistence_rag.KnowledgeBase(**knowledge_base_entity._mapping) elif isinstance(knowledge_base_entity, dict): - knowledge_base_entity = persistence_rag.KnowledgeBase(**knowledge_base_entity) + # Filter out non-database fields (like rag_engine which is computed) + filtered_dict = {k: v for k, v in knowledge_base_entity.items() if k in persistence_rag.KnowledgeBase.ALL_DB_FIELDS} + knowledge_base_entity = persistence_rag.KnowledgeBase(**filtered_dict) runtime_knowledge_base = RuntimeKnowledgeBase(ap=self.ap, knowledge_base_entity=knowledge_base_entity) @@ -285,39 +511,6 @@ async def load_knowledge_base( return runtime_knowledge_base - async def load_external_knowledge_base( - self, - 
external_kb_entity: persistence_rag.ExternalKnowledgeBase | sqlalchemy.Row | dict, - trigger_sync: bool = True, - ) -> ExternalKnowledgeBase: - """Load external knowledge base into runtime - - Args: - external_kb_entity: External KB entity to load - trigger_sync: Whether to trigger sync after loading (default True for manual creation, False for batch loading) - """ - if isinstance(external_kb_entity, sqlalchemy.Row): - external_kb_entity = persistence_rag.ExternalKnowledgeBase(**external_kb_entity._mapping) - elif isinstance(external_kb_entity, dict): - external_kb_entity = persistence_rag.ExternalKnowledgeBase(**external_kb_entity) - - external_kb = ExternalKnowledgeBase(ap=self.ap, external_kb_entity=external_kb_entity) - - await external_kb.initialize() - - self.knowledge_bases.append(external_kb) - - # Trigger sync to create the instance immediately (for manual creation) - # Skip sync during batch loading from DB to avoid multiple sync calls - if trigger_sync: - try: - await self.ap.plugin_connector.sync_polymorphic_component_instances() - self.ap.logger.info(f'Triggered sync after loading external KB {external_kb_entity.uuid}') - except Exception as e: - self.ap.logger.error(f'Failed to sync after loading external KB: {e}') - - return external_kb - async def get_knowledge_base_by_uuid(self, kb_uuid: str) -> KnowledgeBaseInterface | None: for kb in self.knowledge_bases: if kb.get_uuid() == kb_uuid: diff --git a/src/langbot/pkg/rag/knowledge/services/__init__.py b/src/langbot/pkg/rag/knowledge/services/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/langbot/pkg/rag/knowledge/services/base_service.py b/src/langbot/pkg/rag/knowledge/services/base_service.py deleted file mode 100644 index 0f71a508a..000000000 --- a/src/langbot/pkg/rag/knowledge/services/base_service.py +++ /dev/null @@ -1,15 +0,0 @@ -# 封装异步操作 -import asyncio - - -class BaseService: - def __init__(self): - pass - - async def _run_sync(self, func, *args, **kwargs): - 
""" - 在单独的线程中运行同步函数。 - 如果第一个参数是 session,则在 to_thread 中获取新的 session。 - """ - - return await asyncio.to_thread(func, *args, **kwargs) diff --git a/src/langbot/pkg/rag/knowledge/services/chunker.py b/src/langbot/pkg/rag/knowledge/services/chunker.py deleted file mode 100644 index 0cb168161..000000000 --- a/src/langbot/pkg/rag/knowledge/services/chunker.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import json -from typing import List -from langbot.pkg.rag.knowledge.services import base_service -from langbot.pkg.core import app -from langchain_text_splitters import RecursiveCharacterTextSplitter - - -class Chunker(base_service.BaseService): - """ - A class for splitting long texts into smaller, overlapping chunks. - """ - - def __init__(self, ap: app.Application, chunk_size: int = 500, chunk_overlap: int = 50): - self.ap = ap - self.chunk_size = chunk_size - self.chunk_overlap = chunk_overlap - if self.chunk_overlap >= self.chunk_size: - self.ap.logger.warning( - 'Chunk overlap is greater than or equal to chunk size. This may lead to empty or malformed chunks.' - ) - - def _split_text_sync(self, text: str) -> List[str]: - """ - Synchronously splits a long text into chunks with specified overlap. - This is a CPU-bound operation, intended to be run in a separate thread. - """ - if not text: - return [] - - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=self.chunk_size, - chunk_overlap=self.chunk_overlap, - length_function=len, - is_separator_regex=False, - ) - return text_splitter.split_text(text) - - async def chunk(self, text: str) -> List[str]: - """ - Asynchronously chunks a given text into smaller pieces. 
- """ - self.ap.logger.info(f'Chunking text (length: {len(text)})...') - # Run the synchronous splitting logic in a separate thread - chunks = await self._run_sync(self._split_text_sync, text) - self.ap.logger.info(f'Text chunked into {len(chunks)} pieces.') - self.ap.logger.debug(f'Chunks: {json.dumps(chunks, indent=4, ensure_ascii=False)}') - return chunks diff --git a/src/langbot/pkg/rag/knowledge/services/embedder.py b/src/langbot/pkg/rag/knowledge/services/embedder.py deleted file mode 100644 index 168b839d3..000000000 --- a/src/langbot/pkg/rag/knowledge/services/embedder.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import annotations -import uuid -from typing import List -from langbot.pkg.rag.knowledge.services.base_service import BaseService -from langbot.pkg.entity.persistence import rag as persistence_rag -from langbot.pkg.core import app -from langbot.pkg.provider.modelmgr.requester import RuntimeEmbeddingModel -import sqlalchemy - - -class Embedder(BaseService): - def __init__(self, ap: app.Application) -> None: - super().__init__() - self.ap = ap - - async def embed_and_store( - self, kb_id: str, file_id: str, chunks: List[str], embedding_model: RuntimeEmbeddingModel - ) -> list[persistence_rag.Chunk]: - # save chunk to db - chunk_entities: list[persistence_rag.Chunk] = [] - chunk_ids: list[str] = [] - - for chunk_text in chunks: - chunk_uuid = str(uuid.uuid4()) - chunk_ids.append(chunk_uuid) - chunk_entity = persistence_rag.Chunk(uuid=chunk_uuid, file_id=file_id, text=chunk_text) - chunk_entities.append(chunk_entity) - - chunk_dicts = [ - self.ap.persistence_mgr.serialize_model(persistence_rag.Chunk, chunk) for chunk in chunk_entities - ] - - await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_rag.Chunk).values(chunk_dicts)) - - # get embeddings (batch size limit: 64 for OpenAI) - MAX_BATCH_SIZE = 64 - embeddings_list: list[list[float]] = [] - - for i in range(0, len(chunks), MAX_BATCH_SIZE): - batch = chunks[i : i + 
MAX_BATCH_SIZE] - batch_embeddings = await embedding_model.provider.invoke_embedding( - model=embedding_model, - input_text=batch, - extra_args={}, # TODO: add extra args - knowledge_base_id=kb_id, - call_type='embedding', - ) - embeddings_list.extend(batch_embeddings) - - # save embeddings to vdb - await self.ap.vector_db_mgr.vector_db.add_embeddings(kb_id, chunk_ids, embeddings_list, chunk_dicts) - - self.ap.logger.info(f'Successfully saved {len(chunk_entities)} embeddings to Knowledge Base.') - - return chunk_entities diff --git a/src/langbot/pkg/rag/knowledge/services/parser.py b/src/langbot/pkg/rag/knowledge/services/parser.py deleted file mode 100644 index 504107385..000000000 --- a/src/langbot/pkg/rag/knowledge/services/parser.py +++ /dev/null @@ -1,291 +0,0 @@ -from __future__ import annotations - -import PyPDF2 -import io -from docx import Document -import chardet -from typing import Union, Callable, Any -import markdown -from bs4 import BeautifulSoup -import re -import asyncio # Import asyncio for async operations -from langbot.pkg.core import app - - -class FileParser: - """ - A robust file parser class to extract text content from various document formats. - It supports TXT, PDF, DOCX, XLSX, CSV, Markdown, HTML, and EPUB files. - All core file reading operations are designed to be run synchronously in a thread pool - to avoid blocking the asyncio event loop. - """ - - def __init__(self, ap: app.Application): - self.ap = ap - - async def _run_sync(self, sync_func: Callable, *args: Any, **kwargs: Any) -> Any: - """ - Runs a synchronous function in a separate thread to prevent blocking the event loop. - This is a general utility method for wrapping blocking I/O operations. 
- """ - try: - return await asyncio.to_thread(sync_func, *args, **kwargs) - except Exception as e: - self.ap.logger.error(f'Error running synchronous function {sync_func.__name__}: {e}') - raise - - async def parse(self, file_name: str, extension: str) -> Union[str, None]: - """ - Parses the file based on its extension and returns the extracted text content. - This is the main asynchronous entry point for parsing. - - Args: - file_name (str): The name of the file to be parsed, get from ap.storage_mgr - - Returns: - Union[str, None]: The extracted text content as a single string, or None if parsing fails. - """ - - file_extension = extension.lower() - parser_method = getattr(self, f'_parse_{file_extension}', None) - - if parser_method is None: - self.ap.logger.error(f'Unsupported file format: {file_extension} for file {file_name}') - return None - - try: - # Pass file_path to the specific parser methods - return await parser_method(file_name) - except Exception as e: - self.ap.logger.error(f'Failed to parse {file_extension} file {file_name}: {e}') - return None - - # --- Helper for reading files with encoding detection --- - async def _read_file_content(self, file_name: str) -> Union[str, bytes]: - """ - Reads a file with automatic encoding detection, ensuring the synchronous - file read operation runs in a separate thread. 
- """ - - # def _read_sync(): - # with open(file_path, 'rb') as file: - # raw_data = file.read() - # detected = chardet.detect(raw_data) - # encoding = detected['encoding'] or 'utf-8' - - # if mode == 'r': - # return raw_data.decode(encoding, errors='ignore') - # return raw_data # For binary mode - - # return await self._run_sync(_read_sync) - file_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - detected = chardet.detect(file_bytes) - encoding = detected['encoding'] or 'utf-8' - - return file_bytes.decode(encoding, errors='ignore') - - # --- Specific Parser Methods --- - - async def _parse_txt(self, file_name: str) -> str: - """Parses a TXT file and returns its content.""" - self.ap.logger.info(f'Parsing TXT file: {file_name}') - return await self._read_file_content(file_name) - - async def _parse_pdf(self, file_name: str) -> str: - """Parses a PDF file and returns its text content.""" - self.ap.logger.info(f'Parsing PDF file: {file_name}') - - # def _parse_pdf_sync(): - # text_content = [] - # with open(file_name, 'rb') as file: - # pdf_reader = PyPDF2.PdfReader(file) - # for page in pdf_reader.pages: - # text = page.extract_text() - # if text: - # text_content.append(text) - # return '\n'.join(text_content) - - # return await self._run_sync(_parse_pdf_sync) - - pdf_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - def _parse_pdf_sync(): - pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes)) - text_content = [] - for page in pdf_reader.pages: - text = page.extract_text() - if text: - text_content.append(text) - return '\n'.join(text_content) - - return await self._run_sync(_parse_pdf_sync) - - async def _parse_docx(self, file_name: str) -> str: - """Parses a DOCX file and returns its text content.""" - self.ap.logger.info(f'Parsing DOCX file: {file_name}') - - docx_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - def _parse_docx_sync(): - doc = Document(io.BytesIO(docx_bytes)) - text_content = 
[paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()] - return '\n'.join(text_content) - - return await self._run_sync(_parse_docx_sync) - - async def _parse_doc(self, file_name: str) -> str: - """Handles .doc files, explicitly stating lack of direct support.""" - self.ap.logger.warning(f'Direct .doc parsing is not supported for {file_name}. Please convert to .docx first.') - raise NotImplementedError('Direct .doc parsing not supported. Please convert to .docx first.') - - # async def _parse_xlsx(self, file_name: str) -> str: - # """Parses an XLSX file, returning text from all sheets.""" - # self.ap.logger.info(f'Parsing XLSX file: {file_name}') - - # xlsx_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - # def _parse_xlsx_sync(): - # excel_file = pd.ExcelFile(io.BytesIO(xlsx_bytes)) - # all_sheet_content = [] - # for sheet_name in excel_file.sheet_names: - # df = pd.read_excel(io.BytesIO(xlsx_bytes), sheet_name=sheet_name) - # sheet_text = f'--- Sheet: {sheet_name} ---\n{df.to_string(index=False)}\n' - # all_sheet_content.append(sheet_text) - # return '\n'.join(all_sheet_content) - - # return await self._run_sync(_parse_xlsx_sync) - - # async def _parse_csv(self, file_name: str) -> str: - # """Parses a CSV file and returns its content as a string.""" - # self.ap.logger.info(f'Parsing CSV file: {file_name}') - - # csv_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - # def _parse_csv_sync(): - # # pd.read_csv can often detect encoding, but explicit detection is safer - # # raw_data = self._read_file_content( - # # file_name, mode='rb' - # # ) # Note: this will need to be await outside this sync function - # # _ = raw_data - # # For simplicity, we'll let pandas handle encoding internally after a raw read. - # # A more robust solution might pass encoding directly to pd.read_csv after detection. 
- # detected = chardet.detect(io.BytesIO(csv_bytes)) - # encoding = detected['encoding'] or 'utf-8' - # df = pd.read_csv(io.BytesIO(csv_bytes), encoding=encoding) - # return df.to_string(index=False) - - # return await self._run_sync(_parse_csv_sync) - - async def _parse_md(self, file_name: str) -> str: - """Parses a Markdown file, converting it to structured plain text.""" - self.ap.logger.info(f'Parsing Markdown file: {file_name}') - - md_bytes = await self.ap.storage_mgr.storage_provider.load(file_name) - - def _parse_markdown_sync(): - md_content = io.BytesIO(md_bytes).read().decode('utf-8', errors='ignore') - html_content = markdown.markdown( - md_content, extensions=['extra', 'codehilite', 'tables', 'toc', 'fenced_code'] - ) - soup = BeautifulSoup(html_content, 'html.parser') - text_parts = [] - for element in soup.children: - if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: - level = int(element.name[1]) - text_parts.append('#' * level + ' ' + element.get_text().strip()) - elif element.name == 'p': - text = element.get_text().strip() - if text: - text_parts.append(text) - elif element.name in ['ul', 'ol']: - for li in element.find_all('li'): - text_parts.append(f'* {li.get_text().strip()}') - elif element.name == 'pre': - code_block = element.get_text().strip() - if code_block: - text_parts.append(f'```\n{code_block}\n```') - elif element.name == 'table': - table_str = self._extract_table_to_markdown_sync(element) # Call sync helper - if table_str: - text_parts.append(table_str) - elif element.name: - text = element.get_text(separator=' ', strip=True) - if text: - text_parts.append(text) - cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) - return cleaned_text.strip() - - return await self._run_sync(_parse_markdown_sync) - - async def _parse_html(self, file_name: str) -> str: - """Parses an HTML file, extracting structured plain text.""" - self.ap.logger.info(f'Parsing HTML file: {file_name}') - - html_bytes = await 
self.ap.storage_mgr.storage_provider.load(file_name) - - def _parse_html_sync(): - html_content = io.BytesIO(html_bytes).read().decode('utf-8', errors='ignore') - soup = BeautifulSoup(html_content, 'html.parser') - for script_or_style in soup(['script', 'style']): - script_or_style.decompose() - text_parts = [] - for element in soup.body.children if soup.body else soup.children: - if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: - level = int(element.name[1]) - text_parts.append('#' * level + ' ' + element.get_text().strip()) - elif element.name == 'p': - text = element.get_text().strip() - if text: - text_parts.append(text) - elif element.name in ['ul', 'ol']: - for li in element.find_all('li'): - text = li.get_text().strip() - if text: - text_parts.append(f'* {text}') - elif element.name == 'table': - table_str = self._extract_table_to_markdown_sync(element) # Call sync helper - if table_str: - text_parts.append(table_str) - elif element.name: - text = element.get_text(separator=' ', strip=True) - if text: - text_parts.append(text) - cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) - return cleaned_text.strip() - - return await self._run_sync(_parse_html_sync) - - def _add_toc_items_sync(self, toc_list: list, text_content: list, level: int): - """Recursively adds TOC items to text_content (synchronous helper).""" - indent = ' ' * level - for item in toc_list: - if isinstance(item, tuple): - chapter, subchapters = item - text_content.append(f'{indent}- {chapter.title}') - self._add_toc_items_sync(subchapters, text_content, level + 1) - else: - text_content.append(f'{indent}- {item.title}') - - def _extract_table_to_markdown_sync(self, table_element: BeautifulSoup) -> str: - """Helper to convert a BeautifulSoup table element into a Markdown table string (synchronous).""" - headers = [th.get_text().strip() for th in table_element.find_all('th')] - rows = [] - for tr in table_element.find_all('tr'): - cells = [td.get_text().strip() for td in 
tr.find_all('td')] - if cells: - rows.append(cells) - - if not headers and not rows: - return '' - - table_lines = [] - if headers: - table_lines.append(' | '.join(headers)) - table_lines.append(' | '.join(['---'] * len(headers))) - - for row_cells in rows: - padded_cells = row_cells + [''] * (len(headers) - len(row_cells)) if headers else row_cells - table_lines.append(' | '.join(padded_cells)) - - return '\n'.join(table_lines) diff --git a/src/langbot/pkg/rag/knowledge/services/retriever.py b/src/langbot/pkg/rag/knowledge/services/retriever.py deleted file mode 100644 index 15619c94f..000000000 --- a/src/langbot/pkg/rag/knowledge/services/retriever.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import annotations - -from . import base_service -from ....core import app -from ....provider.modelmgr.requester import RuntimeEmbeddingModel -from langbot_plugin.api.entities.builtin.rag import context as rag_context -from langbot_plugin.api.entities.builtin.provider.message import ContentElement - - -class Retriever(base_service.BaseService): - def __init__(self, ap: app.Application): - super().__init__() - self.ap = ap - - async def retrieve( - self, kb_id: str, query: str, embedding_model: RuntimeEmbeddingModel, k: int = 5 - ) -> list[rag_context.RetrievalResultEntry]: - self.ap.logger.info( - f"Retrieving for query: '{query[:10]}' with k={k} using {embedding_model.model_entity.uuid}" - ) - - query_embedding: list[float] = await embedding_model.provider.invoke_embedding( - model=embedding_model, - input_text=[query], - extra_args={}, # TODO: add extra args - knowledge_base_id=kb_id, - query_text=query, - call_type='retrieve', - ) - - vector_results = await self.ap.vector_db_mgr.vector_db.search(kb_id, query_embedding[0], k) - - # 'ids' shape mirrors the Chroma-style response contract for compatibility - matched_vector_ids = vector_results.get('ids', [[]])[0] - distances = vector_results.get('distances', [[]])[0] - vector_metadatas = vector_results.get('metadatas', 
[[]])[0] - - if not matched_vector_ids: - self.ap.logger.info('No relevant chunks found in vector database.') - return [] - - result: list[rag_context.RetrievalResultEntry] = [] - - for i, id in enumerate(matched_vector_ids): - entry = rag_context.RetrievalResultEntry( - id=id, - content=[ContentElement.from_text(vector_metadatas[i].get('text', ''))], - metadata=vector_metadatas[i], - distance=distances[i], - ) - result.append(entry) - - return result diff --git a/src/langbot/pkg/rag/service/__init__.py b/src/langbot/pkg/rag/service/__init__.py new file mode 100644 index 000000000..6872460dd --- /dev/null +++ b/src/langbot/pkg/rag/service/__init__.py @@ -0,0 +1 @@ +from .runtime import RAGRuntimeService diff --git a/src/langbot/pkg/rag/service/runtime.py b/src/langbot/pkg/rag/service/runtime.py new file mode 100644 index 000000000..62fbfcedb --- /dev/null +++ b/src/langbot/pkg/rag/service/runtime.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import sqlalchemy +from typing import Any, List, Dict, Optional +from langbot.pkg.core import app + +from langbot.pkg.entity.persistence import rag as persistence_rag + + +class RAGRuntimeService: + """Service to handle RAG-related requests from plugins (Runtime). + + This service acts as the bridge between plugin RPC requests and + LangBot's infrastructure (embedding models, vector databases, file storage). 
+ """ + + def __init__(self, ap: app.Application): + self.ap = ap + + async def _get_kb_entity(self, kb_id: str) -> persistence_rag.KnowledgeBase: + stmt = sqlalchemy.select(persistence_rag.KnowledgeBase).where( + persistence_rag.KnowledgeBase.uuid == kb_id + ) + result = await self.ap.persistence_mgr.execute_async(stmt) + row = result.first() + if not row: + raise ValueError(f"Knowledge Base {kb_id} not found") + kb = persistence_rag.KnowledgeBase(**row._mapping) + return kb + + def _get_embedding_model_uuid(self, kb: persistence_rag.KnowledgeBase) -> str | None: + """Get embedding model UUID from creation_settings (preferred) or KB field (fallback).""" + if kb.creation_settings and isinstance(kb.creation_settings, dict): + embed_uuid = kb.creation_settings.get('embedding_model_uuid') + if embed_uuid: + return embed_uuid + return kb.embedding_model_uuid + + async def embed_documents(self, kb_id: str, texts: List[str]) -> List[List[float]]: + """Handle RAG_EMBED_DOCUMENTS action.""" + kb = await self._get_kb_entity(kb_id) + embed_model_uuid = self._get_embedding_model_uuid(kb) + + if not embed_model_uuid: + raise ValueError(f"Embedding model not configured for this Knowledge Base (kb_id: {kb_id})") + + embedder_model = await self.ap.model_mgr.get_embedding_model_by_uuid(embed_model_uuid) + if not embedder_model: + raise ValueError(f"Embedding model {embed_model_uuid} not found") + + return await embedder_model.embed_documents(texts) + + async def embed_query(self, kb_id: str, text: str) -> List[float]: + """Handle RAG_EMBED_QUERY action.""" + kb = await self._get_kb_entity(kb_id) + embed_model_uuid = self._get_embedding_model_uuid(kb) + + if not embed_model_uuid: + raise ValueError(f"Embedding model not configured (kb_id: {kb_id})") + + embedder_model = await self.ap.model_mgr.get_embedding_model_by_uuid(embed_model_uuid) + if not embedder_model: + raise ValueError(f"Embedding model {embed_model_uuid} not found") + + return await embedder_model.embed_query(text) + 
+ async def vector_upsert( + self, + collection_id: str, + vectors: List[List[float]], + ids: List[str], + metadata: Optional[List[Dict[str, Any]]] = None + ) -> None: + """Handle RAG_VECTOR_UPSERT action.""" + metadatas = metadata if metadata else [{} for _ in vectors] + await self.ap.vector_db_mgr.upsert( + collection_name=collection_id, + vectors=vectors, + ids=ids, + metadata=metadatas + ) + + async def vector_search( + self, + collection_id: str, + query_vector: List[float], + top_k: int, + filters: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + """Handle RAG_VECTOR_SEARCH action.""" + return await self.ap.vector_db_mgr.search( + collection_name=collection_id, + query_vector=query_vector, + limit=top_k, + filter=filters + ) + + async def vector_delete( + self, + collection_id: str, + file_ids: Optional[List[str]] = None, + filters: Optional[Dict[str, Any]] = None + ) -> int: + """Handle RAG_VECTOR_DELETE action. + + Deletes vectors associated with the given file IDs from the collection. + Each file_id corresponds to a document whose vectors will be removed. + + Args: + collection_id: The collection to delete from. + file_ids: File IDs whose associated vectors should be deleted. + Each file_id maps to a set of vectors stored with that file_id + in their metadata. + filters: Filter-based deletion (not yet supported, will raise). + """ + count = 0 + if file_ids: + await self.ap.vector_db_mgr.delete_by_file_id(collection_name=collection_id, file_ids=file_ids) + count = len(file_ids) + elif filters: + await self.ap.vector_db_mgr.delete_by_filter(collection_name=collection_id, filter=filters) + return count + + async def get_file_stream(self, storage_path: str) -> bytes: + """Handle RAG_GET_FILE_STREAM action. + + Uses the storage manager abstraction to load file content, + regardless of the underlying storage provider. 
+ """ + content_bytes = await self.ap.storage_mgr.load(storage_path) + return content_bytes if content_bytes else b"" diff --git a/src/langbot/pkg/vector/mgr.py b/src/langbot/pkg/vector/mgr.py index f0cb742cf..348800207 100644 --- a/src/langbot/pkg/vector/mgr.py +++ b/src/langbot/pkg/vector/mgr.py @@ -65,3 +65,88 @@ async def initialize(self): else: self.vector_db = ChromaVectorDatabase(self.ap) self.ap.logger.warning('No vector database backend configured, defaulting to Chroma.') + + async def upsert( + self, + collection_name: str, + vectors: list[list[float]], + ids: list[str], + metadata: list[dict] | None = None, + ): + """Proxy: Upsert vectors""" + await self.vector_db.get_or_create_collection(collection_name) + await self.vector_db.add_embeddings( + collection=collection_name, + ids=ids, + embeddings_list=vectors, + metadatas=metadata or [{} for _ in vectors], + ) + + async def search( + self, + collection_name: str, + query_vector: list[float], + limit: int, + filter: dict | None = None, + ) -> list[dict]: + """Proxy: Search vectors. + + Returns a list of dicts with keys: 'id', 'score', 'metadata'. 
+ The underlying VectorDatabase.search returns Chroma-style format: + { 'ids': [['id1']], 'distances': [[0.1]], 'metadatas': [[{}]] } + """ + import numpy as np + + results = await self.vector_db.search( + collection=collection_name, + query_embedding=np.array(query_vector), + k=limit, + ) + + if not results or 'ids' not in results or not results['ids']: + return [] + + # Flatten nested lists (Chroma returns batch-style: list of lists) + raw_ids = results['ids'] + raw_dists = results.get('distances', []) + raw_metas = results.get('metadatas', []) + + r_ids = raw_ids[0] if raw_ids and isinstance(raw_ids[0], list) else raw_ids + r_dists = raw_dists[0] if raw_dists and isinstance(raw_dists[0], list) else raw_dists + r_metas = raw_metas[0] if raw_metas and isinstance(raw_metas[0], list) else raw_metas + + parsed_results = [] + for i, id_val in enumerate(r_ids): + parsed_results.append({ + 'id': id_val, + 'score': r_dists[i] if r_dists and i < len(r_dists) else 0.0, + 'metadata': r_metas[i] if r_metas and i < len(r_metas) else {}, + }) + + return parsed_results + + async def delete_by_file_id(self, collection_name: str, file_ids: list[str]): + """Proxy: Delete vectors by file_id (metadata-level identifier). + + This delegates to VectorDatabase.delete_by_file_id which removes + all vectors associated with the given file IDs. + """ + for file_id in file_ids: + await self.vector_db.delete_by_file_id(collection_name, file_id) + + async def delete_collection(self, collection_name: str): + """Proxy: Delete an entire collection.""" + await self.vector_db.delete_collection(collection_name) + + async def delete_by_filter(self, collection_name: str, filter: dict): + """Proxy: Delete vectors by filter. + + Raises: + NotImplementedError: Filter-based deletion is not yet supported + by the VectorDatabase interface. 
+ """ + raise NotImplementedError( + f"delete_by_filter called on collection '{collection_name}' but " + "filter-based deletion is not yet implemented in VectorDatabase interface. " + "Use delete_by_file_id for file-level deletion." + ) diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml index eedf657d6..424b12b71 100644 --- a/web/pnpm-lock.yaml +++ b/web/pnpm-lock.yaml @@ -505,6 +505,7 @@ packages: resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} cpu: [arm64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -513,6 +514,7 @@ packages: resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} cpu: [arm] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -521,6 +523,7 @@ packages: resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} cpu: [ppc64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -529,6 +532,7 @@ packages: resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} cpu: [riscv64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -537,6 +541,7 @@ packages: resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} cpu: [s390x] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -545,6 +550,7 @@ packages: resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -553,6 +559,7 @@ packages: resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} cpu: [arm64] os: [linux] + libc: [musl] 
requiresBuild: true dev: false optional: true @@ -561,6 +568,7 @@ packages: resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -570,6 +578,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-arm64': 1.2.4 @@ -581,6 +590,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-arm': 1.2.4 @@ -592,6 +602,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [ppc64] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-ppc64': 1.2.4 @@ -603,6 +614,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [riscv64] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-riscv64': 1.2.4 @@ -614,6 +626,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [s390x] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-s390x': 1.2.4 @@ -625,6 +638,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linux-x64': 1.2.4 @@ -636,6 +650,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] + libc: [musl] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 @@ -647,6 +662,7 @@ packages: engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true optionalDependencies: '@img/sharp-libvips-linuxmusl-x64': 1.2.4 @@ -763,6 +779,7 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] 
requiresBuild: true dev: false optional: true @@ -772,6 +789,7 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -781,6 +799,7 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -790,6 +809,7 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -1889,6 +1909,7 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -1898,6 +1919,7 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -1907,6 +1929,7 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -1916,6 +1939,7 @@ packages: engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -2331,6 +2355,7 @@ packages: resolution: {integrity: sha512-34gw7PjDGB9JgePJEmhEqBhWvCiiWCuXsL9hYphDF7crW7UgI05gyBAi6MF58uGcMOiOqSJ2ybEeCvHcq0BCmQ==} cpu: [arm64] os: [linux] + libc: [glibc] requiresBuild: true dev: true optional: true @@ -2339,6 +2364,7 @@ packages: resolution: {integrity: sha512-RyMIx6Uf53hhOtJDIamSbTskA99sPHS96wxVE/bJtePJJtpdKGXO1wY90oRdXuYOGOTuqjT8ACccMc4K6QmT3w==} cpu: [arm64] os: [linux] + libc: [musl] requiresBuild: true dev: true optional: true @@ -2347,6 +2373,7 @@ packages: resolution: {integrity: sha512-D8Vae74A4/a+mZH0FbOkFJL9DSK2R6TFPC9M+jCWYia/q2einCubX10pecpDiTmkJVUH+y8K3BZClycD8nCShA==} cpu: [ppc64] os: [linux] + libc: [glibc] requiresBuild: true dev: true optional: true @@ -2355,6 +2382,7 @@ packages: resolution: {integrity: sha512-frxL4OrzOWVVsOc96+V3aqTIQl1O2TjgExV4EKgRY09AJ9leZpEg8Ak9phadbuX0BA4k8U5qtvMSQQGGmaJqcQ==} cpu: [riscv64] os: [linux] + libc: [glibc] requiresBuild: true dev: true optional: true @@ 
-2363,6 +2391,7 @@ packages: resolution: {integrity: sha512-mJ5vuDaIZ+l/acv01sHoXfpnyrNKOk/3aDoEdLO/Xtn9HuZlDD6jKxHlkN8ZhWyLJsRBxfv9GYM2utQ1SChKew==} cpu: [riscv64] os: [linux] + libc: [musl] requiresBuild: true dev: true optional: true @@ -2371,6 +2400,7 @@ packages: resolution: {integrity: sha512-kELo8ebBVtb9sA7rMe1Cph4QHreByhaZ2QEADd9NzIQsYNQpt9UkM9iqr2lhGr5afh885d/cB5QeTXSbZHTYPg==} cpu: [s390x] os: [linux] + libc: [glibc] requiresBuild: true dev: true optional: true @@ -2379,6 +2409,7 @@ packages: resolution: {integrity: sha512-C3ZAHugKgovV5YvAMsxhq0gtXuwESUKc5MhEtjBpLoHPLYM+iuwSj3lflFwK3DPm68660rZ7G8BMcwSro7hD5w==} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true dev: true optional: true @@ -2387,6 +2418,7 @@ packages: resolution: {integrity: sha512-rV0YSoyhK2nZ4vEswT/QwqzqQXw5I6CjoaYMOX0TqBlWhojUf8P94mvI7nuJTeaCkkds3QE4+zS8Ko+GdXuZtA==} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true dev: true optional: true @@ -4413,6 +4445,7 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -4422,6 +4455,7 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true @@ -4431,6 +4465,7 @@ packages: engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] requiresBuild: true dev: false optional: true @@ -4440,6 +4475,7 @@ packages: engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] requiresBuild: true dev: false optional: true diff --git a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx index f46d42b50..87700e317 100644 --- a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx +++ b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx @@ -55,6 +55,9 @@ export default function DynamicFormComponent({ case 'llm-model-selector': fieldSchema = z.string(); break; + case 
'embedding-model-selector': + fieldSchema = z.string(); + break; case 'knowledge-base-selector': fieldSchema = z.string(); break; diff --git a/web/src/app/home/components/dynamic-form/DynamicFormItemComponent.tsx b/web/src/app/home/components/dynamic-form/DynamicFormItemComponent.tsx index 7c89a34ef..52d7a1460 100644 --- a/web/src/app/home/components/dynamic-form/DynamicFormItemComponent.tsx +++ b/web/src/app/home/components/dynamic-form/DynamicFormItemComponent.tsx @@ -22,8 +22,7 @@ import { LLMModel, Bot, KnowledgeBase, - ExternalKnowledgeBase, - ApiRespPluginSystemStatus, + EmbeddingModel, } from '@/app/infra/entities/api'; import { toast } from 'sonner'; import { useTranslation } from 'react-i18next'; @@ -51,16 +50,12 @@ export default function DynamicFormItemComponent({ onFileUploaded?: (fileKey: string) => void; }) { const [llmModels, setLlmModels] = useState([]); + const [embeddingModels, setEmbeddingModels] = useState([]); const [knowledgeBases, setKnowledgeBases] = useState([]); - const [externalKnowledgeBases, setExternalKnowledgeBases] = useState< - ExternalKnowledgeBase[] - >([]); const [bots, setBots] = useState([]); const [uploading, setUploading] = useState(false); const [kbDialogOpen, setKbDialogOpen] = useState(false); const [tempSelectedKBIds, setTempSelectedKBIds] = useState([]); - const [pluginSystemStatus, setPluginSystemStatus] = - useState(null); const { t } = useTranslation(); const handleFileUpload = async (file: File): Promise => { @@ -117,48 +112,33 @@ export default function DynamicFormItemComponent({ }, [config.type]); useEffect(() => { - if ( - config.type === DynamicFormItemType.KNOWLEDGE_BASE_SELECTOR || - config.type === DynamicFormItemType.KNOWLEDGE_BASE_MULTI_SELECTOR - ) { + if (config.type === DynamicFormItemType.EMBEDDING_MODEL_SELECTOR) { httpClient - .getKnowledgeBases() + .getProviderEmbeddingModels() .then((resp) => { - setKnowledgeBases(resp.bases); - }) - .catch((err) => { - toast.error('Failed to get knowledge base list: 
' + err.msg); - }); - - // Fetch plugin system status - httpClient - .getPluginSystemStatus() - .then((status) => { - setPluginSystemStatus(status); + setEmbeddingModels(resp.models); }) .catch((err) => { - console.error('Failed to get plugin system status:', err); + toast.error('Failed to get embedding model list: ' + err.msg); }); } }, [config.type]); useEffect(() => { if ( - (config.type === DynamicFormItemType.KNOWLEDGE_BASE_SELECTOR || - config.type === DynamicFormItemType.KNOWLEDGE_BASE_MULTI_SELECTOR) && - pluginSystemStatus?.is_enable && - pluginSystemStatus?.is_connected + config.type === DynamicFormItemType.KNOWLEDGE_BASE_SELECTOR || + config.type === DynamicFormItemType.KNOWLEDGE_BASE_MULTI_SELECTOR ) { httpClient - .getExternalKnowledgeBases() + .getKnowledgeBases() .then((resp) => { - setExternalKnowledgeBases(resp.bases); + setKnowledgeBases(resp.bases); }) .catch((err) => { - console.error('Failed to get external knowledge base list:', err); + toast.error('Failed to get knowledge base list: ' + err.msg); }); } - }, [config.type, pluginSystemStatus]); + }, [config.type]); useEffect(() => { if (config.type === DynamicFormItemType.BOT_SELECTOR) { @@ -299,7 +279,54 @@ export default function DynamicFormItemComponent({ ); + case DynamicFormItemType.EMBEDDING_MODEL_SELECTOR: + // Group embedding models by provider + const groupedEmbeddingModels = embeddingModels.reduce( + (acc, model) => { + const providerName = model.provider?.name || 'Unknown'; + if (!acc[providerName]) acc[providerName] = []; + acc[providerName].push(model); + return acc; + }, + {} as Record, + ); + + return ( + + ); + case DynamicFormItemType.KNOWLEDGE_BASE_SELECTOR: + // Group KBs by RAG engine name + const kbsByEngine = knowledgeBases.reduce( + (acc, kb) => { + const engineName = kb.rag_engine?.name + ? 
extractI18nObject(kb.rag_engine.name) + : t('knowledge.unknownEngine'); + if (!acc[engineName]) { + acc[engineName] = []; + } + acc[engineName].push(kb); + return acc; + }, + {} as Record, + ); + return ( ); case DynamicFormItemType.KNOWLEDGE_BASE_MULTI_SELECTOR: + // Group KBs by RAG engine name for multi-selector + const multiKbsByEngine = knowledgeBases.reduce( + (acc, kb) => { + const engineName = kb.rag_engine?.name + ? extractI18nObject(kb.rag_engine.name) + : t('knowledge.unknownEngine'); + if (!acc[engineName]) { + acc[engineName] = []; + } + acc[engineName].push(kb); + return acc; + }, + {} as Record, + ); + return ( <>
{field.value && field.value.length > 0 ? (
{field.value.map((kbId: string) => { - const kb = knowledgeBases.find((base) => base.uuid === kbId); - const externalKb = externalKnowledgeBases.find( + const currentKb = knowledgeBases.find( (base) => base.uuid === kbId, ); - const currentKb = kb || externalKb; if (!currentKb) return null; return ( @@ -365,18 +384,15 @@ export default function DynamicFormItemComponent({ className="flex items-center justify-between rounded-lg border p-3 hover:bg-accent" >
- {externalKb && ( - plugin icon - )}
-
{currentKb.name}
+
+ {currentKb.name} + {currentKb.rag_engine?.name && ( + + {extractI18nObject(currentKb.rag_engine.name)} + + )} +
{currentKb.description && (
{currentKb.description} @@ -430,54 +446,12 @@ export default function DynamicFormItemComponent({ {t('knowledge.selectKnowledgeBases')}
- {/* Built-in Knowledge Bases */} - {knowledgeBases.length > 0 && ( -
-
- {t('knowledge.builtIn')} -
- {knowledgeBases.map((base) => { - const isSelected = tempSelectedKBIds.includes( - base.uuid ?? '', - ); - return ( -
{ - const kbId = base.uuid ?? ''; - setTempSelectedKBIds((prev) => - prev.includes(kbId) - ? prev.filter((id) => id !== kbId) - : [...prev, kbId], - ); - }} - > - -
-
{base.name}
- {base.description && ( -
- {base.description} -
- )} -
-
- ); - })} -
- )} - - {/* External Knowledge Bases */} - {externalKnowledgeBases.length > 0 && ( -
+ {Object.entries(multiKbsByEngine).map(([engineName, kbs]) => ( +
- {t('knowledge.external')} + {engineName}
- {externalKnowledgeBases.map((base) => { + {kbs.map((base) => { const isSelected = tempSelectedKBIds.includes( base.uuid ?? '', ); @@ -498,14 +472,6 @@ export default function DynamicFormItemComponent({ checked={isSelected} aria-label={`Select ${base.name}`} /> - plugin icon
{base.name}
{base.description && ( @@ -518,7 +484,7 @@ export default function DynamicFormItemComponent({ ); })}
- )} + ))}
- -
- - )} + +
+ + +
+
@@ -216,33 +213,22 @@ export default function KBDetailDialog({
- {activeMenu === 'metadata' && - (kbType === 'builtin' ? ( - - ) : ( - onOpenChange(false)} - onKBDeleted={() => { - onKbDeleted(); - onOpenChange(false); - }} - onNewKBCreated={onNewKbCreated} - /> - ))} - {activeMenu === 'documents' && kbType === 'builtin' && ( + {activeMenu === 'metadata' && ( + + )} + {activeMenu === 'documents' && hasDocumentCapability() && ( )} - {activeMenu === 'retrieve' && - (kbType === 'builtin' ? ( - - ) : ( - - ))} + {activeMenu === 'retrieve' && ( + + )}
{activeMenu === 'metadata' && ( @@ -254,12 +240,7 @@ export default function KBDetailDialog({ > {t('common.delete')} - - - - - - - {/* Main Form */} -
- -
- {/* KB Name and Emoji in same row */} -
- ( - - - {t('knowledge.kbName')} - * - - - - - - - )} - /> - ( - - {t('common.icon')} - - - - - - )} - /> -
- - {/* KB Description */} - ( - - {t('knowledge.kbDescription')} - - - - - - )} - /> - - {/* Retriever Selector */} - ( - - - {t('knowledge.retriever')} - * - - - - - -

- {t('knowledge.retrieverInstallInfo')}{' '} - - {t('knowledge.retrieverMarketLink')} - -

-
- )} - /> - - {/* Selected Retriever Card */} - {currentRetrieverFullName && ( -
- plugin icon -
-
- {getRetrieverLabel(currentRetrieverFullName)} -
-
- {form.watch('plugin_author')} / {form.watch('plugin_name')} -
-
-
- )} - - {/* Dynamic Retriever Configuration Form */} - {showDynamicForm && dynamicFormConfigList.length > 0 && ( -
-
- {t('knowledge.retrieverConfiguration')} -
- { - form.setValue('retriever_config', values); - }} - /> -
- )} -
-
- -
- ); -} diff --git a/web/src/app/home/knowledge/components/kb-card/KBCard.module.css b/web/src/app/home/knowledge/components/kb-card/KBCard.module.css index df5c9cf9e..aaba9f6af 100644 --- a/web/src/app/home/knowledge/components/kb-card/KBCard.module.css +++ b/web/src/app/home/knowledge/components/kb-card/KBCard.module.css @@ -169,3 +169,18 @@ width: 1.2rem; height: 1.2rem; } + +.engineBadge { + font-size: 0.75rem; + line-height: 1rem; + padding: 0.125rem 0.5rem; + border-radius: 9999px; + background-color: #f3e8ff; + color: #7e22ce; + white-space: nowrap; +} + +:global(.dark) .engineBadge { + background-color: #581c87; + color: #d8b4fe; +} diff --git a/web/src/app/home/knowledge/components/kb-card/KBCard.tsx b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx index 8e4de356f..4e29af46e 100644 --- a/web/src/app/home/knowledge/components/kb-card/KBCard.tsx +++ b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx @@ -4,14 +4,21 @@ import styles from './KBCard.module.css'; export default function KBCard({ kbCardVO }: { kbCardVO: KnowledgeBaseVO }) { const { t } = useTranslation(); + return (
{kbCardVO.emoji || '📚'}
-
- {kbCardVO.name} +
+
+ {kbCardVO.name} +
+ {/* Engine badge */} + + {kbCardVO.getEngineName()} +
{kbCardVO.description} diff --git a/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts b/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts index e7c20ed9f..56808997c 100644 --- a/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts +++ b/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts @@ -1,21 +1,28 @@ +import { RAGEngineInfo } from '@/app/infra/entities/api'; +import { extractI18nObject } from '@/i18n/I18nProvider'; + export interface IKnowledgeBaseVO { id: string; name: string; description: string; - embeddingModelUUID: string; + embeddingModelUUID?: string; top_k: number; lastUpdatedTimeAgo: string; emoji?: string; + ragEngine?: RAGEngineInfo; + ragEnginePluginId?: string; } export class KnowledgeBaseVO implements IKnowledgeBaseVO { id: string; name: string; description: string; - embeddingModelUUID: string; + embeddingModelUUID?: string; top_k: number; lastUpdatedTimeAgo: string; emoji?: string; + ragEngine?: RAGEngineInfo; + ragEnginePluginId?: string; constructor(props: IKnowledgeBaseVO) { this.id = props.id; @@ -25,5 +32,27 @@ export class KnowledgeBaseVO implements IKnowledgeBaseVO { this.top_k = props.top_k; this.lastUpdatedTimeAgo = props.lastUpdatedTimeAgo; this.emoji = props.emoji; + this.ragEngine = props.ragEngine; + this.ragEnginePluginId = props.ragEnginePluginId; + } + + /** + * Check if this KB supports document management + */ + hasDocumentCapability(): boolean { + if (!this.ragEngine) { + return false; + } + return this.ragEngine.capabilities.includes('doc_ingestion'); + } + + /** + * Get display name for the RAG engine + */ + getEngineName(): string { + if (!this.ragEngine) { + return 'Unknown'; + } + return extractI18nObject(this.ragEngine.name); } } diff --git a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx index 8ed045c26..9f6461645 100644 --- a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx +++ 
b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx @@ -14,18 +14,25 @@ import { FormMessage, FormDescription, } from '@/components/ui/form'; -import { httpClient, systemInfo, userInfo } from '@/app/infra/http'; +import { httpClient } from '@/app/infra/http/HttpClient'; import { Select, SelectContent, - SelectGroup, SelectItem, - SelectLabel, SelectTrigger, SelectValue, } from '@/components/ui/select'; -import { KnowledgeBase, EmbeddingModel } from '@/app/infra/entities/api'; +import { KnowledgeBase, RAGEngine } from '@/app/infra/entities/api'; import { toast } from 'sonner'; +import { extractI18nObject } from '@/i18n/I18nProvider'; +import DynamicFormComponent from '@/app/home/components/dynamic-form/DynamicFormComponent'; +import { IDynamicFormItemSchema } from '@/app/infra/entities/form/dynamic'; +import { + DynamicFormItemConfig, + getDefaultValues, + parseDynamicFormItemType, +} from '@/app/home/components/dynamic-form/DynamicFormItemConfig'; +import { UUID } from 'uuidjs'; const getFormSchema = (t: (key: string) => string) => z.object({ @@ -34,15 +41,41 @@ const getFormSchema = (t: (key: string) => string) => .string() .min(1, { message: t('knowledge.kbDescriptionRequired') }), emoji: z.string().optional(), - embeddingModelUUID: z + ragEngineId: z .string() - .min(1, { message: t('knowledge.embeddingModelUUIDRequired') }), - top_k: z - .number() - .min(1, { message: t('knowledge.topKRequired') }) - .max(30, { message: t('knowledge.topKMax') }), + .min(1, { message: t('knowledge.ragEngineRequired') }), }); +/** + * Parse creation schema from RAG engine to IDynamicFormItemSchema[] + * Same pattern as ExternalKBForm uses for retriever config + */ +function parseCreationSchema( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + schemaItems: any | any[] | undefined, +): IDynamicFormItemSchema[] { + if (!schemaItems) return []; + + // Handle wrapped schema (e.g. { schema: [...] 
}) which might be returned by the API + const items = Array.isArray(schemaItems) ? schemaItems : schemaItems.schema; + + if (!items || !Array.isArray(items)) return []; + + return items.map( + (item) => + new DynamicFormItemConfig({ + default: item.default, + id: UUID.generate(), + label: item.label, + description: item.description, + name: item.name, + required: item.required, + type: parseDynamicFormItemType(item.type), + options: item.options, + }), + ); +} + export default function KBForm({ initKbId, onNewKbCreated, @@ -53,6 +86,14 @@ export default function KBForm({ onKbUpdated: (kbId: string) => void; }) { const { t } = useTranslation(); + const [ragEngines, setRagEngines] = useState([]); + const [selectedEngineId, setSelectedEngineId] = useState(''); + const [configSettings, setConfigSettings] = useState< + Record + >({}); + const [isEditing, setIsEditing] = useState(false); + const [loading, setLoading] = useState(true); + const formSchema = getFormSchema(t); const form = useForm>({ @@ -61,70 +102,100 @@ export default function KBForm({ name: '', description: t('knowledge.defaultDescription'), emoji: '📚', - embeddingModelUUID: '', - top_k: 5, + ragEngineId: '', }, }); - const [embeddingModels, setEmbeddingModels] = useState([]); + // Get selected engine details + const selectedEngine = ragEngines.find( + (e) => e.plugin_id === selectedEngineId, + ); useEffect(() => { - getEmbeddingModelNameList().then(() => { + loadRagEngines().then(() => { if (initKbId) { - getKbConfig(initKbId).then((val) => { - form.setValue('name', val.name); - form.setValue('description', val.description); - form.setValue('emoji', val.emoji); - form.setValue('embeddingModelUUID', val.embeddingModelUUID); - form.setValue('top_k', val.top_k || 5); - }); + loadKbConfig(initKbId); } }); }, []); - const getKbConfig = async ( - kbId: string, - ): Promise> => { - return new Promise((resolve) => { - httpClient.getKnowledgeBase(kbId).then((res) => { - resolve({ - name: res.base.name, - 
description: res.base.description, - emoji: res.base.emoji || '📚', - embeddingModelUUID: res.base.embedding_model_uuid, - top_k: res.base.top_k || 5, - }); - }); - }); + // Auto-select first engine when engines are loaded and no selection + useEffect(() => { + if (ragEngines.length > 0 && !selectedEngineId && !isEditing) { + const firstEngine = ragEngines[0]; + setSelectedEngineId(firstEngine.plugin_id); + form.setValue('ragEngineId', firstEngine.plugin_id); + // Initialize config settings with defaults + const formItems = parseCreationSchema(firstEngine.creation_schema); + if (formItems.length > 0) { + setConfigSettings(getDefaultValues(formItems)); + } + } + }, [ragEngines, selectedEngineId, isEditing]); + + const loadRagEngines = async () => { + setLoading(true); + try { + const resp = await httpClient.getRagEngines(); + setRagEngines(resp.engines); + } catch (err) { + console.error('Failed to load RAG engines:', err); + } finally { + setLoading(false); + } + }; + + const loadKbConfig = async (kbId: string) => { + try { + setIsEditing(true); + + const res = await httpClient.getKnowledgeBase(kbId); + const kb = res.base; + + const engineId = kb.rag_engine_plugin_id || ''; + setSelectedEngineId(engineId); + + form.setValue('name', kb.name); + form.setValue('description', kb.description); + form.setValue('emoji', kb.emoji || '📚'); + form.setValue('ragEngineId', engineId); + + setConfigSettings(kb.creation_settings || {}); + } catch (err) { + console.error('Failed to load KB config:', err); + } }; - const getEmbeddingModelNameList = async () => { - const resp = await httpClient.getProviderEmbeddingModels(); - let models = resp.models; - // Filter out space-chat-completions models when not logged in with space account or when models service is disabled - if ( - systemInfo.disable_models_service || - userInfo?.account_type !== 'space' - ) { - models = models.filter( - (m) => m.provider?.requester !== 'space-chat-completions', - ); + const handleEngineChange = 
(engineId: string) => { + setSelectedEngineId(engineId); + form.setValue('ragEngineId', engineId); + + // Find engine and initialize config settings with defaults from schema + const engine = ragEngines.find((e) => e.plugin_id === engineId); + if (engine) { + const formItems = parseCreationSchema(engine.creation_schema); + if (formItems.length > 0) { + setConfigSettings(getDefaultValues(formItems)); + } else { + setConfigSettings({}); + } } - setEmbeddingModels(models); }; const onSubmit = (data: z.infer) => { + const kbData: KnowledgeBase = { + name: data.name, + description: data.description, + emoji: data.emoji, + rag_engine_plugin_id: selectedEngineId, + creation_settings: configSettings, + top_k: 5, + }; + if (initKbId) { - // update knowledge base - const updateKb: KnowledgeBase = { - name: data.name, - description: data.description, - emoji: data.emoji, - embedding_model_uuid: data.embeddingModelUUID, - top_k: data.top_k, - }; + // Update knowledge base httpClient - .updateKnowledgeBase(initKbId, updateKb) + .updateKnowledgeBase(initKbId, kbData) .then((res) => { onKbUpdated(res.uuid); toast.success(t('knowledge.updateKnowledgeBaseSuccess')); @@ -134,25 +205,43 @@ export default function KBForm({ toast.error(t('knowledge.updateKnowledgeBaseFailed')); }); } else { - // create knowledge base - const newKb: KnowledgeBase = { - name: data.name, - description: data.description, - emoji: data.emoji, - embedding_model_uuid: data.embeddingModelUUID, - top_k: data.top_k, - }; + // Create knowledge base httpClient - .createKnowledgeBase(newKb) + .createKnowledgeBase(kbData) .then((res) => { onNewKbCreated(res.uuid); }) .catch((err) => { console.error('create knowledge base failed', err); + toast.error(t('knowledge.createKnowledgeBaseFailed')); }); } }; + // Convert creation schema to dynamic form items (same as ExternalKBForm) + const configFormItems = parseCreationSchema(selectedEngine?.creation_schema); + + // Show loading state + if (loading) { + return ( +
+

{t('common.loading')}

+
+ ); + } + + // Show message if no engines available + if (ragEngines.length === 0) { + return ( +
+

{t('knowledge.noEnginesAvailable')}

+

+ {t('knowledge.installEngineHint')} +

+
+ ); + } + return ( <>
@@ -162,6 +251,57 @@ export default function KBForm({ className="space-y-8" >
+ {/* RAG Engine Selector */} + ( + + + {t('knowledge.ragEngine')} + * + + + + + {selectedEngine?.description && ( + + {extractI18nObject(selectedEngine.description)} + + )} + {isEditing && ( + + {t('knowledge.cannotChangeRagEngine')} + + )} + + + )} + /> + {/* Name and Emoji in same row */}
+ + {/* Description */} )} /> - ( - - - {t('knowledge.embeddingModelUUID')} - * - - -
- -
-
- - {initKbId - ? t('knowledge.cannotChangeEmbeddingModel') - : t('knowledge.embeddingModelDescription')} - - -
- )} - /> - ( - - - {t('knowledge.topK')} - * - - - field.onChange(Number(e.target.value))} - className="w-[180px] h-10 text-base appearance-none" - /> - - - {t('knowledge.topKdescription')} - - - - )} - /> + + {/* Engine specific fields (dynamic form from creation_schema) */} + {configFormItems.length > 0 && ( +
+
+ {t('knowledge.engineSettings')} + {isEditing && ( + + ({t('knowledge.engineSettingsReadonly')}) + + )} +
+
+ } + onSubmit={(val) => + setConfigSettings(val as Record) + } + /> +
+
+ )}
diff --git a/web/src/app/home/knowledge/components/kb-retrieve/ExternalKBRetrieve.tsx b/web/src/app/home/knowledge/components/kb-retrieve/ExternalKBRetrieve.tsx deleted file mode 100644 index c45145d7c..000000000 --- a/web/src/app/home/knowledge/components/kb-retrieve/ExternalKBRetrieve.tsx +++ /dev/null @@ -1,35 +0,0 @@ -'use client'; - -import React from 'react'; -import { httpClient } from '@/app/infra/http/HttpClient'; -import { RetrieveResult } from '@/app/infra/entities/api'; -import KBRetrieveGeneric from './KBRetrieveGeneric'; - -interface ExternalKBRetrieveProps { - kbId: string; -} - -/** - * External knowledge base retrieve component - * Uses the generic retrieve component with external KB API - */ -export default function ExternalKBRetrieve({ kbId }: ExternalKBRetrieveProps) { - const getResultTitle = (result: RetrieveResult): string => { - // For external KB, try to get document_name or use a generic title - return ( - (result.metadata.document_name as string) || - (result.metadata.source as string) || - result.id - ); - }; - - return ( - - ); -} diff --git a/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieve.tsx b/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieve.tsx deleted file mode 100644 index ef831703f..000000000 --- a/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieve.tsx +++ /dev/null @@ -1,124 +0,0 @@ -'use client'; - -import React, { useState, useEffect } from 'react'; -import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; -import { Button } from '@/components/ui/button'; -import { Input } from '@/components/ui/input'; -import { useTranslation } from 'react-i18next'; -import { httpClient } from '@/app/infra/http/HttpClient'; -import { RetrieveResult, KnowledgeBaseFile } from '@/app/infra/entities/api'; -import { toast } from 'sonner'; - -interface KBRetrieveProps { - kbId: string; -} - -export default function KBRetrieve({ kbId }: KBRetrieveProps) { - const { t } = useTranslation(); - 
const [query, setQuery] = useState(''); - const [results, setResults] = useState([]); - const [files, setFiles] = useState([]); - const [loading, setLoading] = useState(false); - - useEffect(() => { - const loadFiles = async () => { - try { - const response = await httpClient.getKnowledgeBaseFiles(kbId); - setFiles(response.files); - } catch (error) { - console.error('Failed to load files:', error); - } - }; - loadFiles(); - }, [kbId]); - - const handleRetrieve = async () => { - if (!query.trim()) return; - - setLoading(true); - try { - setResults([]); - const response = await httpClient.retrieveKnowledgeBase(kbId, query); - setResults(response.results); - } catch (error) { - console.error('Retrieve failed:', error); - toast.error(t('knowledge.retrieveError')); - } finally { - setLoading(false); - } - }; - - const getFileName = (fileId?: string) => { - if (!fileId) return ''; - const file = files.find((f) => f.uuid === fileId); - return file?.file_name || fileId; - }; - - /** - * Extract text content from the content array - * The content array may contain multiple items with type 'text' - */ - const extractTextFromContent = (result: RetrieveResult): string => { - // First try to get content from the new format - if (result.content && Array.isArray(result.content)) { - const textParts = result.content - .filter((item) => item.type === 'text' && item.text) - .map((item) => item.text); - - if (textParts.length > 0) { - return textParts.join('\n\n'); - } - } - - // Fallback to metadata.text for backward compatibility - if (result.metadata?.text) { - return result.metadata.text as string; - } - - return ''; - }; - - return ( -
-
- setQuery(e.target.value)} - placeholder={t('knowledge.queryPlaceholder')} - onKeyPress={(e) => e.key === 'Enter' && handleRetrieve()} - /> - -
- -
- {results.length === 0 && !loading && ( -

{t('knowledge.noResults')}

- )} - - {loading ? ( -

{t('common.loading')}

- ) : ( - results.map((result) => ( - - - - {getFileName(result.metadata.file_id)} - - {t('knowledge.distance')}: {result.distance.toFixed(4)} - - - - -

- {extractTextFromContent(result)} -

-
-
- )) - )} -
-
- ); -} diff --git a/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieveGeneric.tsx b/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieveGeneric.tsx index 5f52569b0..ce660b9c4 100644 --- a/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieveGeneric.tsx +++ b/web/src/app/home/knowledge/components/kb-retrieve/KBRetrieveGeneric.tsx @@ -106,7 +106,7 @@ export default function KBRetrieveGeneric({ {getTitle(result)} - {t('knowledge.distance')}: {result.distance.toFixed(4)} + {t('knowledge.distance')}: {(result.distance ?? 0).toFixed(4)} diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx index 26a976d9a..c4118a8e5 100644 --- a/web/src/app/home/knowledge/page.tsx +++ b/web/src/app/home/knowledge/page.tsx @@ -5,139 +5,66 @@ import styles from './knowledgeBase.module.css'; import { useTranslation } from 'react-i18next'; import { useEffect, useState } from 'react'; import { KnowledgeBaseVO } from '@/app/home/knowledge/components/kb-card/KBCardVO'; -import { ExternalKBCardVO } from '@/app/home/knowledge/components/external-kb-card/ExternalKBCardVO'; import KBCard from '@/app/home/knowledge/components/kb-card/KBCard'; -import ExternalKBCard from '@/app/home/knowledge/components/external-kb-card/ExternalKBCard'; import KBDetailDialog from '@/app/home/knowledge/KBDetailDialog'; import { httpClient } from '@/app/infra/http/HttpClient'; -import { - KnowledgeBase, - ExternalKnowledgeBase, - ApiRespPluginSystemStatus, -} from '@/app/infra/entities/api'; -import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; +import { KnowledgeBase } from '@/app/infra/entities/api'; export default function KnowledgePage() { const { t } = useTranslation(); - const [activeTab, setActiveTab] = useState('builtin'); const [knowledgeBaseList, setKnowledgeBaseList] = useState( [], ); - const [externalKBList, setExternalKBList] = useState([]); const [selectedKbId, setSelectedKbId] = useState(''); - const [selectedKbType, 
setSelectedKbType] = useState<'builtin' | 'external'>( - 'builtin', - ); const [detailDialogOpen, setDetailDialogOpen] = useState(false); - const [pluginSystemStatus, setPluginSystemStatus] = - useState(null); useEffect(() => { getKnowledgeBaseList(); - getExternalKBList(); - fetchPluginSystemStatus(); }, []); - async function fetchPluginSystemStatus() { - try { - const status = await httpClient.getPluginSystemStatus(); - setPluginSystemStatus(status); - } catch (error) { - console.error('Failed to fetch plugin system status:', error); - } - } - async function getKnowledgeBaseList() { const resp = await httpClient.getKnowledgeBases(); - setKnowledgeBaseList( - resp.bases.map((kb: KnowledgeBase) => { - const currentTime = new Date(); - const lastUpdatedTimeAgo = Math.floor( - (currentTime.getTime() - - new Date(kb.updated_at ?? currentTime.getTime()).getTime()) / - 1000 / - 60 / - 60 / - 24, - ); - - const lastUpdatedTimeAgoText = - lastUpdatedTimeAgo > 0 - ? ` ${lastUpdatedTimeAgo} ${t('knowledge.daysAgo')}` - : t('knowledge.today'); - - return new KnowledgeBaseVO({ - id: kb.uuid || '', - name: kb.name, - description: kb.description, - emoji: kb.emoji, - embeddingModelUUID: kb.embedding_model_uuid, - top_k: kb.top_k ?? 5, - lastUpdatedTimeAgo: lastUpdatedTimeAgoText, - }); - }), - ); - } - async function getExternalKBList() { - try { - const resp = await httpClient.getExternalKnowledgeBases(); - setExternalKBList( - resp.bases.map((kb: ExternalKnowledgeBase) => { - const currentTime = new Date(); - const lastUpdatedTimeAgo = Math.floor( - (currentTime.getTime() - - new Date(kb.created_at ?? currentTime.getTime()).getTime()) / - 1000 / - 60 / - 60 / - 24, - ); + const currentTime = new Date(); - const lastUpdatedTimeAgoText = - lastUpdatedTimeAgo > 0 - ? 
` ${lastUpdatedTimeAgo} ${t('knowledge.daysAgo')}` - : t('knowledge.today'); - - return new ExternalKBCardVO({ - id: kb.uuid || '', - name: kb.name, - description: kb.description, - emoji: kb.emoji, - retrieverName: `${kb.plugin_author}/${kb.plugin_name}/${kb.retriever_name}`, - retrieverConfig: kb.retriever_config || {}, - lastUpdatedTimeAgo: lastUpdatedTimeAgoText, - pluginAuthor: kb.plugin_author, - pluginName: kb.plugin_name, - }); - }), + const kbs = resp.bases.map((kb: KnowledgeBase) => { + const lastUpdatedTimeAgo = Math.floor( + (currentTime.getTime() - + new Date(kb.updated_at ?? currentTime.getTime()).getTime()) / + 1000 / + 60 / + 60 / + 24, ); - } catch (error) { - console.error('Failed to load external knowledge bases:', error); - } + + const lastUpdatedTimeAgoText = + lastUpdatedTimeAgo > 0 + ? ` ${lastUpdatedTimeAgo} ${t('knowledge.daysAgo')}` + : t('knowledge.today'); + + return new KnowledgeBaseVO({ + id: kb.uuid || '', + name: kb.name, + description: kb.description, + emoji: kb.emoji, + embeddingModelUUID: kb.embedding_model_uuid, + top_k: kb.top_k ?? 
5, + lastUpdatedTimeAgo: lastUpdatedTimeAgoText, + ragEngine: kb.rag_engine, + ragEnginePluginId: kb.rag_engine_plugin_id, + }); + }); + + setKnowledgeBaseList(kbs); } const handleKBCardClick = (kbId: string) => { setSelectedKbId(kbId); - setSelectedKbType('builtin'); setDetailDialogOpen(true); }; const handleCreateKBClick = () => { setSelectedKbId(''); - setSelectedKbType('builtin'); - setDetailDialogOpen(true); - }; - - const handleExternalKBCardClick = (kbId: string) => { - setSelectedKbId(kbId); - setSelectedKbType('external'); - setDetailDialogOpen(true); - }; - - const handleCreateExternalKB = () => { - setSelectedKbId(''); - setSelectedKbType('external'); setDetailDialogOpen(true); }; @@ -146,30 +73,18 @@ export default function KnowledgePage() { }; const handleKbDeleted = () => { - if (selectedKbType === 'builtin') { - getKnowledgeBaseList(); - } else { - getExternalKBList(); - } + getKnowledgeBaseList(); setDetailDialogOpen(false); }; const handleNewKbCreated = (newKbId: string) => { - if (selectedKbType === 'builtin') { - getKnowledgeBaseList(); - } else { - getExternalKBList(); - } + getKnowledgeBaseList(); setSelectedKbId(newKbId); setDetailDialogOpen(true); }; const handleKbUpdated = () => { - if (selectedKbType === 'builtin') { - getKnowledgeBaseList(); - } else { - getExternalKBList(); - } + getKnowledgeBaseList(); }; return ( @@ -178,73 +93,28 @@ export default function KnowledgePage() { open={detailDialogOpen} onOpenChange={setDetailDialogOpen} kbId={selectedKbId || undefined} - kbType={selectedKbType} onFormCancel={handleFormCancel} onKbDeleted={handleKbDeleted} onNewKbCreated={handleNewKbCreated} onKbUpdated={handleKbUpdated} /> - -
- - - {t('knowledge.builtIn')} - - {/* Only show external tab if plugin system is enabled and connected */} - {pluginSystemStatus?.is_enable && - pluginSystemStatus?.is_connected && ( - - {t('knowledge.external')} - - )} - -
- - -
- - - {knowledgeBaseList.map((kb) => { - return ( -
handleKBCardClick(kb.id)}> - -
- ); - })} -
-
- - -
- - - {externalKBList.map((kb) => { - return ( -
handleExternalKBCardClick(kb.id)} - > - -
- ); - })} -
-
-
+
+ + + {knowledgeBaseList.map((kb) => { + return ( +
handleKBCardClick(kb.id)}> + +
+ ); + })} +
); } diff --git a/web/src/app/home/plugins/components/plugin-installed/PluginComponentList.tsx b/web/src/app/home/plugins/components/plugin-installed/PluginComponentList.tsx index 23ddfaa29..24f2fe480 100644 --- a/web/src/app/home/plugins/components/plugin-installed/PluginComponentList.tsx +++ b/web/src/app/home/plugins/components/plugin-installed/PluginComponentList.tsx @@ -32,45 +32,39 @@ export default function PluginComponentList({ {componentKindList.length > 0 && ( <> {componentKindList.map((kind) => { - return ( - <> - {useBadge && ( - - {kindIconMap[kind]} - {/* 响应式显示组件名称:在中等屏幕以上显示 */} - {responsive ? ( - - {t('plugins.componentName.' + kind)} - - ) : ( - showComponentName && t('plugins.componentName.' + kind) - )} - {components[kind]} - + return useBadge ? ( + + {kindIconMap[kind]} + {/* 响应式显示组件名称:在中等屏幕以上显示 */} + {responsive ? ( + + {t('plugins.componentName.' + kind)} + + ) : ( + showComponentName && t('plugins.componentName.' + kind) )} - - {!useBadge && ( -
- {kindIconMap[kind]} - {/* 响应式显示组件名称:在中等屏幕以上显示 */} - {responsive ? ( - - {t('plugins.componentName.' + kind)} - - ) : ( - showComponentName && t('plugins.componentName.' + kind) - )} - {components[kind]} -
+ {components[kind]} +
+ ) : ( +
+ {kindIconMap[kind]} + {/* 响应式显示组件名称:在中等屏幕以上显示 */} + {responsive ? ( + + {t('plugins.componentName.' + kind)} + + ) : ( + showComponentName && t('plugins.componentName.' + kind) )} - + {components[kind]} +
); })} diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index 50bc13e86..f3e66007f 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -70,17 +70,6 @@ export interface LLMModel { extra_args?: object; } -export interface KnowledgeBase { - uuid?: string; - name: string; - description: string; - embedding_model_uuid: string; - created_at?: string; - updated_at?: string; - top_k: number; - emoji?: string; -} - export interface ApiRespProviderEmbeddingModels { models: EmbeddingModel[]; } @@ -166,31 +155,37 @@ export interface KnowledgeBase { uuid?: string; name: string; description: string; - embedding_model_uuid: string; + embedding_model_uuid?: string; // Optional - can be in creation_settings instead top_k: number; created_at?: string; updated_at?: string; emoji?: string; + // New unified fields + rag_engine_plugin_id?: string; + creation_settings?: Record; + rag_engine?: RAGEngineInfo; } -export interface ExternalKnowledgeBase { - uuid?: string; - name: string; - description: string; - created_at?: string; - plugin_author: string; - plugin_name: string; - retriever_name: string; - retriever_config?: Record; - emoji?: string; +// RAG Engine types +export interface RAGEngineInfo { + plugin_id: string | null; + name: I18nObject; + capabilities: string[]; } -export interface ApiRespExternalKnowledgeBases { - bases: ExternalKnowledgeBase[]; +export interface RAGEngine { + plugin_id: string; + name: I18nObject; + description?: I18nObject; + capabilities: string[]; + // Schema format: Array of form field definitions (IDynamicFormItemSchema-like) + // Each item: { name, label, type, required, default, description?, options? 
} + creation_schema?: unknown[]; + retrieval_schema?: unknown[]; } -export interface ApiRespExternalKnowledgeBase { - base: ExternalKnowledgeBase; +export interface ApiRespRAGEngines { + engines: RAGEngine[]; } export interface ApiRespKnowledgeBaseFiles { diff --git a/web/src/app/infra/entities/form/dynamic.ts b/web/src/app/infra/entities/form/dynamic.ts index d48801436..6422f1a4b 100644 --- a/web/src/app/infra/entities/form/dynamic.ts +++ b/web/src/app/infra/entities/form/dynamic.ts @@ -26,6 +26,7 @@ export enum DynamicFormItemType { FILE_ARRAY = 'array[file]', SELECT = 'select', LLM_MODEL_SELECTOR = 'llm-model-selector', + EMBEDDING_MODEL_SELECTOR = 'embedding-model-selector', PROMPT_EDITOR = 'prompt-editor', UNKNOWN = 'unknown', KNOWLEDGE_BASE_SELECTOR = 'knowledge-base-selector', diff --git a/web/src/app/infra/http/BackendClient.ts b/web/src/app/infra/http/BackendClient.ts index bb913bd0a..25db4a8b2 100644 --- a/web/src/app/infra/http/BackendClient.ts +++ b/web/src/app/infra/http/BackendClient.ts @@ -35,12 +35,10 @@ import { ApiRespMCPServers, ApiRespMCPServer, MCPServer, - ExternalKnowledgeBase, - ApiRespExternalKnowledgeBases, - ApiRespExternalKnowledgeBase, ApiRespModelProviders, ApiRespModelProvider, ModelProvider, + ApiRespRAGEngines, } from '@/app/infra/entities/api'; import { Plugin } from '@/app/infra/entities/plugin'; import { GetBotLogsRequest } from '@/app/infra/http/requestParam/bots/GetBotLogsRequest'; @@ -403,49 +401,17 @@ export class BackendClient extends BaseHttpClient { public retrieveKnowledgeBase( uuid: string, query: string, + retrievalSettings?: Record, ): Promise { - return this.post(`/api/v1/knowledge/bases/${uuid}/retrieve`, { query }); - } - - // ============ External Knowledge Base API ============ - public getExternalKnowledgeBases(): Promise { - return this.get('/api/v1/knowledge/external-bases'); - } - - public getExternalKnowledgeBase( - uuid: string, - ): Promise { - return this.get(`/api/v1/knowledge/external-bases/${uuid}`); - 
} - - public createExternalKnowledgeBase( - base: ExternalKnowledgeBase, - ): Promise<{ uuid: string }> { - return this.post('/api/v1/knowledge/external-bases', base); - } - - public updateExternalKnowledgeBase( - uuid: string, - base: ExternalKnowledgeBase, - ): Promise<{ uuid: string }> { - return this.put(`/api/v1/knowledge/external-bases/${uuid}`, base); - } - - public deleteExternalKnowledgeBase(uuid: string): Promise { - return this.delete(`/api/v1/knowledge/external-bases/${uuid}`); - } - - public retrieveExternalKnowledgeBase( - uuid: string, - query: string, - ): Promise { - return this.post(`/api/v1/knowledge/external-bases/${uuid}/retrieve`, { + return this.post(`/api/v1/knowledge/bases/${uuid}/retrieve`, { query, + retrieval_settings: retrievalSettings ?? {}, }); } - public listKnowledgeRetrievers(): Promise<{ retrievers: unknown[] }> { - return this.get('/api/v1/knowledge/external-bases/retrievers'); + // ============ RAG Engines API ============ + public getRagEngines(): Promise<ApiRespRAGEngines> { + return this.get('/api/v1/knowledge/engines'); } // ============ Plugins API ============ diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index e988ad315..fc6fbb270 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -718,8 +718,19 @@ const enUS = { fileName: 'File Name', noResults: 'No results', retrieveError: 'Retrieve failed', - builtIn: 'Built-in', - external: 'External', + unknownEngine: 'Unknown Engine', + ragEngine: 'RAG Engine', + ragEngineRequired: 'RAG engine is required', + selectRagEngine: 'Select RAG Engine', + builtInEngine: 'Built-in Engine', + cannotChangeRagEngine: 'RAG engine cannot be changed after creation', + engineSettings: 'Engine Settings', + engineSettingsReadonly: 'read-only in edit mode', + noEnginesAvailable: 'No knowledge base engines available', + installEngineHint: 'Please install a knowledge base plugin first', + createKnowledgeBaseFailed: 'Failed to create knowledge base', + 
embeddingModel: 'Embedding Model', + embeddingModelRequired: 'Embedding model is required for this engine', addExternal: 'Add External Knowledge Base', createExternalSuccess: 'External knowledge base created successfully', updateExternalSuccess: 'External knowledge base updated successfully', diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts index 64b63cca1..39c63b009 100644 --- a/web/src/i18n/locales/ja-JP.ts +++ b/web/src/i18n/locales/ja-JP.ts @@ -721,8 +721,7 @@ const jaJP = { fileName: 'ファイル名', noResults: '検索結果がありません', retrieveError: '検索に失敗しました', - builtIn: '内蔵', - external: '外部ナレッジベース', + unknownEngine: '不明なエンジン', addExternal: '外部ナレッジベースを追加', createExternalSuccess: '外部ナレッジベースが正常に作成されました', updateExternalSuccess: '外部ナレッジベースが正常に更新されました', diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 141f073d5..2c9ab6d78 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -688,8 +688,19 @@ const zhHans = { fileName: '文件名', noResults: '暂无结果', retrieveError: '检索失败', - builtIn: '内置', - external: '外部知识库', + unknownEngine: '未知引擎', + ragEngine: 'RAG 引擎', + ragEngineRequired: 'RAG 引擎不能为空', + selectRagEngine: '选择 RAG 引擎', + builtInEngine: '内置引擎', + cannotChangeRagEngine: '知识库创建后不可修改 RAG 引擎', + engineSettings: '引擎设置', + engineSettingsReadonly: '编辑模式下不可修改', + noEnginesAvailable: '没有可用的知识库引擎', + installEngineHint: '请先安装知识库插件', + createKnowledgeBaseFailed: '知识库创建失败', + embeddingModel: '嵌入模型', + embeddingModelRequired: '此引擎需要选择嵌入模型', addExternal: '添加外部知识库', createExternalSuccess: '外部知识库创建成功', updateExternalSuccess: '外部知识库更新成功', diff --git a/web/src/i18n/locales/zh-Hant.ts b/web/src/i18n/locales/zh-Hant.ts index 6f9266c3c..f7ab4beb1 100644 --- a/web/src/i18n/locales/zh-Hant.ts +++ b/web/src/i18n/locales/zh-Hant.ts @@ -681,8 +681,7 @@ const zhHant = { fileName: '文檔名稱', noResults: '暫無結果', retrieveError: '檢索失敗', - builtIn: '內置', - external: '外部知識庫', + unknownEngine: '未知引擎', addExternal: '添加外部知識庫', 
createExternalSuccess: '外部知識庫創建成功', updateExternalSuccess: '外部知識庫更新成功',