-
Notifications
You must be signed in to change notification settings - Fork 480
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Integrated Vectorization] Changes for Admin Configuration page (#798)
* downloading the contents of url & uploading blob * changes for downloading url content & uploading to blob * Adding url as the metadata to the blob * PR only for adding environment var * removing changes unrelated to env variable * Grouping integrated vec. with azure search * updating description to match the usage * grouping search variables * Spike for resource creation * Failing build due to poetry lock file * pull resources creation * Pull resource creation refactoring * updating index field names * fixing tests for when using IV * changes for rbac IV * role for storage & tests * unit tests for helpers * unit tests for helper classes * code review changes * tests update * Explore Data changes * decoupled explore & delete pages * showing only the chunk number to match push model * Deleting unused files * Deleting unused files * unused code * unit tests for explore & delete * Code review changes * Chat changes for IV * missed mapping * Question Answer tool changes * module load issue * mapping chunk id as chunk number * sync main * test fix * code review comments * method name change * Admin config changes * Refactoring Document Processor * unit test fix * code review changes * updating configHelper path * test fix * test fix * unit test fix * code review comments * unit test fix * config type * loading env variables in Admin.py * type declaration for env_helper * moving env helper below app insights * unused variable
- Loading branch information
Showing
34 changed files
with
453 additions
and
285 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 0 additions & 88 deletions
88
code/backend/batch/utilities/helpers/DocumentProcessorHelper.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
code/backend/batch/utilities/helpers/config/EmbeddingConfig.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from ..DocumentLoadingHelper import LoadingSettings | ||
from ..DocumentChunkingHelper import ChunkingSettings | ||
|
||
|
||
class EmbeddingConfig(ChunkingSettings, LoadingSettings):
    """Embedding settings for a single document type.

    Groups the chunking settings, the loading settings and the
    advanced-image-processing flag that apply to ``document_type``.
    """

    def __init__(
        self,
        document_type: str,
        chunking: ChunkingSettings | None,
        loading: LoadingSettings | None,
        use_advanced_image_processing: bool,
    ):
        """Store the supplied settings on the instance.

        Args:
            document_type: Identifier of the document type being configured.
            chunking: Chunking settings for this document type, or ``None``.
            loading: Loading settings for this document type, or ``None``.
            use_advanced_image_processing: Flag stored as-is on the instance.
        """
        # NOTE(review): the parent initialisers are intentionally not called
        # here, matching the existing behaviour; only these four attributes
        # are set on the instance.
        self.document_type = document_type
        self.chunking = chunking
        self.loading = loading
        self.use_advanced_image_processing = use_advanced_image_processing

    def __eq__(self, other):
        """Return True when ``other`` is an EmbeddingConfig with equal fields.

        Any object that is not an ``EmbeddingConfig`` compares unequal.
        """
        # Check the type of `other`, not of `self`: the reversed test accepted
        # any ancestor-class instance (even a bare `object`) and then failed
        # with AttributeError while reading its attributes.
        if not isinstance(other, EmbeddingConfig):
            return False
        return (
            self.document_type == other.document_type
            and self.chunking == other.chunking
            and self.loading == other.loading
            and self.use_advanced_image_processing
            == other.use_advanced_image_processing
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
code/backend/batch/utilities/helpers/embedders/EmbedderBase.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
class EmbedderBase(ABC):
    """Abstract interface that every embedder implementation must provide."""

    @abstractmethod
    def embed_file(self, source_url: str, file_name: str):
        """Embed the file identified by ``source_url`` and ``file_name``.

        Concrete subclasses must override this method; the base
        implementation does nothing and returns ``None``.
        """
15 changes: 15 additions & 0 deletions
15
code/backend/batch/utilities/helpers/embedders/EmbedderFactory.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from ..EnvHelper import EnvHelper | ||
from ..AzureBlobStorageClient import AzureBlobStorageClient | ||
from .PushEmbedder import PushEmbedder | ||
from .IntegratedVectorizationEmbedder import ( | ||
IntegratedVectorizationEmbedder, | ||
) | ||
|
||
|
||
class EmbedderFactory:
    """Factory that selects the embedder implementation from the environment."""

    @staticmethod
    def create(env_helper: EnvHelper):
        """Return the embedder matching the integrated-vectorization setting.

        Args:
            env_helper: Environment helper whose
                ``AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION`` flag is read.

        Returns:
            An ``IntegratedVectorizationEmbedder`` when the flag is truthy,
            otherwise a ``PushEmbedder`` backed by a new
            ``AzureBlobStorageClient``.
        """
        integrated_enabled = env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
        if not integrated_enabled:
            # Push model: the embedder works against blob storage.
            return PushEmbedder(AzureBlobStorageClient())
        return IntegratedVectorizationEmbedder(env_helper)
41 changes: 41 additions & 0 deletions
41
code/backend/batch/utilities/helpers/embedders/IntegratedVectorizationEmbedder.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from .EmbedderBase import EmbedderBase | ||
from ..EnvHelper import EnvHelper | ||
from ..LLMHelper import LLMHelper | ||
from ...integrated_vectorization.AzureSearchIndex import AzureSearchIndex | ||
from ...integrated_vectorization.AzureSearchIndexer import AzureSearchIndexer | ||
from ...integrated_vectorization.AzureSearchDatasource import AzureSearchDatasource | ||
from ...integrated_vectorization.AzureSearchSkillset import AzureSearchSkillset | ||
from ..config.ConfigHelper import ConfigHelper | ||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class IntegratedVectorizationEmbedder(EmbedderBase):
    """Embedder that sets up the Azure Search integrated-vectorization resources."""

    def __init__(self, env_helper: EnvHelper):
        """Keep the environment helper and create an ``LLMHelper``.

        Args:
            env_helper: Environment helper passed on to the search resources.
        """
        self.env_helper = env_helper
        self.llm_helper: LLMHelper = LLMHelper()

    def embed_file(self, source_url: str, file_name: str):
        """Run integrated vectorization for ``source_url``.

        ``file_name`` is part of the ``EmbedderBase.embed_file`` signature but
        is not used by this implementation. Nothing is returned.
        """
        self.process_using_integrated_vectorization(source_url=source_url)

    def process_using_integrated_vectorization(self, source_url: str):
        """Create or update the datasource, index, skillset and indexer.

        Args:
            source_url: Only used in the error log message on failure.

        Returns:
            The result of ``AzureSearchIndexer.create_or_update_indexer``.

        Raises:
            Exception: Any error raised by the setup steps is logged with its
                traceback and re-raised unchanged.
        """
        config = ConfigHelper.get_active_config_or_default()
        try:
            search_datasource = AzureSearchDatasource(self.env_helper)
            search_datasource.create_or_update_datasource()
            search_index = AzureSearchIndex(self.env_helper, self.llm_helper)
            search_index.create_or_update_index()
            search_skillset = AzureSearchSkillset(
                self.env_helper, config.integrated_vectorization_config
            )
            search_skillset_result = search_skillset.create_skillset()
            search_indexer = AzureSearchIndexer(self.env_helper)
            # The indexer is bound to the skillset created just above.
            indexer_result = search_indexer.create_or_update_indexer(
                self.env_helper.AZURE_SEARCH_INDEXER_NAME,
                skillset_name=search_skillset_result.name,
            )
            return indexer_result
        except Exception as e:
            # logger.exception records the traceback alongside the message;
            # a bare `raise` re-raises the active exception as-is.
            logger.exception(f"Error processing {source_url}: {e}")
            raise
Oops, something went wrong.