From 05970157f634193b073c8e141bdf19f5fc854a8b Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Thu, 3 Oct 2024 23:06:47 -0700 Subject: [PATCH] refac --- backend/open_webui/apps/retrieval/main.py | 11 +++++++---- backend/open_webui/apps/retrieval/utils.py | 15 ++++++++++----- .../apps/retrieval/vector/dbs/chroma.py | 11 +++++++---- .../open_webui/apps/webui/routers/knowledge.py | 9 ++++++++- backend/open_webui/constants.py | 4 +++- .../workspace/Knowledge/Collection.svelte | 5 +++-- 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index c9ba33211..cd27b5530 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -643,13 +643,16 @@ def save_docs_to_vector_db( # Check if entries with the same hash (metadata.hash) already exist if metadata and "hash" in metadata: - existing_docs = VECTOR_DB_CLIENT.query( + result = VECTOR_DB_CLIENT.query( collection_name=collection_name, filter={"hash": metadata["hash"]}, ) - if existing_docs: - log.info(f"Document with hash {metadata['hash']} already exists") - raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) + + if result: + existing_doc_ids = result.ids[0] + if existing_doc_ids: + log.info(f"Document with hash {metadata['hash']} already exists") + raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) if split: text_splitter = RecursiveCharacterTextSplitter( diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index c671b03b4..f8c9ded5b 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -325,11 +325,16 @@ def get_rag_context( else: context = None - collection_names = ( - file["collection_names"] - if file["type"] == "collection" - else [file["collection_name"]] if file["collection_name"] else [] - ) + collection_names = [] + if file.get("type") == "collection": + if file.get("legacy"): + collection_names = file.get("collection_names", []) + else: + collection_names.append(file["id"]) + elif file.get("collection_name"): + collection_names.append(file["collection_name"]) + elif file.get("id"): + collection_names.append(f"file-{file['id']}") collection_names = set(collection_names).difference(extracted_collections) if not collection_names: diff --git a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py index 4a85c1251..00b4af441 100644 --- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py @@ -70,7 +70,7 @@ class ChromaClient: return None def query( - self, collection_name: str, filter: dict, limit: int = 1 + self, collection_name: str, filter: dict, limit: int = 2 ) -> Optional[GetResult]: # Query the items from the collection based on the filter. @@ -82,15 +82,18 @@ class ChromaClient: limit=limit, ) + print(result) + return GetResult( **{ - "ids": result["ids"], - "documents": result["documents"], - "metadatas": result["metadatas"], + "ids": [result["ids"]], + "documents": [result["documents"]], + "metadatas": [result["metadatas"]], } ) return None except Exception as e: + print(e) return None def get(self, collection_name: str) -> Optional[GetResult]: diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py index c55d84d22..1fb23b40b 100644 --- a/backend/open_webui/apps/webui/routers/knowledge.py +++ b/backend/open_webui/apps/webui/routers/knowledge.py @@ -152,7 +152,13 @@ def add_file_to_knowledge_by_id( ) # Add content to the vector database - process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + try: + process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) if knowledge: data = knowledge.data or {} @@ -263,5 +269,6 @@ def remove_file_from_knowledge_by_id( @router.delete("/{id}/delete", response_model=bool) async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)): + VECTOR_DB_CLIENT.delete_collection(collection_name=id) result = Knowledges.delete_knowledge_by_id(id=id) return result diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py index 0326ae96e..37461402b 100644 --- a/backend/open_webui/constants.py +++ b/backend/open_webui/constants.py @@ -94,7 +94,9 @@ class ERROR_MESSAGES(str, Enum): lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}." ) - DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding." + DUPLICATE_CONTENT = ( + "Duplicate content detected. Please provide unique content to proceed." + ) FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding." diff --git a/src/lib/components/workspace/Knowledge/Collection.svelte b/src/lib/components/workspace/Knowledge/Collection.svelte index 46bfafc36..e343c0099 100644 --- a/src/lib/components/workspace/Knowledge/Collection.svelte +++ b/src/lib/components/workspace/Knowledge/Collection.svelte @@ -94,7 +94,7 @@ const addFileHandler = async (fileId) => { const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch( (e) => { - console.error(e); + toast.error(e); } ); @@ -110,7 +110,7 @@ id, fileId ).catch((e) => { - console.error(e); + toast.error(e); }); if (updatedKnowledge) { @@ -341,6 +341,7 @@ on:delete={(e) => { console.log(e.detail); + selectedFileId = null; deleteFileHandler(e.detail); }} />