Browse Source

enh: web embed bypass embedding and retrieval support

Timothy Jaeryang Baek 2 months ago
parent
commit
d0ddb0637e
2 changed files with 15 additions and 3 deletions
  1. +7 -0  backend/open_webui/retrieval/utils.py
  2. +8 -3  backend/open_webui/routers/retrieval.py

+ 7 - 0
backend/open_webui/retrieval/utils.py

@@ -414,6 +414,13 @@ def get_sources_from_files(
                             ]
                         ],
                     }
+            elif file.get("file").get("data"):
+                context = {
+                    "documents": [[file.get("file").get("data", {}).get("content")]],
+                    "metadatas": [
+                        [file.get("file").get("data", {}).get("metadata", {})]
+                    ],
+                }
         else:
             collection_names = []
             if file.get("type") == "collection":

+ 8 - 3
backend/open_webui/routers/retrieval.py

@@ -1187,9 +1187,13 @@ def process_web(
         content = " ".join([doc.page_content for doc in docs])
 
         log.debug(f"text_content: {content}")
-        save_docs_to_vector_db(
-            request, docs, collection_name, overwrite=True, user=user
-        )
+
+        if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
+            save_docs_to_vector_db(
+                request, docs, collection_name, overwrite=True, user=user
+            )
+        else:
+            collection_name = None
 
         return {
             "status": True,
@@ -1201,6 +1205,7 @@ def process_web(
                 },
                 "meta": {
                     "name": form_data.url,
+                    "source": form_data.url,
                 },
             },
         }