浏览代码

Merge pull request #6543 from execgit/main

fix: omit document contents in logging document embedding
Timothy Jaeryang Baek 6 月之前
父节点
当前提交
adfce28732
共有 2 个文件被更改,包括 25 次插入和 3 次删除
  1. 20 1
      backend/open_webui/apps/retrieval/main.py
  2. 5 2
      backend/open_webui/apps/retrieval/utils.py

+ 20 - 1
backend/open_webui/apps/retrieval/main.py

@@ -637,6 +637,25 @@ async def update_query_settings(
 ####################################
 ####################################
 
 
 
 
def _get_docs_info(docs: "list[Document]") -> str:
    """Return a short, comma-separated label for *docs* suitable for logging.

    For each document the first non-empty value among the metadata keys
    ``name``, ``title`` and ``source`` is used as its label; documents that
    provide none of them are skipped. Labels are de-duplicated and kept in
    first-seen order, so the same input always yields the same log line.

    Args:
        docs: Documents to describe. Only their ``metadata`` attribute is
            read; document contents are never included in the result.

    Returns:
        The labels joined with ``", "``, or an empty string when no
        document yields a label.
    """
    # A dict is used as an ordered set: iterating a set of strings gives an
    # order that changes between interpreter runs, which makes log lines
    # for the same documents differ from one run to the next.
    labels: dict[str, None] = {}

    for doc in docs or ():
        # ``metadata`` may be absent or explicitly None; a logging helper
        # must not raise in either case.
        metadata = getattr(doc, "metadata", None) or {}
        label = (
            metadata.get("name")
            or metadata.get("title")
            or metadata.get("source")
        )
        if label:
            # Coerce so a non-string value cannot break ``str.join`` below.
            labels[str(label)] = None

    return ", ".join(labels)
+
+
 def save_docs_to_vector_db(
 def save_docs_to_vector_db(
     docs,
     docs,
     collection_name,
     collection_name,
@@ -645,7 +664,7 @@ def save_docs_to_vector_db(
     split: bool = True,
     split: bool = True,
     add: bool = False,
     add: bool = False,
 ) -> bool:
 ) -> bool:
-    log.info(f"save_docs_to_vector_db {docs} {collection_name}")
+    log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}")
 
 
     # Check if entries with the same hash (metadata.hash) already exist
     # Check if entries with the same hash (metadata.hash) already exist
     if metadata and "hash" in metadata:
     if metadata and "hash" in metadata:

+ 5 - 2
backend/open_webui/apps/retrieval/utils.py

@@ -76,7 +76,7 @@ def query_doc(
             limit=k,
             limit=k,
         )
         )
 
 
-        log.info(f"query_doc:result {result}")
+        log.info(f"query_doc:result {result.ids} {result.metadatas}")
         return result
         return result
     except Exception as e:
     except Exception as e:
         print(e)
         print(e)
@@ -127,7 +127,10 @@ def query_doc_with_hybrid_search(
             "metadatas": [[d.metadata for d in result]],
             "metadatas": [[d.metadata for d in result]],
         }
         }
 
 
-        log.info(f"query_doc_with_hybrid_search:result {result}")
+        log.info(
+            "query_doc_with_hybrid_search:result " +
+            f"{result.metadatas} {result.distances}"
+        )
         return result
         return result
     except Exception as e:
     except Exception as e:
         raise e
         raise e