Explorar o código

Merge pull request #10459 from open-webui/dev

0.5.16
Timothy Jaeryang Baek hai 2 meses
pai
achega
6fedd72e39

+ 6 - 0
CHANGELOG.md

@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.5.16] - 2025-02-20
+
+### Fixed
+
+- **🔍 Web Search Retrieval Restored**: Resolved a critical issue that broke web search retrieval; the fix reverts the recent deduplication changes, ensuring complete and accurate search results once again.
+
 ## [0.5.15] - 2025-02-20
 
 ### Added

+ 2 - 2
backend/open_webui/config.py

@@ -1714,7 +1714,7 @@ Respond to the user query using the provided context, incorporating inline citat
 - Respond in the same language as the user's query.
 - If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
 - If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
-- **Only include inline citations using [source_id] when a <source_id> tag is explicitly provided in the context.**  
+- **Only include inline citations using [source_id] (e.g., [1], [2]) when a `<source_id>` tag is explicitly provided in the context.**
 - Do not cite if the <source_id> tag is not provided in the context.  
 - Do not use XML tags in your response.
 - Ensure citations are concise and directly related to the information provided.
@@ -1957,7 +1957,7 @@ RAG_WEB_LOADER_ENGINE = PersistentConfig(
 RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig(
     "RAG_WEB_SEARCH_TRUST_ENV",
     "rag.web.search.trust_env",
-    os.getenv("RAG_WEB_SEARCH_TRUST_ENV", False),
+    os.getenv("RAG_WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
 )
 
 PLAYWRIGHT_WS_URI = PersistentConfig(

+ 15 - 28
backend/open_webui/retrieval/utils.py

@@ -14,7 +14,8 @@ from langchain_core.documents import Document
 
 from open_webui.config import VECTOR_DB
 from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
-from open_webui.utils.misc import get_last_user_message
+from open_webui.utils.misc import get_last_user_message, calculate_sha256_string
+
 from open_webui.models.users import UserModel
 
 from open_webui.env import (
@@ -178,45 +179,31 @@ def merge_and_sort_query_results(
     combined_distances = []
     combined_documents = []
     combined_metadatas = []
-    combined_ids = []
 
     for data in query_results:
         combined_distances.extend(data["distances"][0])
         combined_documents.extend(data["documents"][0])
         combined_metadatas.extend(data["metadatas"][0])
-        # DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals
-        combined_ids.extend(
-            [
-                f"{id}-{meta['file_id']}"
-                for id, meta in zip(data["ids"][0], data["metadatas"][0])
-            ]
-        )
 
-    # Create a list of tuples (distance, document, metadata, ids)
-    combined = list(
-        zip(combined_distances, combined_documents, combined_metadatas, combined_ids)
-    )
+    # Create a list of tuples (distance, document, metadata)
+    combined = list(zip(combined_distances, combined_documents, combined_metadatas))
 
     # Sort the list based on distances
     combined.sort(key=lambda x: x[0], reverse=reverse)
 
-    sorted_distances = []
-    sorted_documents = []
-    sorted_metadatas = []
-    # Otherwise we don't have anything :-(
-    if combined:
+    # No query results were collected, so there is nothing to unzip
+    if not combined:
+        sorted_distances = []
+        sorted_documents = []
+        sorted_metadatas = []
+    else:
         # Unzip the sorted list
-        all_distances, all_documents, all_metadatas, all_ids = zip(*combined)
-        seen_ids = set()
+        sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)
+
         # Slicing the lists to include only k elements
-        for index, id in enumerate(all_ids):
-            if id not in seen_ids:
-                sorted_distances.append(all_distances[index])
-                sorted_documents.append(all_documents[index])
-                sorted_metadatas.append(all_metadatas[index])
-                seen_ids.add(id)
-                if len(sorted_distances) >= k:
-                    break
+        sorted_distances = list(sorted_distances)[:k]
+        sorted_documents = list(sorted_documents)[:k]
+        sorted_metadatas = list(sorted_metadatas)[:k]
 
     # Create the output dictionary
     result = {

+ 2 - 2
package-lock.json

@@ -1,12 +1,12 @@
 {
 	"name": "open-webui",
-	"version": "0.5.15",
+	"version": "0.5.16",
 	"lockfileVersion": 3,
 	"requires": true,
 	"packages": {
 		"": {
 			"name": "open-webui",
-			"version": "0.5.15",
+			"version": "0.5.16",
 			"dependencies": {
 				"@codemirror/lang-javascript": "^6.2.2",
 				"@codemirror/lang-python": "^6.1.6",

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
 	"name": "open-webui",
-	"version": "0.5.15",
+	"version": "0.5.16",
 	"private": true,
 	"scripts": {
 		"dev": "npm run pyodide:fetch && vite dev --host",

+ 10 - 10
src/lib/i18n/locales/zh-CN/translation.json

@@ -182,7 +182,7 @@
 	"Code execution": "代码执行",
 	"Code Execution": "代码执行",
 	"Code Execution Engine": "代码执行引擎",
-	"Code Execution Timeout": "",
+	"Code Execution Timeout": "代码执行超时时间",
 	"Code formatted successfully": "代码格式化成功",
 	"Code Interpreter": "代码解释器",
 	"Code Interpreter Engine": "代码解释引擎",
@@ -205,7 +205,7 @@
 	"Confirm your new password": "确认新密码",
 	"Connect to your own OpenAI compatible API endpoints.": "连接到你自己的与 OpenAI 兼容的 API 接口端点。",
 	"Connections": "外部连接",
-	"Constrains effort on reasoning for reasoning models. Only applicable to reasoning models from specific providers that support reasoning effort. (Default: medium)": "限制推理模型的推理努力。仅适用于支持推理努力的特定提供商的推理模型。(默认值:中等)",
+	"Constrains effort on reasoning for reasoning models. Only applicable to reasoning models from specific providers that support reasoning effort. (Default: medium)": "限制模型的努力。仅适用于支持努力的特定提供商的模型。(默认值:中等)",
 	"Contact Admin for WebUI Access": "请联系管理员以获取访问权限",
 	"Content": "内容",
 	"Content Extraction": "内容提取",
@@ -322,7 +322,7 @@
 	"Draw": "平局",
 	"Drop any files here to add to the conversation": "拖动文件到此处以添加到对话中",
 	"e.g. '30s','10m'. Valid time units are 's', 'm', 'h'.": "例如 '30s','10m'。有效的时间单位是秒:'s',分:'m',时:'h'。",
-	"e.g. 60": "",
+	"e.g. 60": "例如 '60'",
 	"e.g. A filter to remove profanity from text": "例如:一个用于过滤文本中不当内容的过滤器",
 	"e.g. My Filter": "例如:我的过滤器",
 	"e.g. My Tools": "例如:我的工具",
@@ -410,7 +410,7 @@
 	"Enter Tavily API Key": "输入 Tavily API 密钥",
 	"Enter the public URL of your WebUI. This URL will be used to generate links in the notifications.": "输入 WebUI 的公共 URL。此 URL 将用于在通知中生成链接。",
 	"Enter Tika Server URL": "输入 Tika 服务器地址",
-	"Enter timeout in seconds": "",
+	"Enter timeout in seconds": "输入以秒为单位的超时时间",
 	"Enter Top K": "输入 Top K",
 	"Enter URL (e.g. http://127.0.0.1:7860/)": "输入地址 (例如:http://127.0.0.1:7860/)",
 	"Enter URL (e.g. http://localhost:11434)": "输入地址 (例如:http://localhost:11434)",
@@ -503,9 +503,9 @@
 	"Functions allow arbitrary code execution": "注意:函数有权执行任意代码",
 	"Functions allow arbitrary code execution.": "注意:函数有权执行任意代码。",
 	"Functions imported successfully": "函数导入成功",
-	"Gemini": "",
-	"Gemini API Config": "",
-	"Gemini API Key is required.": "",
+	"Gemini": "Gemini",
+	"Gemini API Config": "Gemini API 配置",
+	"Gemini API Key is required.": "需要 Gemini API 密钥。",
 	"General": "通用",
 	"General Settings": "通用设置",
 	"Generate an image": "生成图像",
@@ -981,7 +981,7 @@
 	"The score should be a value between 0.0 (0%) and 1.0 (100%).": "分值应介于 0.0(0%)和 1.0(100%)之间。",
 	"The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)": "模型的温度。提高温度将使模型更具创造性地回答。(默认值:0.8)",
 	"Theme": "主题",
-	"Thinking...": "正在深度思考...",
+	"Thinking...": "正在思考...",
 	"This action cannot be undone. Do you wish to continue?": "此操作无法撤销。是否确认继续?",
 	"This ensures that your valuable conversations are securely saved to your backend database. Thank you!": "这将确保您的宝贵对话被安全地保存到后台数据库中。感谢!",
 	"This is an experimental feature, it may not function as expected and is subject to change at any time.": "这是一个实验功能,可能不会如预期那样工作,而且可能随时发生变化。",
@@ -995,8 +995,8 @@
 	"This will delete all models including custom models and cannot be undone.": "这将删除所有模型,包括自定义模型,且无法撤销。",
 	"This will reset the knowledge base and sync all files. Do you wish to continue?": "这将重置知识库并替换所有文件为目录下文件。确认继续?",
 	"Thorough explanation": "解释较为详细",
-	"Thought for {{DURATION}}": "已深度思考 用时 {{DURATION}}",
-	"Thought for {{DURATION}} seconds": "已深度思考 用时 {{DURATION}} 秒",
+	"Thought for {{DURATION}}": "已推理 持续 {{DURATION}}",
+	"Thought for {{DURATION}} seconds": "已推理 持续 {{DURATION}} 秒",
 	"Tika": "Tika",
 	"Tika Server URL required.": "请输入 Tika 服务器地址。",
 	"Tiktoken": "Tiktoken",