files.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. import logging
  2. import os
  3. import shutil
  4. import uuid
  5. from pathlib import Path
  6. from typing import Optional
  7. from pydantic import BaseModel
  8. import mimetypes
  9. from open_webui.apps.webui.models.files import FileForm, FileModel, Files
  10. from open_webui.apps.retrieval.main import process_file, ProcessFileForm
  11. from open_webui.config import UPLOAD_DIR, DOCS_DIR
  12. from open_webui.env import SRC_LOG_LEVELS
  13. from open_webui.constants import ERROR_MESSAGES
  14. from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
  15. from fastapi.responses import FileResponse, StreamingResponse
  16. from open_webui.utils.utils import get_admin_user, get_verified_user
# Module-level logger; its level comes from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])
# Router that the endpoint decorators below register their handlers on.
router = APIRouter()
  20. ############################
  21. # Upload File
  22. ############################
  23. @router.post("/")
  24. def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
  25. log.info(f"file.content_type: {file.content_type}")
  26. try:
  27. unsanitized_filename = file.filename
  28. filename = os.path.basename(unsanitized_filename)
  29. # replace filename with uuid
  30. id = str(uuid.uuid4())
  31. name = filename
  32. filename = f"{id}_{filename}"
  33. file_path = f"{UPLOAD_DIR}/{filename}"
  34. contents = file.file.read()
  35. with open(file_path, "wb") as f:
  36. f.write(contents)
  37. f.close()
  38. file = Files.insert_new_file(
  39. user.id,
  40. FileForm(
  41. **{
  42. "id": id,
  43. "filename": filename,
  44. "meta": {
  45. "name": name,
  46. "content_type": file.content_type,
  47. "size": len(contents),
  48. "path": file_path,
  49. },
  50. }
  51. ),
  52. )
  53. try:
  54. process_file(ProcessFileForm(file_id=id))
  55. file = Files.get_file_by_id(id=id)
  56. except Exception as e:
  57. log.exception(e)
  58. log.error(f"Error processing file: {file.id}")
  59. if file:
  60. return file
  61. else:
  62. raise HTTPException(
  63. status_code=status.HTTP_400_BAD_REQUEST,
  64. detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
  65. )
  66. except Exception as e:
  67. log.exception(e)
  68. raise HTTPException(
  69. status_code=status.HTTP_400_BAD_REQUEST,
  70. detail=ERROR_MESSAGES.DEFAULT(e),
  71. )
  72. @router.post("/upload/dir")
  73. def upload_dir(user=Depends(get_admin_user)):
  74. file_ids = []
  75. for path in Path(DOCS_DIR).rglob("./**/*"):
  76. if path.is_file() and not path.name.startswith("."):
  77. try:
  78. log.debug(f"Processing file from path: {path}")
  79. filename = path.name
  80. file_content_type = mimetypes.guess_type(path)
  81. # replace filename with uuid
  82. id = str(uuid.uuid4())
  83. name = filename
  84. contents = path.read_bytes()
  85. file_path = str(path)
  86. file = Files.insert_new_file(
  87. user.id,
  88. FileForm(
  89. **{
  90. "id": id,
  91. "filename": filename,
  92. "meta": {
  93. "name": name,
  94. "content_type": file_content_type,
  95. "size": len(contents),
  96. "path": file_path,
  97. },
  98. }
  99. ),
  100. )
  101. try:
  102. process_file(ProcessFileForm(file_id=id))
  103. log.debug(f"File processed: {path}, {file.id}")
  104. file_ids.append(file.id)
  105. except Exception as e:
  106. log.exception(e)
  107. log.error(f"Error processing file: {file.id}")
  108. except Exception as e:
  109. log.exception(e)
  110. pass
  111. return file_ids
  112. ############################
  113. # List Files
  114. ############################
  115. @router.get("/", response_model=list[FileModel])
  116. async def list_files(user=Depends(get_verified_user)):
  117. if user.role == "admin":
  118. files = Files.get_files()
  119. else:
  120. files = Files.get_files_by_user_id(user.id)
  121. return files
  122. ############################
  123. # Delete All Files
  124. ############################
  125. @router.delete("/all")
  126. async def delete_all_files(user=Depends(get_admin_user)):
  127. result = Files.delete_all_files()
  128. if result:
  129. folder = f"{UPLOAD_DIR}"
  130. try:
  131. # Check if the directory exists
  132. if os.path.exists(folder):
  133. # Iterate over all the files and directories in the specified directory
  134. for filename in os.listdir(folder):
  135. file_path = os.path.join(folder, filename)
  136. try:
  137. if os.path.isfile(file_path) or os.path.islink(file_path):
  138. os.unlink(file_path) # Remove the file or link
  139. elif os.path.isdir(file_path):
  140. shutil.rmtree(file_path) # Remove the directory
  141. except Exception as e:
  142. print(f"Failed to delete {file_path}. Reason: {e}")
  143. else:
  144. print(f"The directory {folder} does not exist")
  145. except Exception as e:
  146. print(f"Failed to process the directory {folder}. Reason: {e}")
  147. return {"message": "All files deleted successfully"}
  148. else:
  149. raise HTTPException(
  150. status_code=status.HTTP_400_BAD_REQUEST,
  151. detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
  152. )
  153. ############################
  154. # Get File By Id
  155. ############################
  156. @router.get("/{id}", response_model=Optional[FileModel])
  157. async def get_file_by_id(id: str, user=Depends(get_verified_user)):
  158. file = Files.get_file_by_id(id)
  159. if file and (file.user_id == user.id or user.role == "admin"):
  160. return file
  161. else:
  162. raise HTTPException(
  163. status_code=status.HTTP_404_NOT_FOUND,
  164. detail=ERROR_MESSAGES.NOT_FOUND,
  165. )
  166. ############################
  167. # Get File Data Content By Id
  168. ############################
  169. @router.get("/{id}/data/content")
  170. async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
  171. file = Files.get_file_by_id(id)
  172. if file and (file.user_id == user.id or user.role == "admin"):
  173. return {"content": file.data.get("content", "")}
  174. else:
  175. raise HTTPException(
  176. status_code=status.HTTP_404_NOT_FOUND,
  177. detail=ERROR_MESSAGES.NOT_FOUND,
  178. )
  179. ############################
  180. # Update File Data Content By Id
  181. ############################
class ContentForm(BaseModel):
    """Request body for the content-update endpoint."""

    # New text for the file; the update handler forwards it to process_file.
    content: str
  184. @router.post("/{id}/data/content/update")
  185. async def update_file_data_content_by_id(
  186. id: str, form_data: ContentForm, user=Depends(get_verified_user)
  187. ):
  188. file = Files.get_file_by_id(id)
  189. if file and (file.user_id == user.id or user.role == "admin"):
  190. try:
  191. process_file(ProcessFileForm(file_id=id, content=form_data.content))
  192. file = Files.get_file_by_id(id=id)
  193. except Exception as e:
  194. log.exception(e)
  195. log.error(f"Error processing file: {file.id}")
  196. return {"content": file.data.get("content", "")}
  197. else:
  198. raise HTTPException(
  199. status_code=status.HTTP_404_NOT_FOUND,
  200. detail=ERROR_MESSAGES.NOT_FOUND,
  201. )
  202. ############################
  203. # Get File Content By Id
  204. ############################
  205. @router.get("/{id}/content", response_model=Optional[FileModel])
  206. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  207. file = Files.get_file_by_id(id)
  208. if file and (file.user_id == user.id or user.role == "admin"):
  209. file_path = Path(file.meta["path"])
  210. # Check if the file already exists in the cache
  211. if file_path.is_file():
  212. print(f"file_path: {file_path}")
  213. return FileResponse(file_path)
  214. else:
  215. raise HTTPException(
  216. status_code=status.HTTP_404_NOT_FOUND,
  217. detail=ERROR_MESSAGES.NOT_FOUND,
  218. )
  219. else:
  220. raise HTTPException(
  221. status_code=status.HTTP_404_NOT_FOUND,
  222. detail=ERROR_MESSAGES.NOT_FOUND,
  223. )
  224. @router.get("/{id}/content/{file_name}", response_model=Optional[FileModel])
  225. async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
  226. file = Files.get_file_by_id(id)
  227. if file and (file.user_id == user.id or user.role == "admin"):
  228. file_path = file.meta.get("path")
  229. if file_path:
  230. file_path = Path(file_path)
  231. # Check if the file already exists in the cache
  232. if file_path.is_file():
  233. print(f"file_path: {file_path}")
  234. return FileResponse(file_path)
  235. else:
  236. raise HTTPException(
  237. status_code=status.HTTP_404_NOT_FOUND,
  238. detail=ERROR_MESSAGES.NOT_FOUND,
  239. )
  240. else:
  241. # File path doesn’t exist, return the content as .txt if possible
  242. file_content = file.content.get("content", "")
  243. file_name = file.filename
  244. # Create a generator that encodes the file content
  245. def generator():
  246. yield file_content.encode("utf-8")
  247. return StreamingResponse(
  248. generator(),
  249. media_type="text/plain",
  250. headers={"Content-Disposition": f"attachment; filename={file_name}"},
  251. )
  252. else:
  253. raise HTTPException(
  254. status_code=status.HTTP_404_NOT_FOUND,
  255. detail=ERROR_MESSAGES.NOT_FOUND,
  256. )
  257. ############################
  258. # Delete File By Id
  259. ############################
  260. @router.delete("/{id}")
  261. async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
  262. file = Files.get_file_by_id(id)
  263. if file and (file.user_id == user.id or user.role == "admin"):
  264. result = Files.delete_file_by_id(id)
  265. if result:
  266. return {"message": "File deleted successfully"}
  267. else:
  268. raise HTTPException(
  269. status_code=status.HTTP_400_BAD_REQUEST,
  270. detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
  271. )
  272. else:
  273. raise HTTPException(
  274. status_code=status.HTTP_404_NOT_FOUND,
  275. detail=ERROR_MESSAGES.NOT_FOUND,
  276. )