@@ -1,388 +0,0 @@
-import sys
-from contextlib import asynccontextmanager
-
-from fastapi import FastAPI, Depends, HTTPException
-from fastapi.routing import APIRoute
-from fastapi.middleware.cors import CORSMiddleware
-
-import logging
-from fastapi import FastAPI, Request, Depends, status, Response
-from fastapi.responses import JSONResponse
-
-from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
-from starlette.responses import StreamingResponse
-import json
-import time
-import requests
-
-from pydantic import BaseModel, ConfigDict
-from typing import Optional, List
-
-from apps.web.models.models import Models
-from utils.utils import get_verified_user, get_current_user, get_admin_user
-from config import SRC_LOG_LEVELS
-from constants import MESSAGES
-
-import os
-
-log = logging.getLogger(__name__)
-log.setLevel(SRC_LOG_LEVELS["LITELLM"])
-
-
-from config import (
-    ENABLE_LITELLM,
-    ENABLE_MODEL_FILTER,
-    MODEL_FILTER_LIST,
-    DATA_DIR,
-    LITELLM_PROXY_PORT,
-    LITELLM_PROXY_HOST,
-)
-
-import warnings
-
-warnings.simplefilter("ignore")
-
-from litellm.utils import get_llm_provider
-
-import asyncio
-import subprocess
-import yaml
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    log.info("startup_event")
-    # TODO: Check config.yaml file and create one
-    asyncio.create_task(start_litellm_background())
-    yield
-
-
-app = FastAPI(lifespan=lifespan)
-
-origins = ["*"]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"
-
-with open(LITELLM_CONFIG_DIR, "r") as file:
-    litellm_config = yaml.safe_load(file)
-
-
-app.state.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER.value
-app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST.value
-app.state.MODEL_CONFIG = Models.get_all_models()
-
-app.state.ENABLE = ENABLE_LITELLM
-app.state.CONFIG = litellm_config
-
-# Global variable to store the subprocess reference
-background_process = None
-
-CONFLICT_ENV_VARS = [
-    # Uvicorn uses PORT, so LiteLLM might use it as well
-    "PORT",
-    # LiteLLM uses DATABASE_URL for Prisma connections
-    "DATABASE_URL",
-]
-
-
-async def run_background_process(command):
-    global background_process
-    log.info("run_background_process")
-
-    try:
-        # Log the command to be executed
-        log.info(f"Executing command: {command}")
-        # Filter environment variables known to conflict with litellm
-        env = {k: v for k, v in os.environ.items() if k not in CONFLICT_ENV_VARS}
-        # Execute the command and create a subprocess
-        process = await asyncio.create_subprocess_exec(
-            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
-        )
-        background_process = process
-        log.info("Subprocess started successfully.")
-
-        # Capture STDERR for debugging purposes
-        stderr_output = await process.stderr.read()
-        stderr_text = stderr_output.decode().strip()
-        if stderr_text:
-            log.info(f"Subprocess STDERR: {stderr_text}")
-
-        # log.info output line by line
-        async for line in process.stdout:
-            log.info(line.decode().strip())
-
-        # Wait for the process to finish
-        returncode = await process.wait()
-        log.info(f"Subprocess exited with return code {returncode}")
-    except Exception as e:
-        log.error(f"Failed to start subprocess: {e}")
-        raise  # Optionally re-raise the exception if you want it to propagate
-
-
-async def start_litellm_background():
-    log.info("start_litellm_background")
-    # Command to run in the background
-    command = [
-        "litellm",
-        "--port",
-        str(LITELLM_PROXY_PORT),
-        "--host",
-        LITELLM_PROXY_HOST,
-        "--telemetry",
-        "False",
-        "--config",
-        LITELLM_CONFIG_DIR,
-    ]
-
-    await run_background_process(command)
-
-
-async def shutdown_litellm_background():
-    log.info("shutdown_litellm_background")
-    global background_process
-    if background_process:
-        background_process.terminate()
-        await background_process.wait()  # Ensure the process has terminated
-        log.info("Subprocess terminated")
-        background_process = None
-
-
-@app.get("/")
-async def get_status():
-    return {"status": True}
-
-
-async def restart_litellm():
-    """
-    Endpoint to restart the litellm background service.
-    """
-    log.info("Requested restart of litellm service.")
-    try:
-        # Shut down the existing process if it is running
-        await shutdown_litellm_background()
-        log.info("litellm service shutdown complete.")
-
-        # Restart the background service
-
-        asyncio.create_task(start_litellm_background())
-        log.info("litellm service restart complete.")
-
-        return {
-            "status": "success",
-            "message": "litellm service restarted successfully.",
-        }
-    except Exception as e:
-        log.info(f"Error restarting litellm service: {e}")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-@app.get("/restart")
-async def restart_litellm_handler(user=Depends(get_admin_user)):
-    return await restart_litellm()
-
-
-@app.get("/config")
-async def get_config(user=Depends(get_admin_user)):
-    return app.state.CONFIG
-
-
-class LiteLLMConfigForm(BaseModel):
-    general_settings: Optional[dict] = None
-    litellm_settings: Optional[dict] = None
-    model_list: Optional[List[dict]] = None
-    router_settings: Optional[dict] = None
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/config/update")
-async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
-    app.state.CONFIG = form_data.model_dump(exclude_none=True)
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-    return app.state.CONFIG
-
-
-@app.get("/models")
-@app.get("/v1/models")
-async def get_models(user=Depends(get_current_user)):
-
-    if app.state.ENABLE:
-        while not background_process:
-            await asyncio.sleep(0.1)
-
-        url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
-        r = None
-        try:
-            r = requests.request(method="GET", url=f"{url}/models")
-            r.raise_for_status()
-
-            data = r.json()
-
-            if app.state.ENABLE_MODEL_FILTER:
-                if user and user.role == "user":
-                    data["data"] = list(
-                        filter(
-                            lambda model: model["id"] in app.state.MODEL_FILTER_LIST,
-                            data["data"],
-                        )
-                    )
-
-            return data
-        except Exception as e:
-
-            log.exception(e)
-            error_detail = "Open WebUI: Server Connection Error"
-            if r is not None:
-                try:
-                    res = r.json()
-                    if "error" in res:
-                        error_detail = f"External: {res['error']}"
-                except:
-                    error_detail = f"External: {e}"
-
-            return {
-                "data": [
-                    {
-                        "id": model["model_name"],
-                        "object": "model",
-                        "created": int(time.time()),
-                        "owned_by": "openai",
-                        "custom_info": next(
-                            (
-                                item
-                                for item in app.state.MODEL_CONFIG
-                                if item.id == model["model_name"]
-                            ),
-                            None,
-                        ),
-                    }
-                    for model in app.state.CONFIG["model_list"]
-                ],
-                "object": "list",
-            }
-    else:
-        return {
-            "data": [],
-            "object": "list",
-        }
-
-
-@app.get("/model/info")
-async def get_model_list(user=Depends(get_admin_user)):
-    return {"data": app.state.CONFIG["model_list"]}
-
-
-class AddLiteLLMModelForm(BaseModel):
-    model_name: str
-    litellm_params: dict
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/model/new")
-async def add_model_to_config(
-    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    try:
-        get_llm_provider(model=form_data.model_name)
-        app.state.CONFIG["model_list"].append(form_data.model_dump())
-
-        with open(LITELLM_CONFIG_DIR, "w") as file:
-            yaml.dump(app.state.CONFIG, file)
-
-        await restart_litellm()
-
-        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
-    except Exception as e:
-        print(e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-class DeleteLiteLLMModelForm(BaseModel):
-    id: str
-
-
-@app.post("/model/delete")
-async def delete_model_from_config(
-    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    app.state.CONFIG["model_list"] = [
-        model
-        for model in app.state.CONFIG["model_list"]
-        if model["model_name"] != form_data.id
-    ]
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-
-    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
-
-
-@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
-    body = await request.body()
-
-    url = f"http://localhost:{LITELLM_PROXY_PORT}"
-
-    target_url = f"{url}/{path}"
-
-    headers = {}
-    # headers["Authorization"] = f"Bearer {key}"
-    headers["Content-Type"] = "application/json"
-
-    r = None
-
-    try:
-        r = requests.request(
-            method=request.method,
-            url=target_url,
-            data=body,
-            headers=headers,
-            stream=True,
-        )
-
-        r.raise_for_status()
-
-        # Check if response is SSE
-        if "text/event-stream" in r.headers.get("Content-Type", ""):
-            return StreamingResponse(
-                r.iter_content(chunk_size=8192),
-                status_code=r.status_code,
-                headers=dict(r.headers),
-            )
-        else:
-            response_data = r.json()
-            return response_data
-    except Exception as e:
-        log.exception(e)
-        error_detail = "Open WebUI: Server Connection Error"
-        if r is not None:
-            try:
-                res = r.json()
-                if "error" in res:
-                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
-            except:
-                error_detail = f"External: {e}"
-
-        raise HTTPException(
-            status_code=r.status_code if r else 500, detail=error_detail
-        )