2 months ago · 1ae702b8a6
--- a/backend/open_webui/env.py
+++ b/backend/open_webui/env.py
@@ -419,3 +419,25 @@ OFFLINE_MODE = os.environ.get("OFFLINE_MODE", "false").lower() == "true"
 
				 
			
 
				 if OFFLINE_MODE:
			
 
				     os.environ["HF_HUB_OFFLINE"] = "1"
			
 
				+
			
 
				+####################################
			
 
				+# AUDIT LOGGING
			
 
				+####################################
			
 
				+ENABLE_AUDIT_LOGS = os.getenv("ENABLE_AUDIT_LOGS", "false").lower() == "true"
			
 
				+# Where to store log file
			
 
				+AUDIT_LOGS_FILE_PATH = f"{DATA_DIR}/audit.log"
			
 
				+# Maximum size of a file before rotating into a new log file
			
 
				+AUDIT_LOG_FILE_ROTATION_SIZE = os.getenv("AUDIT_LOG_FILE_ROTATION_SIZE", "10MB")
			
 
				+# METADATA | REQUEST | REQUEST_RESPONSE
			
 
				+AUDIT_LOG_LEVEL = os.getenv("AUDIT_LOG_LEVEL", "REQUEST_RESPONSE").upper()
			
 
				+try:
			
 
				+    MAX_BODY_LOG_SIZE = int(os.environ.get("MAX_BODY_LOG_SIZE") or 2048)
			
 
				+except ValueError:
			
 
				+    MAX_BODY_LOG_SIZE = 2048
			
 
				+
			
 
				+# Comma separated list for urls to exclude from audit
			
 
				+AUDIT_EXCLUDED_PATHS = os.getenv("AUDIT_EXCLUDED_PATHS", "/chats,/chat,/folders").split(
			
 
				+    ","
			
 
				+)
			
 
				+AUDIT_EXCLUDED_PATHS = [path.strip() for path in AUDIT_EXCLUDED_PATHS]
			
 
				+AUDIT_EXCLUDED_PATHS = [path.lstrip("/") for path in AUDIT_EXCLUDED_PATHS]
			
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -45,6 +45,9 @@ from starlette.middleware.sessions import SessionMiddleware
 
				 from starlette.responses import Response, StreamingResponse
			
 
				 
			
 
				 
			
 
				+from open_webui.utils import logger
			
 
				+from open_webui.utils.audit import AuditLevel, AuditLoggingMiddleware
			
 
				+from open_webui.utils.logger import start_logger
			
 
				 from open_webui.socket.main import (
			
 
				     app as socket_app,
			
 
				     periodic_usage_pool_cleanup,
			
@@ -304,8 +307,11 @@ from open_webui.config import (
 
				     reset_config,
			
 
				 )
			
 
				 from open_webui.env import (
			
 
				+    AUDIT_EXCLUDED_PATHS,
			
 
				+    AUDIT_LOG_LEVEL,
			
 
				     CHANGELOG,
			
 
				     GLOBAL_LOG_LEVEL,
			
 
				+    MAX_BODY_LOG_SIZE,
			
 
				     SAFE_MODE,
			
 
				     SRC_LOG_LEVELS,
			
 
				     VERSION,
			
@@ -390,6 +396,7 @@ https://github.com/open-webui/open-webui
 
				 
			
 
				 @asynccontextmanager
			
 
				 async def lifespan(app: FastAPI):
			
 
				+    start_logger()
			
 
				     if RESET_CONFIG_ON_START:
			
 
				         reset_config()
			
 
				 
			
@@ -891,6 +898,19 @@ app.include_router(
 
				 app.include_router(utils.router, prefix="/api/v1/utils", tags=["utils"])
			
 
				 
			
 
				 
			
 
				+try:
			
 
				+    audit_level = AuditLevel(AUDIT_LOG_LEVEL)
			
 
				+except ValueError as e:
			
 
				+    logger.error(f"Invalid audit level: {AUDIT_LOG_LEVEL}. Error: {e}")
			
 
				+    audit_level = AuditLevel.NONE
			
 
				+
			
 
				+if audit_level != AuditLevel.NONE:
			
 
				+    app.add_middleware(
			
 
				+        AuditLoggingMiddleware,
			
 
				+        audit_level=audit_level,
			
 
				+        excluded_paths=AUDIT_EXCLUDED_PATHS,
			
 
				+        max_body_size=MAX_BODY_LOG_SIZE,
			
 
				+    )
			
 
				 ##################################
			
 
				 #
			
 
				 # Chat Endpoints
			
--- a/backend/open_webui/utils/audit.py
+++ b/backend/open_webui/utils/audit.py
@@ -0,0 +1,249 @@
 
				+from contextlib import asynccontextmanager
			
 
				+from dataclasses import asdict, dataclass
			
 
				+from enum import Enum
			
 
				+import re
			
 
				+from typing import (
			
 
				+    TYPE_CHECKING,
			
 
				+    Any,
			
 
				+    AsyncGenerator,
			
 
				+    Dict,
			
 
				+    MutableMapping,
			
 
				+    Optional,
			
 
				+    cast,
			
 
				+)
			
 
				+import uuid
			
 
				+
			
 
				+from asgiref.typing import (
			
 
				+    ASGI3Application,
			
 
				+    ASGIReceiveCallable,
			
 
				+    ASGIReceiveEvent,
			
 
				+    ASGISendCallable,
			
 
				+    ASGISendEvent,
			
 
				+    Scope as ASGIScope,
			
 
				+)
			
 
				+from loguru import logger
			
 
				+from starlette.requests import Request
			
 
				+
			
 
				+from open_webui.env import AUDIT_LOG_LEVEL, MAX_BODY_LOG_SIZE
			
 
				+from open_webui.utils.auth import get_current_user, get_http_authorization_cred
			
 
				+from open_webui.models.users import UserModel
			
 
				+
			
 
				+
			
 
				+if TYPE_CHECKING:
			
 
				+    from loguru import Logger
			
 
				+
			
 
				+
			
 
				+@dataclass(frozen=True)
			
 
				+class AuditLogEntry:
			
 
				+    # `Metadata` audit level properties
			
 
				+    id: str
			
 
				+    user: dict[str, Any]
			
 
				+    audit_level: str
			
 
				+    verb: str
			
 
				+    request_uri: str
			
 
				+    user_agent: Optional[str] = None
			
 
				+    source_ip: Optional[str] = None
			
 
				+    # `Request` audit level properties
			
 
				+    request_object: Any = None
			
 
				+    # `Request Response` level
			
 
				+    response_object: Any = None
			
 
				+    response_status_code: Optional[int] = None
			
 
				+
			
 
				+
			
 
				+class AuditLevel(str, Enum):
			
 
				+    NONE = "NONE"
			
 
				+    METADATA = "METADATA"
			
 
				+    REQUEST = "REQUEST"
			
 
				+    REQUEST_RESPONSE = "REQUEST_RESPONSE"
			
 
				+
			
 
				+
			
 
				+class AuditLogger:
			
 
				+    """
			
 
				+    A helper class that encapsulates audit logging functionality. It uses Loguru’s logger with an auditable binding to ensure that audit log entries are filtered correctly.
			
 
				+
			
 
				+    Parameters:
			
 
				+    logger (Logger): An instance of Loguru’s logger.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, logger: "Logger"):
			
 
				+        self.logger = logger.bind(auditable=True)
			
 
				+
			
 
				+    def write(
			
 
				+        self,
			
 
				+        audit_entry: AuditLogEntry,
			
 
				+        *,
			
 
				+        log_level: str = "INFO",
			
 
				+        extra: Optional[dict] = None,
			
 
				+    ):
			
 
				+
			
 
				+        entry = asdict(audit_entry)
			
 
				+
			
 
				+        if extra:
			
 
				+            entry["extra"] = extra
			
 
				+
			
 
				+        self.logger.log(
			
 
				+            log_level,
			
 
				+            "",
			
 
				+            **entry,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+class AuditContext:
			
 
				+    """
			
 
				+    Captures and aggregates the HTTP request and response bodies during the processing of a request. It ensures that only a configurable maximum amount of data is stored to prevent excessive memory usage.
			
 
				+
			
 
				+    Attributes:
			
 
				+    request_body (bytearray): Accumulated request payload.
			
 
				+    response_body (bytearray): Accumulated response payload.
			
 
				+    max_body_size (int): Maximum number of bytes to capture.
			
 
				+    metadata (Dict[str, Any]): A dictionary to store additional audit metadata (user, http verb, user agent, etc.).
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, max_body_size: int = MAX_BODY_LOG_SIZE):
			
 
				+        self.request_body = bytearray()
			
 
				+        self.response_body = bytearray()
			
 
				+        self.max_body_size = max_body_size
			
 
				+        self.metadata: Dict[str, Any] = {}
			
 
				+
			
 
				+    def add_request_chunk(self, chunk: bytes):
			
 
				+        if len(self.request_body) < self.max_body_size:
			
 
				+            self.request_body.extend(
			
 
				+                chunk[: self.max_body_size - len(self.request_body)]
			
 
				+            )
			
 
				+
			
 
				+    def add_response_chunk(self, chunk: bytes):
			
 
				+        if len(self.response_body) < self.max_body_size:
			
 
				+            self.response_body.extend(
			
 
				+                chunk[: self.max_body_size - len(self.response_body)]
			
 
				+            )
			
 
				+
			
 
				+
			
 
				+class AuditLoggingMiddleware:
			
 
				+    """
			
 
				+    ASGI middleware that intercepts HTTP requests and responses to perform audit logging. It captures request/response bodies (depending on audit level), headers, HTTP methods, and user information, then logs a structured audit entry at the end of the request cycle.
			
 
				+    """
			
 
				+
			
 
				+    AUDITED_METHODS = {"PUT", "PATCH", "DELETE", "POST"}
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        app: ASGI3Application,
			
 
				+        *,
			
 
				+        excluded_paths: Optional[list[str]] = None,
			
 
				+        max_body_size: int = MAX_BODY_LOG_SIZE,
			
 
				+        audit_level: AuditLevel = AuditLevel.NONE,
			
 
				+    ) -> None:
			
 
				+        self.app = app
			
 
				+        self.audit_logger = AuditLogger(logger)
			
 
				+        self.excluded_paths = excluded_paths or []
			
 
				+        self.max_body_size = max_body_size
			
 
				+        self.audit_level = audit_level
			
 
				+
			
 
				+    async def __call__(
			
 
				+        self,
			
 
				+        scope: ASGIScope,
			
 
				+        receive: ASGIReceiveCallable,
			
 
				+        send: ASGISendCallable,
			
 
				+    ) -> None:
			
 
				+        if scope["type"] != "http":
			
 
				+            return await self.app(scope, receive, send)
			
 
				+
			
 
				+        request = Request(scope=cast(MutableMapping, scope))
			
 
				+
			
 
				+        if self._should_skip_auditing(request):
			
 
				+            return await self.app(scope, receive, send)
			
 
				+
			
 
				+        async with self._audit_context(request) as context:
			
 
				+
			
 
				+            async def send_wrapper(message: ASGISendEvent) -> None:
			
 
				+                if self.audit_level == AuditLevel.REQUEST_RESPONSE:
			
 
				+                    await self._capture_response(message, context)
			
 
				+
			
 
				+                await send(message)
			
 
				+
			
 
				+            original_receive = receive
			
 
				+
			
 
				+            async def receive_wrapper() -> ASGIReceiveEvent:
			
 
				+                nonlocal original_receive
			
 
				+                message = await original_receive()
			
 
				+
			
 
				+                if self.audit_level in (
			
 
				+                    AuditLevel.REQUEST,
			
 
				+                    AuditLevel.REQUEST_RESPONSE,
			
 
				+                ):
			
 
				+                    await self._capture_request(message, context)
			
 
				+
			
 
				+                return message
			
 
				+
			
 
				+            await self.app(scope, receive_wrapper, send_wrapper)
			
 
				+
			
 
				+    @asynccontextmanager
			
 
				+    async def _audit_context(
			
 
				+        self, request: Request
			
 
				+    ) -> AsyncGenerator[AuditContext, None]:
			
 
				+        """
			
 
				+        async context manager that ensures that an audit log entry is recorded after the request is processed.
			
 
				+        """
			
 
				+        context = AuditContext()
			
 
				+        try:
			
 
				+            yield context
			
 
				+        finally:
			
 
				+            await self._log_audit_entry(request, context)
			
 
				+
			
 
				+    async def _get_authenticated_user(self, request: Request) -> UserModel:
			
 
				+
			
 
				+        auth_header = request.headers.get("Authorization")
			
 
				+        assert auth_header
			
 
				+        user = get_current_user(request, get_http_authorization_cred(auth_header))
			
 
				+
			
 
				+        return user
			
 
				+
			
 
				+    def _should_skip_auditing(self, request: Request) -> bool:
			
 
				+        if (
			
 
				+            request.method not in {"POST", "PUT", "PATCH", "DELETE"}
			
 
				+            or AUDIT_LOG_LEVEL == "NONE"
			
 
				+            or not request.headers.get("authorization")
			
 
				+        ):
			
 
				+            return True
			
 
				+        # match either /api/<resource>/...(for the endpoint /api/chat case) or /api/v1/<resource>/...
			
 
				+        pattern = re.compile(
			
 
				+            r"^/api(?:/v1)?/(" + "|".join(self.excluded_paths) + r")\b"
			
 
				+        )
			
 
				+        if pattern.match(request.url.path):
			
 
				+            return True
			
 
				+
			
 
				+        return False
			
 
				+
			
 
				+    async def _capture_request(self, message: ASGIReceiveEvent, context: AuditContext):
			
 
				+        if message["type"] == "http.request":
			
 
				+            body = message.get("body", b"")
			
 
				+            context.add_request_chunk(body)
			
 
				+
			
 
				+    async def _capture_response(self, message: ASGISendEvent, context: AuditContext):
			
 
				+        if message["type"] == "http.response.start":
			
 
				+            context.metadata["response_status_code"] = message["status"]
			
 
				+
			
 
				+        elif message["type"] == "http.response.body":
			
 
				+            body = message.get("body", b"")
			
 
				+            context.add_response_chunk(body)
			
 
				+
			
 
				+    async def _log_audit_entry(self, request: Request, context: AuditContext):
			
 
				+        try:
			
 
				+            user = await self._get_authenticated_user(request)
			
 
				+
			
 
				+            entry = AuditLogEntry(
			
 
				+                id=str(uuid.uuid4()),
			
 
				+                user=user.model_dump(include={"id", "name", "email", "role"}),
			
 
				+                audit_level=self.audit_level.value,
			
 
				+                verb=request.method,
			
 
				+                request_uri=str(request.url),
			
 
				+                response_status_code=context.metadata.get("response_status_code", None),
			
 
				+                source_ip=request.client.host if request.client else None,
			
 
				+                user_agent=request.headers.get("user-agent"),
			
 
				+                request_object=context.request_body.decode("utf-8", errors="replace"),
			
 
				+                response_object=context.response_body.decode("utf-8", errors="replace"),
			
 
				+            )
			
 
				+
			
 
				+            self.audit_logger.write(entry)
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Failed to log audit entry: {str(e)}")
			
--- a/backend/open_webui/utils/logger.py
+++ b/backend/open_webui/utils/logger.py
@@ -0,0 +1,140 @@
 
				+import json
			
 
				+import logging
			
 
				+import sys
			
 
				+from typing import TYPE_CHECKING
			
 
				+
			
 
				+from loguru import logger
			
 
				+
			
 
				+from open_webui.env import (
			
 
				+    AUDIT_LOG_FILE_ROTATION_SIZE,
			
 
				+    AUDIT_LOG_LEVEL,
			
 
				+    AUDIT_LOGS_FILE_PATH,
			
 
				+    GLOBAL_LOG_LEVEL,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+if TYPE_CHECKING:
			
 
				+    from loguru import Record
			
 
				+
			
 
				+
			
 
				+def stdout_format(record: "Record") -> str:
			
 
				+    """
			
 
				+    Generates a formatted string for log records that are output to the console. This format includes a timestamp, log level, source location (module, function, and line), the log message, and any extra data (serialized as JSON).
			
 
				+
			
 
				+    Parameters:
			
 
				+    record (Record): A Loguru record that contains logging details including time, level, name, function, line, message, and any extra context.
			
 
				+    Returns:
			
 
				+    str: A formatted log string intended for stdout.
			
 
				+    """
			
 
				+    record["extra"]["extra_json"] = json.dumps(record["extra"])
			
 
				+    return (
			
 
				+        "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
			
 
				+        "<level>{level: <8}</level> | "
			
 
				+        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
			
 
				+        "<level>{message}</level> - {extra[extra_json]}"
			
 
				+        "\n{exception}"
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+class InterceptHandler(logging.Handler):
			
 
				+    """
			
 
				+    Intercepts log records from Python's standard logging module
			
 
				+    and redirects them to Loguru's logger.
			
 
				+    """
			
 
				+
			
 
				+    def emit(self, record):
			
 
				+        """
			
 
				+        Called by the standard logging module for each log event.
			
 
				+        It transforms the standard `LogRecord` into a format compatible with Loguru
			
 
				+        and passes it to Loguru's logger.
			
 
				+        """
			
 
				+        try:
			
 
				+            level = logger.level(record.levelname).name
			
 
				+        except ValueError:
			
 
				+            level = record.levelno
			
 
				+
			
 
				+        frame, depth = sys._getframe(6), 6
			
 
				+        while frame and frame.f_code.co_filename == logging.__file__:
			
 
				+            frame = frame.f_back
			
 
				+            depth += 1
			
 
				+
			
 
				+        logger.opt(depth=depth, exception=record.exc_info).log(
			
 
				+            level, record.getMessage()
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def file_format(record: "Record"):
			
 
				+    """
			
 
				+    Formats audit log records into a structured JSON string for file output.
			
 
				+
			
 
				+    Parameters:
			
 
				+    record (Record): A Loguru record containing extra audit data.
			
 
				+    Returns:
			
 
				+    str: A JSON-formatted string representing the audit data.
			
 
				+    """
			
 
				+
			
 
				+    audit_data = {
			
 
				+        "id": record["extra"].get("id", ""),
			
 
				+        "timestamp": int(record["time"].timestamp()),
			
 
				+        "user": record["extra"].get("user", dict()),
			
 
				+        "audit_level": record["extra"].get("audit_level", ""),
			
 
				+        "verb": record["extra"].get("verb", ""),
			
 
				+        "request_uri": record["extra"].get("request_uri", ""),
			
 
				+        "response_status_code": record["extra"].get("response_status_code", 0),
			
 
				+        "source_ip": record["extra"].get("source_ip", ""),
			
 
				+        "user_agent": record["extra"].get("user_agent", ""),
			
 
				+        "request_object": record["extra"].get("request_object", b""),
			
 
				+        "response_object": record["extra"].get("response_object", b""),
			
 
				+        "extra": record["extra"].get("extra", {}),
			
 
				+    }
			
 
				+
			
 
				+    record["extra"]["file_extra"] = json.dumps(audit_data, default=str)
			
 
				+    return "{extra[file_extra]}\n"
			
 
				+
			
 
				+
			
 
				+def start_logger():
			
 
				+    """
			
 
				+    Initializes and configures Loguru's logger with distinct handlers:
			
 
				+
			
 
				+    A console (stdout) handler for general log messages (excluding those marked as auditable).
			
 
				+    An optional file handler for audit logs if audit logging is enabled.
			
 
				+    Additionally, this function reconfigures Python’s standard logging to route through Loguru and adjusts logging levels for Uvicorn.
			
 
				+
			
 
				+    Parameters:
			
 
				+    enable_audit_logging (bool): Determines whether audit-specific log entries should be recorded to file.
			
 
				+    """
			
 
				+    logger.remove()
			
 
				+
			
 
				+    logger.add(
			
 
				+        sys.stdout,
			
 
				+        level=GLOBAL_LOG_LEVEL,
			
 
				+        format=stdout_format,
			
 
				+        filter=lambda record: "auditable" not in record["extra"],
			
 
				+    )
			
 
				+
			
 
				+    if AUDIT_LOG_LEVEL != "NONE":
			
 
				+        try:
			
 
				+            logger.add(
			
 
				+                AUDIT_LOGS_FILE_PATH,
			
 
				+                level="INFO",
			
 
				+                rotation=AUDIT_LOG_FILE_ROTATION_SIZE,
			
 
				+                compression="zip",
			
 
				+                format=file_format,
			
 
				+                filter=lambda record: record["extra"].get("auditable") is True,
			
 
				+            )
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Failed to initialize audit log file handler: {str(e)}")
			
 
				+
			
 
				+    logging.basicConfig(
			
 
				+        handlers=[InterceptHandler()], level=GLOBAL_LOG_LEVEL, force=True
			
 
				+    )
			
 
				+    for uvicorn_logger_name in ["uvicorn", "uvicorn.error"]:
			
 
				+        uvicorn_logger = logging.getLogger(uvicorn_logger_name)
			
 
				+        uvicorn_logger.setLevel(GLOBAL_LOG_LEVEL)
			
 
				+        uvicorn_logger.handlers = []
			
 
				+    for uvicorn_logger_name in ["uvicorn.access"]:
			
 
				+        uvicorn_logger = logging.getLogger(uvicorn_logger_name)
			
 
				+        uvicorn_logger.setLevel(GLOBAL_LOG_LEVEL)
			
 
				+        uvicorn_logger.handlers = [InterceptHandler()]
			
 
				+
			
 
				+    logger.info(f"GLOBAL_LOG_LEVEL: {GLOBAL_LOG_LEVEL}")
			
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -31,6 +31,9 @@ APScheduler==3.10.4
 
				 
			
 
				 RestrictedPython==8.0
			
 
				 
			
 
				+loguru==0.7.2
			
 
				+asgiref==3.8.1
			
 
				+
			
 
				 # AI libraries
			
 
				 openai
			
 
				 anthropic
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,9 @@ dependencies = [
 
				 
			
 
				     "RestrictedPython==8.0",
			
 
				 
			
 
				+    "loguru==0.7.2",
			
 
				+    "asgiref==3.8.1",
			
 
				+
			
 
				     "openai",
			
 
				     "anthropic",
			
 
				     "google-generativeai==0.7.2",