Browse Source

Introduce docker-compose.playwright.yaml + run-compose update

Rory 2 months ago
parent
commit
c3df481b22
3 changed files with 20 additions and 1 deletions
  1. 1 1
      backend/requirements.txt
  2. 10 0
      docker-compose.playwright.yaml
  3. 9 0
      run-compose.sh

+ 1 - 1
backend/requirements.txt

@@ -46,7 +46,7 @@ chromadb==0.6.2
 pymilvus==2.5.0
 qdrant-client~=1.12.0
 opensearch-py==2.7.1
-playwright==1.49.1
+playwright==1.49.1 # Caution: version must match both the image tag and the npx playwright@ version in docker-compose.playwright.yaml
 
 transformers
 sentence-transformers==3.3.1

+ 10 - 0
docker-compose.playwright.yaml

@@ -0,0 +1,10 @@
+services:
+  playwright:
+    image: mcr.microsoft.com/playwright:v1.49.1-noble # Version must match the playwright pin in backend/requirements.txt and the npx playwright@ version in the command below
+    container_name: playwright
+    command: npx -y playwright@1.49.1 run-server --port 3000 --host 0.0.0.0
+
+  open-webui:
+    environment:
+      - 'RAG_WEB_LOADER=playwright'
+      - 'PLAYWRIGHT_WS_URI=ws://playwright:3000'

+ 9 - 0
run-compose.sh

@@ -74,6 +74,7 @@ usage() {
     echo "  --enable-api[port=PORT]    Enable API and expose it on the specified port."
     echo "  --webui[port=PORT]         Set the port for the web user interface."
     echo "  --data[folder=PATH]        Bind mount for ollama data folder (by default will create the 'ollama' volume)."
+    echo "  --playwright               Enable Playwright support for web scraping."
     echo "  --build                    Build the docker image before running the compose project."
     echo "  --drop                     Drop the compose project."
     echo "  -q, --quiet                Run script in headless mode."
@@ -100,6 +101,7 @@ webui_port=3000
 headless=false
 build_image=false
 kill_compose=false
+enable_playwright=false
 
 # Function to extract value from the parameter
 extract_value() {
@@ -129,6 +131,9 @@ while [[ $# -gt 0 ]]; do
             value=$(extract_value "$key")
             data_dir=${value:-"./ollama-data"}
             ;;
+        --playwright)
+            enable_playwright=true
+            ;;
         --drop)
             kill_compose=true
             ;;
@@ -182,6 +187,9 @@ else
         DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml"
         export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable
     fi
+    if [[ $enable_playwright == true ]]; then
+        DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.playwright.yaml"
+    fi
     if [[ -n $webui_port ]]; then
         export OPEN_WEBUI_PORT=$webui_port # Set OPEN_WEBUI_PORT environment variable
     fi
@@ -201,6 +209,7 @@ echo -e "   ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}"
 echo -e "   ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}"
 echo -e "   ${GREEN}${BOLD}Data Folder:${NC} ${data_dir:-Using ollama volume}"
 echo -e "   ${GREEN}${BOLD}WebUI Port:${NC} $webui_port"
+echo -e "   ${GREEN}${BOLD}Playwright:${NC} ${enable_playwright:-false}"
 echo
 
 if [[ $headless == true ]]; then