|
@@ -1030,6 +1030,16 @@ async def generate_chat_completion(
|
|
if ":" not in payload["model"]:
|
|
if ":" not in payload["model"]:
|
|
payload["model"] = f"{payload['model']}:latest"
|
|
payload["model"] = f"{payload['model']}:latest"
|
|
|
|
|
|
|
|
+ await get_all_models(request)
|
|
|
|
+ model = request.app.state.OLLAMA_MODELS.get(model_id)
|
|
|
|
+ if model:
|
|
|
|
+ url_idx = model["urls"].pop()
|
|
|
|
+ else:
|
|
|
|
+ raise HTTPException(
|
|
|
|
+ status_code=404,
|
|
|
|
+ detail="Model not found",
|
|
|
|
+ )
|
|
|
|
+
|
|
url = await get_ollama_url(request, payload["model"], url_idx)
|
|
url = await get_ollama_url(request, payload["model"], url_idx)
|
|
api_config = request.app.state.config.OLLAMA_API_CONFIGS.get(
|
|
api_config = request.app.state.config.OLLAMA_API_CONFIGS.get(
|
|
str(url_idx),
|
|
str(url_idx),
|