2 months ago · 61a5254115
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -204,31 +204,31 @@ curl http://localhost:11434/v1/embeddings \
 
				     }'
			
 
				 ```
			
 
				 
			
 
				-## Extra Arguments
			
 
				+## Extra arguments
			
 
				 
			
 
				-### Setting Context Window Size
			
 
				-- `num_ctx` parameter can be used to set the context window for the model
			
 
				+### Setting context window size
			
 
				+- The `context_window` parameter can be used to set the context window for the model.
			
 
				 
			
 
				-#### OpenAI Python SDK
			
 
				-- OpenAI Python SDK does not support setting context window size, however this can be set for Ollama through the `extra_body` parameter
			
 
				+#### OpenAI Python library
			
 
				+- The OpenAI Python library does not support setting the context window size; however, it can be set for Ollama through the `extra_body` parameter.
			
 
				 
			
 
				 ```py
			
 
				-completion = client.beta.chat.completions.create(
			
 
				+completion = client.chat.completions.create(
			
 
				     model="llama3.1:8b",
			
 
				     messages=[{"role": "user", "content": "Say this is a test"}],
			
 
				-    extra_body={"num_ctx": 4096},
			
 
				+    extra_body={"context_window": 4096},
			
 
				 )
			
 
				 ```
			
 
				 
			
 
				-#### OpenAI JS SDK
			
 
				-- OpenAI JS SDK does not support setting context window size, however this can be set for Ollama by passing `num_ctx` directly with a `@ts-expect-error` as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
			
 
				+#### OpenAI JavaScript library
			
 
				+- The OpenAI JavaScript library does not support setting the context window size; however, it can be set for Ollama by passing `context_window` directly, with a `@ts-expect-error` comment, as an undocumented parameter. [See documentation here](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
			
 
				 
			
 
				 ```ts
			
 
				 const chatCompletion = await openai.chat.completions.create({
			
 
				     messages: [{ role: 'user', content: 'Say this is a test' }],
			
 
				     model: 'llama3.2',
			
 
				-    // @ts-expect-error num_ctx is not officially supported
			
 
				-    num_ctx: 4096,
			
 
				+    // @ts-expect-error context_window is an additional parameter 
			
 
				+    context_window: 4096,
			
 
				 })
			
 
				 ```
			
 
				 
			
@@ -239,7 +239,7 @@ curl http://localhost:11434/v1/chat/completions \
 
				     -d '{
			
 
				         "model": "llama3.2",
			
 
				         "messages": [{"role": "user", "content": "Say this is a test"}],
			
 
				-        "num_ctx": 4096
			
 
				+        "context_window": 4096
			
 
				     }'
			
 
				 ```
			
 
				 
			
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -86,7 +86,7 @@ type ChatCompletionRequest struct {
 
				 	StreamOptions       *StreamOptions `json:"stream_options"`
			
 
				 	MaxCompletionTokens *int           `json:"max_completion_tokens"`
			
 
				 	// Deprecated: Use [ChatCompletionRequest.MaxCompletionTokens]
			
 
				-	MaxTokens        *int            `json:"max_tokens" deprecated:"use max_completion_tokens instead"`
			
 
				+	MaxTokens        *int            `json:"max_tokens"`
			
 
				 	Seed             *int            `json:"seed"`
			
 
				 	Stop             any             `json:"stop"`
			
 
				 	Temperature      *float64        `json:"temperature"`
			
@@ -95,7 +95,7 @@ type ChatCompletionRequest struct {
 
				 	TopP             *float64        `json:"top_p"`
			
 
				 	ResponseFormat   *ResponseFormat `json:"response_format"`
			
 
				 	Tools            []api.Tool      `json:"tools"`
			
 
				-	NumCtx           *int            `json:"num_ctx"`
			
 
				+	ContextWindow    *int            `json:"context_window"`
			
 
				 }
			
 
				 
			
 
				 type ChatCompletion struct {
			
@@ -478,8 +478,9 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 
				 		options["stop"] = stops
			
 
				 	}
			
 
				 
			
 
				-	if r.NumCtx != nil {
			
 
				-		options["num_ctx"] = *r.NumCtx
			
 
				+	if r.ContextWindow != nil {
			
 
				+		slog.Info("context_window in if", "context_window", *r.ContextWindow)
			
 
				+		options["num_ctx"] = *r.ContextWindow
			
 
				 	}
			
 
				 
			
 
				 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
			
@@ -974,6 +975,7 @@ func ChatMiddleware() gin.HandlerFunc {
 
				 			c.AbortWithStatusJSON(http.StatusBadRequest, NewError(http.StatusBadRequest, err.Error()))
			
 
				 			return
			
 
				 		}
			
 
				+		slog.Info("num_ctx", "num_ctx", chatReq.Options["num_ctx"])
			
 
				 
			
 
				 		if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
			
 
				 			c.AbortWithStatusJSON(http.StatusInternalServerError, NewError(http.StatusInternalServerError, err.Error()))
			
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -315,11 +315,11 @@ func TestChatMiddleware(t *testing.T) {
 
				 			},
			
 
				 		},
			
 
				 		{
			
 
				-			name: "chat handler with num_ctx",
			
 
				+			name: "chat handler with context_window",
			
 
				 			body: `{
			
 
				 				"model": "test-model",
			
 
				 				"messages": [{"role": "user", "content": "Hello"}],
			
 
				-				"num_ctx": 4096 
			
 
				+				"context_window": 4096 
			
 
				 			}`,
			
 
				 			req: api.ChatRequest{
			
 
				 				Model:    "test-model",