
Update docs

Roy Han 10 months ago
commit 6266603b17
2 changed files with 51 additions and 1 deletion
  1. + 50 - 0  docs/openai.md
  2. + 1 - 1   llm/llama.cpp

+ 50 - 0
docs/openai.md

@@ -27,6 +27,11 @@ chat_completion = client.chat.completions.create(
     ],
     model='llama3',
 )
+
+completion = client.completions.create(
+    model='llama3',
+    prompt='Say this is a test'
+)
 ```
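Streaming is also supported on this endpoint (see the feature list under `/v1/completions` below). A minimal sketch, assuming a `client` configured to point at the local Ollama server in the same way as the chat example; the `base_url` and `api_key` values here follow the pattern used elsewhere in these docs:

```python
from openai import OpenAI

# Assumed setup: local Ollama server with the OpenAI-compatible API;
# the api_key is required by the client but ignored by Ollama.
client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')

stream = client.completions.create(
    model='llama3',
    prompt='Say this is a test',
    stream=True,
)
for chunk in stream:
    # Each chunk carries an incremental piece of the generated text.
    print(chunk.choices[0].text, end='', flush=True)
```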
 
 ### OpenAI JavaScript library
@@ -45,6 +50,11 @@ const chatCompletion = await openai.chat.completions.create({
   messages: [{ role: 'user', content: 'Say this is a test' }],
   model: 'llama3',
 })
+
+const completion = await openai.completions.create({
+  model: 'llama3',
+  prompt: 'Say this is a test',
+})
 ```
 
 ### `curl`
@@ -66,6 +76,12 @@ curl http://localhost:11434/v1/chat/completions \
         ]
     }'
 
+curl http://localhost:11434/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "llama3",
+        "prompt": "Say this is a test"
+    }'
 ```
 
 ## Endpoints
@@ -107,6 +123,40 @@ curl http://localhost:11434/v1/chat/completions \
 
 - `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 
+### `/v1/completions`
+
+#### Supported features
+
+- [x] Completions
+- [x] Streaming
+- [x] JSON mode
+- [x] Reproducible outputs
+- [ ] Logprobs
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `prompt`
+- [x] `frequency_penalty`
+- [x] `presence_penalty`
+- [x] `seed`
+- [x] `stop`
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_tokens`
+- [ ] `best_of`
+- [ ] `echo`
+- [ ] `suffix`
+- [ ] `logit_bias`
+- [ ] `user`
+- [ ] `n`
+
+#### Notes
+
+- `prompt` currently only accepts a string
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
+
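To show how the supported request fields above combine in practice, here is a hedged sketch in Python; the parameter values are illustrative, not defaults, and the client setup again assumes the local-server pattern used in these docs:

```python
from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')

completion = client.completions.create(
    model='llama3',
    prompt='Say this is a test',  # note: a string, not a list of strings
    temperature=0,                # fixed temperature plus a seed
    seed=42,                      # gives reproducible outputs
    max_tokens=32,
    stop=['\n'],
)
print(completion.choices[0].text)
# usage.prompt_tokens is 0 when prompt evaluation was cached
print(completion.usage)
```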
 ## Models
 
Before using a model, pull it locally with `ollama pull`:

+ 1 - 1
llm/llama.cpp

@@ -1 +1 @@
-Subproject commit a8db2a9ce64cd4417f6a312ab61858f17f0f8584
+Subproject commit 7c26775adb579e92b59c82e8084c07a1d0f75e9c