@@ -54,7 +54,7 @@ type Scheduler struct {
 var defaultModelsPerGPU = 3
 
 // Default automatic value for parallel setting
-// Model will still need to fit in VRAM. If this setting wont fit
+// Model will still need to fit in VRAM. If this setting won't fit
 // we'll back off down to 1 to try to get it to fit
 var defaultParallel = 4
 
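The comment this hunk corrects describes a backoff: start from defaultParallel and shrink toward 1 until the model fits in VRAM. A minimal sketch of that shape, assuming a hypothetical fitsInVRAM predicate and a made-up per-slot cost — the real scheduler derives fit from its GGML memory estimate, not from these numbers:

```go
package main

import "fmt"

const defaultParallel = 4

// fitsInVRAM is a placeholder predicate; the real code sizes the model's
// KV cache for numParallel*NumCtx from its memory estimate.
func fitsInVRAM(numParallel int, freeVRAM uint64) bool {
	const perSlot = 2 << 30 // assume ~2 GiB per parallel slot for the sketch
	return uint64(numParallel)*perSlot <= freeVRAM
}

// pickParallel backs off from defaultParallel down to 1, as the comment says.
func pickParallel(freeVRAM uint64) int {
	for p := defaultParallel; p > 1; p-- {
		if fitsInVRAM(p, freeVRAM) {
			return p
		}
	}
	return 1 // last resort: a single slot, to try to get the model to fit
}

func main() {
	fmt.Println(pickParallel(5 << 30)) // 2, under the assumed 2 GiB/slot
}
```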
@@ -501,7 +501,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) {
 		} else if (allGpus[i].TotalMemory - p) < allGpus[i].FreeMemory { // predicted free is smaller than reported free, use it
 			// TODO maybe we should just always trust our numbers, since cuda's free memory reporting is laggy
 			// and we might unload models we didn't actually need to. The risk is if some other GPU intensive app is loaded
-			// after we start our first runner, then we'll never acount for that, so picking the smallest free value seems prudent.
+			// after we start our first runner, then we'll never account for that, so picking the smallest free value seems prudent.
 			allGpus[i].FreeMemory = allGpus[i].TotalMemory - p
 		}
 		slog.Info("updated VRAM based on existing loaded models", "gpu", allGpus[i].ID, "library", allGpus[i].Library, "total", format.HumanBytes2(allGpus[i].TotalMemory), "available", format.HumanBytes2(allGpus[i].FreeMemory))
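The TODO being corrected weighs two free-VRAM figures: what the driver reports, and what we predict from our own loaded runners (total minus p). The branch keeps whichever is smaller. A self-contained sketch of that rule, with a hypothetical minFree helper and illustrative numbers:

```go
package main

import "fmt"

// minFree returns the more conservative free-VRAM figure, mirroring the
// else-if branch above. total and reportedFree come from the GPU library;
// loaded is the sum we predict our own runners are holding.
func minFree(total, reportedFree, loaded uint64) uint64 {
	predictedFree := total - loaded
	if predictedFree < reportedFree { // predicted free is smaller, use it
		return predictedFree
	}
	return reportedFree // driver reports less than we predict; trust it
}

func main() {
	// 24 GiB card, driver says 20 GiB free, we think runners hold 6 GiB.
	fmt.Println(minFree(24<<30, 20<<30, 6<<30) >> 30) // 18 (GiB)
}
```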
@@ -683,7 +683,7 @@ func (a ByDuration) Less(i, j int) bool {
 // pickBestFullFitByLibrary will try to find the optimal placement of the model in the available GPUs where the model fully fits
 // The list of GPUs returned will always be the same brand (library)
 // If the model can not be fit fully within the available GPU(s) nil is returned
-// If numParallel is <= 0, this will attempt try to optimize parallism based on available VRAM, and adjust
+// If numParallel is <= 0, this will attempt to optimize parallelism based on available VRAM, and adjust
 // opts.NumCtx accordingly
 func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus discover.GpuInfoList, numParallel *int) discover.GpuInfoList {
 	var estimatedVRAM uint64
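The doc comment fixed in this hunk states the contract: candidates are grouped by library (brand), the returned list is always a single library, and nil means no full fit. A hedged sketch of that contract, with a simplified gpuInfo and a hypothetical estimateVRAM stand-in for the real per-split estimate — not the actual placement logic, which also weighs splits and parallelism:

```go
package main

import "fmt"

type gpuInfo struct {
	Library    string
	FreeMemory uint64
}

// estimateVRAM is a placeholder; the real code sizes the model per GPU split.
func estimateVRAM() uint64 { return 10 << 30 }

// pickFullFit returns a same-library group whose combined free VRAM holds
// the whole model, or nil when nothing fits — the documented contract.
func pickFullFit(gpus []gpuInfo) []gpuInfo {
	byLib := map[string][]gpuInfo{}
	for _, g := range gpus {
		byLib[g.Library] = append(byLib[g.Library], g)
	}
	need := estimateVRAM()
	for _, group := range byLib { // every returned GPU shares one library
		var free uint64
		for _, g := range group {
			free += g.FreeMemory
		}
		if free >= need {
			return group
		}
	}
	return nil // model cannot fully fit on any single-library set of GPUs
}

func main() {
	gpus := []gpuInfo{{"cuda", 8 << 30}, {"cuda", 8 << 30}, {"rocm", 4 << 30}}
	fmt.Println(len(pickFullFit(gpus))) // 2: the cuda pair covers 10 GiB
}
```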