
Merge branch 'ollama:main' into main

mraiser 1 year ago
parent
commit
4c4c730a0a

+ 77 - 21
.github/workflows/test.yaml

@@ -23,29 +23,72 @@ jobs:
        with:
          go-version: '1.21'
          cache: true
-      - if: ${{ startsWith(matrix.os, 'windows-') }}
-        shell: pwsh
-        run: |
-          $path = vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath
-          if ($path) {
-              $path = join-path $path 'Common7\Tools\vsdevcmd.bat'
-              if (test-path $path) {
-                  cmd /s /c """$path"" $args && set" | where { $_ -match '(\w+)=(.*)' } | foreach {
-                      echo "$($Matches[1])=$($Matches[2])" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
-                  }
-              }
-          }
-
-          echo "C:\Program Files\Git\usr\bin" | Out-File -FilePath $Env:GITHUB_PATH -Encoding utf8 -Append
      - run: go get ./...
      - run: go generate -x ./...
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: |
-            llm/llama.cpp/build/**/lib/*
+          path: llm/llama.cpp/build/**/lib/*
+  generate-cuda:
+    strategy:
+      matrix:
+        cuda-version:
+          - '11.8.0'
+    runs-on: ubuntu-latest
+    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
+    steps:
+      - run: |
+          apt-get update && apt-get install -y git build-essential curl
+          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+            | tar -zx -C /usr --strip-components 1
+        env:
+          DEBIAN_FRONTEND: noninteractive
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '1.21'
+          cache: true
+      - run: go get ./...
+      - run: |
+          git config --global --add safe.directory /__w/ollama/ollama
+          go generate -x ./...
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cuda-${{ matrix.cuda-version }}-libraries
+          path: llm/llama.cpp/build/**/lib/*
+  generate-rocm:
+    strategy:
+      matrix:
+        rocm-version:
+          - '5.7.1'
+          - '6.0'
+    runs-on: ubuntu-latest
+    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
+    steps:
+      - run: |
+          apt-get update && apt-get install -y git build-essential curl rocm-libs
+          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+            | tar -zx -C /usr --strip-components 1
+        env:
+          DEBIAN_FRONTEND: noninteractive
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '1.21'
+          cache: true
+      - run: go get ./...
+      - run: |
+          git config --global --add safe.directory /__w/ollama/ollama
+          go generate -x ./...
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: rocm-${{ matrix.rocm-version }}-libraries
+          path: llm/llama.cpp/build/**/lib/*
  lint:
-    needs: generate
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
@@ -69,10 +112,19 @@ jobs:
        with:
          go-version: '1.21'
          cache: false
-      - uses: actions/download-artifact@v4
-        with:
-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: llm/llama.cpp/build
+      - run: |
+          mkdir -p llm/llama.cpp/build/linux/${{ matrix.arch }}/stub/lib/
+          touch llm/llama.cpp/build/linux/${{ matrix.arch }}/stub/lib/stub.so
+        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
+      - run: |
+          mkdir -p llm/llama.cpp/build/darwin/${{ matrix.arch }}/stub/lib/
+          touch llm/llama.cpp/build/darwin/${{ matrix.arch }}/stub/lib/stub.dylib
+          touch llm/llama.cpp/ggml-metal.metal
+        if: ${{ startsWith(matrix.os, 'macos-') }}
+      - run: |
+          mkdir -p llm/llama.cpp/build/windows/${{ matrix.arch }}/stub/lib/
+          touch llm/llama.cpp/build/windows/${{ matrix.arch }}/stub/lib/stub.dll
+        if: ${{ startsWith(matrix.os, 'windows-') }}
      - uses: golangci/golangci-lint-action@v3
  test:
    needs: generate
@@ -104,3 +156,7 @@ jobs:
          path: llm/llama.cpp/build
      - run: go build
      - run: go test -v ./...
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.os }}-binaries
+          path: ollama

+ 13 - 2
Dockerfile

@@ -109,17 +109,28 @@ ARG CGO_CFLAGS
 RUN go build .
 
 # Runtime stages
-FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete as runtime-amd64
+FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
+RUN apt-get update && apt-get install -y ca-certificates
 COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
 RUN apt-get update && apt-get install -y ca-certificates
 COPY --from=build-arm64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 
+# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
+FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete as runtime-rocm
+RUN update-pciids
+COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
+EXPOSE 11434
+ENV OLLAMA_HOST 0.0.0.0
+
+ENTRYPOINT ["/bin/ollama"]
+CMD ["serve"]
+
 FROM runtime-$TARGETARCH
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/opt/rocm/lib:
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
 ENTRYPOINT ["/bin/ollama"]

+ 27 - 17
api/types.go

@@ -34,24 +34,26 @@ func (e StatusError) Error() string {
 type ImageData []byte
 
 type GenerateRequest struct {
-	Model    string      `json:"model"`
-	Prompt   string      `json:"prompt"`
-	System   string      `json:"system"`
-	Template string      `json:"template"`
-	Context  []int       `json:"context,omitempty"`
-	Stream   *bool       `json:"stream,omitempty"`
-	Raw      bool        `json:"raw,omitempty"`
-	Format   string      `json:"format"`
-	Images   []ImageData `json:"images,omitempty"`
+	Model     string      `json:"model"`
+	Prompt    string      `json:"prompt"`
+	System    string      `json:"system"`
+	Template  string      `json:"template"`
+	Context   []int       `json:"context,omitempty"`
+	Stream    *bool       `json:"stream,omitempty"`
+	Raw       bool        `json:"raw,omitempty"`
+	Format    string      `json:"format"`
+	KeepAlive *Duration   `json:"keep_alive,omitempty"`
+	Images    []ImageData `json:"images,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
 
 type ChatRequest struct {
-	Model    string    `json:"model"`
-	Messages []Message `json:"messages"`
-	Stream   *bool     `json:"stream,omitempty"`
-	Format   string    `json:"format"`
+	Model     string    `json:"model"`
+	Messages  []Message `json:"messages"`
+	Stream    *bool     `json:"stream,omitempty"`
+	Format    string    `json:"format"`
+	KeepAlive *Duration `json:"keep_alive,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
@@ -126,8 +128,9 @@ type Runner struct {
 }
 
 type EmbeddingRequest struct {
-	Model  string `json:"model"`
-	Prompt string `json:"prompt"`
+	Model     string    `json:"model"`
+	Prompt    string    `json:"prompt"`
+	KeepAlive *Duration `json:"keep_alive,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
@@ -171,6 +174,7 @@ type ShowResponse struct {
 	Template   string       `json:"template,omitempty"`
 	System     string       `json:"system,omitempty"`
 	Details    ModelDetails `json:"details,omitempty"`
+	Messages   []Message    `json:"messages,omitempty"`
 }
 
 type CopyRequest struct {
@@ -236,6 +240,7 @@ type GenerateResponse struct {
 }
 
 type ModelDetails struct {
+	ParentModel       string   `json:"parent_model"`
 	Format            string   `json:"format"`
 	Family            string   `json:"family"`
 	Families          []string `json:"families"`
@@ -411,14 +416,19 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
 	case float64:
 		if t < 0 {
 			t = math.MaxFloat64
+			d.Duration = time.Duration(t)
+		} else {
+			d.Duration = time.Duration(t * float64(time.Second))
 		}
-
-		d.Duration = time.Duration(t)
 	case string:
 		d.Duration, err = time.ParseDuration(t)
 		if err != nil {
 			return err
 		}
+		if d.Duration < 0 {
+			mf := math.MaxFloat64
+			d.Duration = time.Duration(mf)
+		}
 	}
 
 	return nil
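
Note on the new KeepAlive field: per the Duration unmarshaling above, keep_alive accepts either a number of seconds or a Go duration string, and a negative value keeps the model loaded indefinitely. A minimal sketch of exercising it against a running server (the model name "llama2" is only an assumption for illustration):

    curl http://localhost:11434/api/generate -d '{"model": "llama2", "prompt": "Why is the sky blue?", "keep_alive": "10m"}'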

+ 11 - 9
cmd/cmd.go

@@ -458,15 +458,17 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
 type generateContextKey string
 
 type runOptions struct {
-	Model    string
-	Prompt   string
-	Messages []api.Message
-	WordWrap bool
-	Format   string
-	System   string
-	Template string
-	Images   []api.ImageData
-	Options  map[string]interface{}
+	Model       string
+	ParentModel string
+	Prompt      string
+	Messages    []api.Message
+	WordWrap    bool
+	Format      string
+	System      string
+	Template    string
+	Images      []api.ImageData
+	Options     map[string]interface{}
+	MultiModal  bool
 }
 }
 
 type displayResponseState struct {
+ 127 - 24
cmd/interactive.go

@@ -7,12 +7,14 @@ import (
 	"net/http"
 	"net/http"
 	"os"
 	"os"
 	"regexp"
 	"regexp"
+	"sort"
 	"strings"
 	"strings"
 
 
 	"github.com/spf13/cobra"
 	"github.com/spf13/cobra"
 	"golang.org/x/exp/slices"
 	"golang.org/x/exp/slices"
 
 
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/progress"
 	"github.com/jmorganca/ollama/readline"
 	"github.com/jmorganca/ollama/readline"
 )
 )
 
 
@@ -25,43 +27,75 @@ const (
 	MultilineTemplate
 )
 
-func modelIsMultiModal(cmd *cobra.Command, name string) bool {
-	// get model details
+func loadModel(cmd *cobra.Command, opts *runOptions) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
-		fmt.Println("error: couldn't connect to ollama server")
-		return false
+		return err
 	}
 
-	req := api.ShowRequest{Name: name}
-	resp, err := client.Show(cmd.Context(), &req)
+	p := progress.NewProgress(os.Stderr)
+	defer p.StopAndClear()
+
+	spinner := progress.NewSpinner("")
+	p.Add("", spinner)
+
+	showReq := api.ShowRequest{Name: opts.Model}
+	showResp, err := client.Show(cmd.Context(), &showReq)
 	if err != nil {
-		return false
+		return err
 	}
+	opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
+	opts.ParentModel = showResp.Details.ParentModel
 
 
-	return slices.Contains(resp.Details.Families, "clip")
-}
-
-func generateInteractive(cmd *cobra.Command, opts runOptions) error {
-	multiModal := modelIsMultiModal(cmd, opts.Model)
+	if len(showResp.Messages) > 0 {
+		opts.Messages = append(opts.Messages, showResp.Messages...)
+	}
 
 
-	// load the model
-	loadOpts := runOptions{
+	chatReq := &api.ChatRequest{
 		Model:    opts.Model,
-		Prompt:   "",
 		Messages: []api.Message{},
 	}
-	if _, err := chat(cmd, loadOpts); err != nil {
+	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
+		p.StopAndClear()
+		if len(opts.Messages) > 0 {
+			for _, msg := range opts.Messages {
+				switch msg.Role {
+				case "user":
+					fmt.Printf(">>> %s\n", msg.Content)
+				case "assistant":
+					state := &displayResponseState{}
+					displayResponse(msg.Content, opts.WordWrap, state)
+					fmt.Println()
+					fmt.Println()
+				}
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func generateInteractive(cmd *cobra.Command, opts runOptions) error {
+	opts.Messages = make([]api.Message, 0)
+
+	err := loadModel(cmd, &opts)
+	if err != nil {
 		return err
 	}
 
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
-		fmt.Fprintln(os.Stderr, "  /set          Set session variables")
-		fmt.Fprintln(os.Stderr, "  /show         Show model information")
-		fmt.Fprintln(os.Stderr, "  /bye          Exit")
-		fmt.Fprintln(os.Stderr, "  /?, /help     Help for a command")
-		fmt.Fprintln(os.Stderr, "  /? shortcuts  Help for keyboard shortcuts")
+		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
+		fmt.Fprintln(os.Stderr, "  /show           Show model information")
+		fmt.Fprintln(os.Stderr, "  /load <model>   Load a session or model")
+		fmt.Fprintln(os.Stderr, "  /save <model>   Save your current session")
+		fmt.Fprintln(os.Stderr, "  /bye            Exit")
+		fmt.Fprintln(os.Stderr, "  /?, /help       Help for a command")
+		fmt.Fprintln(os.Stderr, "  /? shortcuts    Help for keyboard shortcuts")
 		fmt.Fprintln(os.Stderr, "")
 		fmt.Fprintln(os.Stderr, "")
 		fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
 		fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
 		fmt.Fprintln(os.Stderr, "")
 		fmt.Fprintln(os.Stderr, "")
@@ -140,7 +174,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 
 
 	var sb strings.Builder
 	var multiline MultilineState
-	opts.Messages = make([]api.Message, 0)
 
 
 	for {
 		line, err := scanner.Readline()
@@ -203,6 +236,44 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			if err := ListHandler(cmd, args[1:]); err != nil {
 				return err
 			}
+		case strings.HasPrefix(line, "/load"):
+			args := strings.Fields(line)
+			if len(args) != 2 {
+				fmt.Println("Usage:\n  /load <modelname>")
+				continue
+			}
+			opts.Model = args[1]
+			opts.Messages = []api.Message{}
+			fmt.Printf("Loading model '%s'\n", opts.Model)
+			if err := loadModel(cmd, &opts); err != nil {
+				return err
+			}
+			continue
+		case strings.HasPrefix(line, "/save"):
+			args := strings.Fields(line)
+			if len(args) != 2 {
+				fmt.Println("Usage:\n  /save <modelname>")
+				continue
+			}
+
+			client, err := api.ClientFromEnvironment()
+			if err != nil {
+				fmt.Println("error: couldn't connect to ollama server")
+				return err
+			}
+
+			req := &api.CreateRequest{
+				Name:      args[1],
+				Modelfile: buildModelfile(opts),
+			}
+			fn := func(resp api.ProgressResponse) error { return nil }
+			err = client.Create(cmd.Context(), req, fn)
+			if err != nil {
+				fmt.Println("error: couldn't save model")
+				return err
+			}
+			fmt.Printf("Created new model '%s'\n", args[1])
+			continue
 		case strings.HasPrefix(line, "/set"):
 		case strings.HasPrefix(line, "/set"):
 			args := strings.Fields(line)
 			args := strings.Fields(line)
 			if len(args) > 1 {
 			if len(args) > 1 {
@@ -389,7 +460,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			args := strings.Fields(line)
 			isFile := false
 
-			if multiModal {
+			if opts.MultiModal {
 				for _, f := range extractFileNames(line) {
 					if strings.HasPrefix(f, args[0]) {
 						isFile = true
@@ -411,7 +482,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		if sb.Len() > 0 && multiline == MultilineNone {
 			newMessage := api.Message{Role: "user", Content: sb.String()}
 
-			if multiModal {
+			if opts.MultiModal {
 				msg, images, err := extractFileData(sb.String())
 				if err != nil {
 					return err
@@ -454,6 +525,38 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 	}
 }
 
+func buildModelfile(opts runOptions) string {
+	var mf strings.Builder
+	model := opts.ParentModel
+	if model == "" {
+		model = opts.Model
+	}
+	fmt.Fprintf(&mf, "FROM %s\n", model)
+	if opts.System != "" {
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
+	}
+
+	if opts.Template != "" {
+		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
+	}
+
+	keys := make([]string, 0)
+	for k := range opts.Options {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	for _, k := range keys {
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
+	}
+	fmt.Fprintln(&mf)
+
+	for _, msg := range opts.Messages {
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
+	}
+
+	return mf.String()
+}
+
 func normalizeFilePath(fp string) string {
 	// Define a map of escaped characters and their replacements
 	replacements := map[string]string{

+ 65 - 0
cmd/interactive_test.go

@@ -1,9 +1,13 @@
 package cmd
 
 import (
+	"bytes"
 	"testing"
 	"testing"
+	"text/template"
 
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/jmorganca/ollama/api"
 )
 
 func TestExtractFilenames(t *testing.T) {
@@ -49,3 +53,64 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8
 	assert.Contains(t, res[9], "ten.svg")
 	assert.Contains(t, res[9], "ten.svg")
 	assert.Contains(t, res[9], "E:")
 	assert.Contains(t, res[9], "E:")
 }
 }
+
+func TestModelfileBuilder(t *testing.T) {
+	opts := runOptions{
+		Model:    "hork",
+		System:   "You are part horse and part shark, but all hork. Do horklike things",
+		Template: "This is a template.",
+		Messages: []api.Message{
+			{Role: "user", Content: "Hey there hork!"},
+			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
+		},
+		Options: map[string]interface{}{},
+	}
+
+	opts.Options["temperature"] = 0.9
+	opts.Options["seed"] = 42
+	opts.Options["penalize_newline"] = false
+	opts.Options["stop"] = []string{"hi", "there"}
+
+	mf := buildModelfile(opts)
+	expectedModelfile := `FROM {{.Model}}
+SYSTEM """{{.System}}"""
+TEMPLATE """{{.Template}}"""
+PARAMETER penalize_newline false
+PARAMETER seed 42
+PARAMETER stop [hi there]
+PARAMETER temperature 0.9
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
+`
+
+	tmpl, err := template.New("").Parse(expectedModelfile)
+	assert.Nil(t, err)
+
+	var buf bytes.Buffer
+	err = tmpl.Execute(&buf, opts)
+	assert.Nil(t, err)
+	assert.Equal(t, buf.String(), mf)
+
+	opts.ParentModel = "horseshark"
+	mf = buildModelfile(opts)
+	expectedModelfile = `FROM {{.ParentModel}}
+SYSTEM """{{.System}}"""
+TEMPLATE """{{.Template}}"""
+PARAMETER penalize_newline false
+PARAMETER seed 42
+PARAMETER stop [hi there]
+PARAMETER temperature 0.9
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
+`
+
+	tmpl, err = template.New("").Parse(expectedModelfile)
+	assert.Nil(t, err)
+
+	var parentBuf bytes.Buffer
+	err = tmpl.Execute(&parentBuf, opts)
+	assert.Nil(t, err)
+	assert.Equal(t, parentBuf.String(), mf)
+}

+ 2 - 1
docs/development.md

@@ -50,7 +50,8 @@ development and runtime packages.
 Typically the build scripts will auto-detect CUDA, however, if your Linux distro
 or installation approach uses unusual paths, you can specify the location by
 specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
-libraries, and `CUDACXX` to the location of the nvcc compiler.
+libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
+the set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70").
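
For example (a sketch, assuming a working local CUDA toolchain), the variable can simply be set for the generate step described below:

    CMAKE_CUDA_ARCHITECTURES="50;60;70" go generate ./...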
 
 
 Then generate dependencies:
 

+ 15 - 0
docs/modelfile.md

@@ -19,6 +19,7 @@ A model file is the blueprint to create and share models with Ollama.
   - [SYSTEM](#system)
   - [ADAPTER](#adapter)
   - [LICENSE](#license)
+  - [MESSAGE](#message)
 - [Notes](#notes)
 
 ## Format
@@ -38,6 +39,7 @@ INSTRUCTION arguments
 | [`SYSTEM`](#system)                 | Specifies the system message that will be set in the template. |
 | [`ADAPTER`](#adapter)               | Defines the (Q)LoRA adapters to apply to the model.            |
 | [`LICENSE`](#license)               | Specifies the legal license.                                   |
+| [`MESSAGE`](#message)               | Specify message history.                                       |
 
 
 ## Examples
 
@@ -205,6 +207,19 @@ LICENSE """
 """
 """
 ```
 ```
 
 
+### MESSAGE
+
+The `MESSAGE` instruction allows you to specify a message history for the model to use when responding:
+
+```modelfile
+MESSAGE user Is Toronto in Canada?
+MESSAGE assistant yes
+MESSAGE user Is Sacramento in Canada?
+MESSAGE assistant no
+MESSAGE user Is Ontario in Canada?
+MESSAGE assistant yes
+```
+
 ## Notes
 
 - the **`Modelfile` is not case sensitive**. In the examples, uppercase instructions are used to make it easier to distinguish it from arguments.

+ 37 - 7
gpu/gpu.go

@@ -16,6 +16,7 @@ import (
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
 	"runtime"
 	"runtime"
+	"strconv"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
 	"unsafe"
 	"unsafe"
@@ -29,8 +30,8 @@ type handles struct {
 var gpuMutex sync.Mutex
 var gpuHandles *handles = nil
 
-// With our current CUDA compile flags, 5.2 and older will not work properly
-const CudaComputeMajorMin = 6
+// With our current CUDA compile flags, older than 5.0 will not work properly
+var CudaComputeMin = [2]C.int{5, 0}
 
 
 // Possible locations for the nvidia-ml library
 var CudaLinuxGlobs = []string{
@@ -121,9 +122,15 @@ func GetGPUInfo() GpuInfo {
 		initGPUHandles()
 	}
 
+	// All our GPU builds have AVX enabled, so fallback to CPU if we don't detect at least AVX
+	cpuVariant := GetCPUVariant()
+	if cpuVariant == "" {
+		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
+	}
+
 	var memInfo C.mem_info_t
 	resp := GpuInfo{}
-	if gpuHandles.cuda != nil {
+	if gpuHandles.cuda != nil && cpuVariant != "" {
 		C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
 		if memInfo.err != nil {
 			slog.Info(fmt.Sprintf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err)))
@@ -135,19 +142,40 @@ func GetGPUInfo() GpuInfo {
 			if cc.err != nil {
 				slog.Info(fmt.Sprintf("error looking up CUDA GPU compute capability: %s", C.GoString(cc.err)))
 				C.free(unsafe.Pointer(cc.err))
-			} else if cc.major >= CudaComputeMajorMin {
+			} else if cc.major > CudaComputeMin[0] || (cc.major == CudaComputeMin[0] && cc.minor >= CudaComputeMin[1]) {
 				slog.Info(fmt.Sprintf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor))
 				slog.Info(fmt.Sprintf("CUDA Compute Capability detected: %d.%d", cc.major, cc.minor))
 				resp.Library = "cuda"
 				resp.Library = "cuda"
 			} else {
 			} else {
 				slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
 				slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
 			}
 			}
 		}
 		}
-	} else if gpuHandles.rocm != nil {
+	} else if gpuHandles.rocm != nil && cpuVariant != "" {
 		C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
 		if memInfo.err != nil {
 			slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err)))
 			C.free(unsafe.Pointer(memInfo.err))
+		} else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
+			// Only one GPU detected and it appears to be an integrated GPU - skip it
+			slog.Info("ROCm unsupported integrated GPU detected")
 		} else {
+			if memInfo.igpu_index >= 0 {
+				// We have multiple GPUs reported, and one of them is an integrated GPU
+				// so we have to set the env var to bypass it
+				// If the user has specified their own ROCR_VISIBLE_DEVICES, don't clobber it
+				val := os.Getenv("ROCR_VISIBLE_DEVICES")
+				if val == "" {
+					devices := []string{}
+					for i := 0; i < int(memInfo.count); i++ {
+						if i == int(memInfo.igpu_index) {
+							continue
+						}
+						devices = append(devices, strconv.Itoa(i))
+					}
+					val = strings.Join(devices, ",")
+					os.Setenv("ROCR_VISIBLE_DEVICES", val)
+				}
+				slog.Info(fmt.Sprintf("ROCm integrated GPU detected - ROCR_VISIBLE_DEVICES=%s", val))
+			}
 			resp.Library = "rocm"
 			resp.Library = "rocm"
 			var version C.rocm_version_resp_t
 			var version C.rocm_version_resp_t
 			C.rocm_get_version(*gpuHandles.rocm, &version)
 			C.rocm_get_version(*gpuHandles.rocm, &version)
@@ -163,7 +191,7 @@ func GetGPUInfo() GpuInfo {
 	if resp.Library == "" {
 	if resp.Library == "" {
 		C.cpu_check_ram(&memInfo)
 		C.cpu_check_ram(&memInfo)
 		resp.Library = "cpu"
 		resp.Library = "cpu"
-		resp.Variant = GetCPUVariant()
+		resp.Variant = cpuVariant
 	}
 	if memInfo.err != nil {
 		slog.Info(fmt.Sprintf("error looking up CPU memory: %s", C.GoString(memInfo.err)))
@@ -199,7 +227,9 @@ func CheckVRAM() (int64, error) {
 		if overhead < gpus*1024*1024*1024 {
 			overhead = gpus * 1024 * 1024 * 1024
 		}
-		return int64(gpuInfo.FreeMemory - overhead), nil
+		avail := int64(gpuInfo.FreeMemory - overhead)
+		slog.Debug(fmt.Sprintf("%s detected %d devices with %dM available memory", gpuInfo.Library, gpuInfo.DeviceCount, avail/1024/1024))
+		return avail, nil
 	}
 
 	return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation

+ 1 - 0
gpu/gpu_info.h

@@ -42,6 +42,7 @@ typedef struct mem_info {
   uint64_t total;
   uint64_t free;
   unsigned int count;
+  int igpu_index; // If >= 0, we detected an integrated GPU to ignore
   char *err;  // If non-nill, caller responsible for freeing
 } mem_info_t;
 

+ 1 - 0
gpu/gpu_info_cuda.c

@@ -70,6 +70,7 @@ void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp) {
     resp->ch.handle = NULL;
     snprintf(buf, buflen, "nvml vram init failure: %d", ret);
     resp->err = strdup(buf);
+    return;
   }
 
   // Report driver version if we're in verbose mode, ignore errors

+ 11 - 4
gpu/gpu_info_rocm.c

@@ -77,6 +77,7 @@ void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
 
 
 void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
   resp->err = NULL;
+  resp->igpu_index = -1;
   uint64_t totalMem = 0;
   uint64_t usedMem = 0;
   rsmi_status_t ret;
@@ -162,8 +163,14 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
     }
     LOG(h.verbose, "[%d] ROCm totalMem %ld\n", i, totalMem);
     LOG(h.verbose, "[%d] ROCm usedMem %ld\n", i, usedMem);
-    resp->total += totalMem;
-    resp->free += totalMem - usedMem;
+    if (totalMem < 1024 * 1024 * 1024) {
+      // Do not add up integrated GPU memory capacity, it's a bogus 512M, and actually uses system memory
+      LOG(h.verbose, "[%d] ROCm integrated GPU\n", i);
+      resp->igpu_index = i;
+    } else {
+      resp->total += totalMem;
+      resp->free += totalMem - usedMem;
+    }
   }
 }
 
@@ -171,7 +178,7 @@ void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) {
   const int buflen = 256;
   char buf[buflen + 1];
   if (h.handle == NULL) {
-    resp->str = strdup("nvml handle not initialized");
+    resp->str = strdup("rocm handle not initialized");
     resp->status = 1;
     return;
   }
@@ -188,4 +195,4 @@ void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) {
   resp->str = strdup(buf);
 }
 
-#endif  // __APPLE__
+#endif  // __APPLE__

+ 1 - 0
llm/dyn_ext_server.go

@@ -190,6 +190,7 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
 		"seed":              predict.Options.Seed,
 		"seed":              predict.Options.Seed,
 		"stop":              predict.Options.Stop,
 		"stop":              predict.Options.Stop,
 		"image_data":        imageData,
 		"image_data":        imageData,
+		"cache_prompt":      true,
 	}
 
 	if predict.Format == "json" {

+ 14 - 0
llm/generate/gen_common.sh

@@ -39,6 +39,9 @@ init_vars() {
     *)
         ;;
     esac
+    if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then 
+        CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
+    fi
 }
 
 git_module_setup() {
@@ -61,6 +64,17 @@ apply_patches() {
     if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
         echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
     fi
+
+    # apply temporary patches until fix is upstream
+    for patch in ../patches/*.diff; do
+        for file in $(grep "^+++ " ${patch} | cut -f2 -d' ' | cut -f2- -d/); do
+            (cd ${LLAMACPP_DIR}; git checkout ${file})
+        done
+    done
+    for patch in ../patches/*.diff; do
+        (cd ${LLAMACPP_DIR} && git apply ${patch})
+    done
+
     # Avoid duplicate main symbols when we link into the cgo binary
     sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
         mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp

+ 1 - 1
llm/generate/gen_linux.sh

@@ -140,7 +140,7 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
     if [ -n "${CUDA_MAJOR}" ]; then
     if [ -n "${CUDA_MAJOR}" ]; then
         CUDA_VARIANT=_v${CUDA_MAJOR}
         CUDA_VARIANT=_v${CUDA_MAJOR}
     fi
     fi
-    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
+    CMAKE_DEFS="-DLLAMA_CUBLAS=on -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
     BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
     EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
     build

+ 29 - 1
llm/generate/gen_windows.ps1

@@ -25,6 +25,11 @@ function init_vars {
     }
     $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path
     $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
+    if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
+        $script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
+    } else {
+        $script:CMAKE_CUDA_ARCHITECTURES=$env:CMAKE_CUDA_ARCHITECTURES
+    }
 }
 
 function git_module_setup {
@@ -40,6 +45,29 @@ function apply_patches {
     if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
         Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
     }
+
+    # Apply temporary patches until fix is upstream
+    $patches = Get-ChildItem "../patches/*.diff"
+    foreach ($patch in $patches) {
+        # Extract file paths from the patch file
+        $filePaths = Get-Content $patch.FullName | Where-Object { $_ -match '^\+\+\+ ' } | ForEach-Object {
+            $parts = $_ -split ' '
+            ($parts[1] -split '/', 2)[1]
+        }
+
+        # Checkout each file
+        foreach ($file in $filePaths) {
+            Set-Location -Path ${script:llamacppDir}
+            git checkout $file
+        }
+    }
+
+    # Apply each patch
+    foreach ($patch in $patches) {
+        Set-Location -Path ${script:llamacppDir}
+        git apply $patch.FullName
+    }
+
     # Avoid duplicate main symbols when we link into the cgo binary
     $content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
     $content = $content -replace 'int main\(', 'int __main('
@@ -128,7 +156,7 @@ if ($null -ne $script:CUDA_LIB_DIR) {
     }
     init_vars
     $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
+    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
     build
     install
     cp "${script:CUDA_LIB_DIR}/cudart64_*.dll" "${script:buildDir}/lib"

+ 61 - 54
llm/gguf.go

@@ -69,12 +69,65 @@ type tensor struct {
 	name   string
 	kind   uint32
 	offset uint64
-	size   uint64
 
 
 	// shape is the number of elements in each dimension
 	shape [4]uint64
 }
 
 
+func (t tensor) blockSize() uint64 {
+	switch {
+	case t.kind < 2:
+		return 1
+	case t.kind < 10:
+		return 32
+	default:
+		return 256
+	}
+}
+
+func (t tensor) typeSize() uint64 {
+	blockSize := t.blockSize()
+
+	switch t.kind {
+	case 0: // FP32
+		return 4
+	case 1: // FP16
+		return 2
+	case 2: // Q4_0
+		return 2 + blockSize/2
+	case 3: // Q4_1
+		return 2 + 2 + blockSize/2
+	case 6: // Q5_0
+		return 2 + 4 + blockSize/2
+	case 7: // Q5_1
+		return 2 + 2 + 4 + blockSize/2
+	case 8: // Q8_0
+		return 2 + blockSize
+	case 9: // Q8_1
+		return 4 + 4 + blockSize
+	case 10: // Q2_K
+		return blockSize/16 + blockSize/4 + 2 + 2
+	case 11: // Q3_K
+		return blockSize/8 + blockSize/4 + 12 + 2
+	case 12: // Q4_K
+		return 2 + 2 + 12 + blockSize/2
+	case 13: // Q5_K
+		return 2 + 2 + 12 + blockSize/8 + blockSize/2
+	case 14: // Q6_K
+		return blockSize/2 + blockSize/4 + blockSize/16 + 2
+	default:
+		return 0
+	}
+}
+
+func (t tensor) parameters() uint64 {
+	return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
+}
+
+func (t tensor) size() uint64 {
+	return t.parameters() * t.typeSize() / t.blockSize()
+}
+
 type ggufModel struct {
 	*containerGGUF
 
@@ -201,61 +254,15 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 			shape[i] = llm.readU64(rso)
 		}
 
-		kind := llm.readU32(rso)
-		offset := llm.readU64(rso)
-
-		var blockSize uint64
-		switch {
-		case kind < 2:
-			blockSize = 1
-		case kind < 10:
-			blockSize = 32
-		default:
-			blockSize = 256
-		}
-
-		var typeSize uint64
-		switch kind {
-		case 0: // FP32
-			typeSize = 4
-		case 1: // FP16
-			typeSize = 2
-		case 2: // Q4_0
-			typeSize = 2 + blockSize/2
-		case 3: // Q4_1
-			typeSize = 2 + 2 + blockSize/2
-		case 6: // Q5_0
-			typeSize = 2 + 4 + blockSize/2
-		case 7: // Q5_1
-			typeSize = 2 + 2 + 4 + blockSize/2
-		case 8: // Q8_0
-			typeSize = 2 + blockSize
-		case 9: // Q8_1
-			typeSize = 4 + 4 + blockSize
-		case 10: // Q2_K
-			typeSize = blockSize/16 + blockSize/4 + 2 + 2
-		case 11: // Q3_K
-			typeSize = blockSize/8 + blockSize/4 + 12 + 2
-		case 12: // Q4_K
-			typeSize = 2 + 2 + 12 + blockSize/2
-		case 13: // Q5_K
-			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
-		case 14: // Q6_K
-			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
-		}
-
-		parameters := shape[0] * shape[1] * shape[2] * shape[3]
-		size := parameters * typeSize / blockSize
-
-		llm.tensors = append(llm.tensors, tensor{
+		tensor := tensor{
 			name:   name,
-			kind:   kind,
-			offset: offset,
-			size:   size,
+			kind:   llm.readU32(rso),
+			offset: llm.readU64(rso),
 			shape:  shape,
-		})
+		}
 
 
-		llm.parameters += parameters
+		llm.tensors = append(llm.tensors, tensor)
+		llm.parameters += tensor.parameters()
 	}
 
 	alignment, ok := llm.kv["general.alignment"].(uint32)
@@ -265,7 +272,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 
 
 	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
 	for _, tensor := range llm.tensors {
-		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
+		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
 		rso.Seek(padded, io.SeekCurrent)
 	}
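
As a worked example of the refactored calculation (size = parameters × typeSize / blockSize): a hypothetical 4096×4096 Q4_0 tensor has blockSize 32 and typeSize 2 + 32/2 = 18 bytes, giving 4096 × 4096 × 18 / 32 = 9,437,184 bytes, the same result the old inline code produced.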
 
 

+ 1 - 1
llm/llama.cpp

@@ -1 +1 @@
-Subproject commit 011e8ec577fd135cbc02993d3ea9840c516d6a1c
+Subproject commit cd4fddb29f81d6a1f6d51a0c016bc6b486d68def

+ 30 - 0
llm/patches/01-cache.diff

@@ -0,0 +1,30 @@
+diff --git a/examples/server/server.cpp b/examples/server/server.cpp
+index 0462fbd2..4fa7b57f 100644
+--- a/examples/server/server.cpp
++++ b/examples/server/server.cpp
+@@ -1857,12 +1857,6 @@ struct llama_server_context
+                         LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
+                     }
+ 
+-                    LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past);
+-
+-                    llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1);
+-
+-                    slot.cache_tokens = prompt_tokens;
+-
+                     if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
+                     {
+                         // we have to evaluate at least 1 token to generate logits.
+@@ -1870,6 +1864,12 @@ struct llama_server_context
+                         slot.n_past--;
+                     }
+ 
++                    LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past);
++
++                    llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1);
++
++                    slot.cache_tokens = prompt_tokens;
++
+                     LOG_VERBOSE("prompt ingested", {
+                                                     {"n_past", slot.n_past},
+                                                     {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},

+ 11 - 0
parser/parser.go

@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"log/slog"
 	"log/slog"
+	"slices"
 )
 
 type Command struct {
@@ -56,6 +57,16 @@ func Parse(reader io.Reader) ([]Command, error) {
 			command.Args = string(bytes.TrimSpace(fields[1]))
 		case "EMBED":
 			return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
+		case "MESSAGE":
+			command.Name = string(bytes.ToLower(fields[0]))
+			fields = bytes.SplitN(fields[1], []byte(" "), 2)
+			if len(fields) < 2 {
+				return nil, fmt.Errorf("should be in the format <role> <message>")
+			}
+			if !slices.Contains([]string{"system", "user", "assistant"}, string(bytes.ToLower(fields[0]))) {
+				return nil, fmt.Errorf("role must be one of \"system\", \"user\", or \"assistant\"")
+			}
+			command.Args = fmt.Sprintf("%s: %s", string(bytes.ToLower(fields[0])), string(fields[1]))
 		default:
 			if !bytes.HasPrefix(fields[0], []byte("#")) {
 				// log a warning for unknown commands

+ 35 - 0
parser/parser_test.go

@@ -61,3 +61,38 @@ PARAMETER param1
 	assert.ErrorContains(t, err, "missing value for [param1]")
 	assert.ErrorContains(t, err, "missing value for [param1]")
 
 
 }
 }
+
+func Test_Parser_Messages(t *testing.T) {
+
+	input := `
+FROM foo
+MESSAGE system You are a Parser. Always Parse things.
+MESSAGE user Hey there!
+MESSAGE assistant Hello, I want to parse all the things!
+`
+
+	reader := strings.NewReader(input)
+	commands, err := Parse(reader)
+	assert.Nil(t, err)
+
+	expectedCommands := []Command{
+		{Name: "model", Args: "foo"},
+		{Name: "message", Args: "system: You are a Parser. Always Parse things."},
+		{Name: "message", Args: "user: Hey there!"},
+		{Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
+	}
+
+	assert.Equal(t, expectedCommands, commands)
+}
+
+func Test_Parser_Messages_BadRole(t *testing.T) {
+
+	input := `
+FROM foo
+MESSAGE badguy I'm a bad guy!
+`
+
+	reader := strings.NewReader(input)
+	_, err := Parse(reader)
+	assert.ErrorContains(t, err, "role must be one of \"system\", \"user\", or \"assistant\"")
+}

+ 10 - 0
scripts/build_docker.sh

@@ -13,3 +13,13 @@ docker build \
     -f Dockerfile \
     -t ollama/ollama:$VERSION \
     .
+
+docker build \
+    --load \
+    --platform=linux/amd64 \
+    --build-arg=VERSION \
+    --build-arg=GOFLAGS \
+    --target runtime-rocm \
+    -f Dockerfile \
+    -t ollama/ollama:$VERSION-rocm \
+    .
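
The extra build produces a separate ollama/ollama:$VERSION-rocm tag (kept apart because the ROCm layers are much larger). A hedged usage sketch, using the standard ROCm container device flags rather than anything this script configures:

    docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 ollama/ollama:$VERSION-rocm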

+ 74 - 43
server/download.go

@@ -25,6 +25,11 @@ import (
 	"github.com/jmorganca/ollama/format"
 	"github.com/jmorganca/ollama/format"
 )
 )
 
 
+const maxRetries = 6
+
+var errMaxRetriesExceeded = errors.New("max retries exceeded")
+var errPartStalled = errors.New("part stalled")
+
 var blobDownloadManager sync.Map
 
 type blobDownload struct {
@@ -44,10 +49,11 @@ type blobDownload struct {
 }
 
 type blobDownloadPart struct {
-	N         int
-	Offset    int64
-	Size      int64
-	Completed int64
+	N           int
+	Offset      int64
+	Size        int64
+	Completed   int64
+	lastUpdated time.Time
 
 
 	*blobDownload `json:"-"`
 	*blobDownload `json:"-"`
 }
 }
@@ -72,6 +78,13 @@ func (p *blobDownloadPart) StopsAt() int64 {
 	return p.Offset + p.Size
 }
 
+func (p *blobDownloadPart) Write(b []byte) (n int, err error) {
+	n = len(b)
+	p.blobDownload.Completed.Add(int64(n))
+	p.lastUpdated = time.Now()
+	return n, nil
+}
+
 func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
 	partFilePaths, err := filepath.Glob(b.Name + "-partial-*")
 	if err != nil {
@@ -157,6 +170,9 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 				case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
 					// return immediately if the context is canceled or the device is out of space
 					return err
+				case errors.Is(err, errPartStalled):
+					try--
+					continue
 				case err != nil:
 					sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
 					slog.Info(fmt.Sprintf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep))
@@ -195,28 +211,54 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
 }
 
 func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart, opts *RegistryOptions) error {
-	headers := make(http.Header)
-	headers.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
-	resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, opts)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
+	g, ctx := errgroup.WithContext(ctx)
+	g.Go(func() error {
+		headers := make(http.Header)
+		headers.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
+		resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, opts)
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
 
 
-	n, err := io.Copy(w, io.TeeReader(resp.Body, b))
-	if err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, io.ErrUnexpectedEOF) {
-		// rollback progress
-		b.Completed.Add(-n)
-		return err
-	}
+		n, err := io.Copy(w, io.TeeReader(resp.Body, part))
+		if err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, io.ErrUnexpectedEOF) {
+			// rollback progress
+			b.Completed.Add(-n)
+			return err
+		}
 
 
-	part.Completed += n
-	if err := b.writePart(part.Name(), part); err != nil {
+		part.Completed += n
+		if err := b.writePart(part.Name(), part); err != nil {
+			return err
+		}
+
+		// return nil or context.Canceled or UnexpectedEOF (resumable)
 		return err
 		return err
-	}
+	})
+
+	g.Go(func() error {
+		ticker := time.NewTicker(time.Second)
+		for {
+			select {
+			case <-ticker.C:
+				if part.Completed >= part.Size {
+					return nil
+				}
+
+				if !part.lastUpdated.IsZero() && time.Since(part.lastUpdated) > 5*time.Second {
+					slog.Info(fmt.Sprintf("%s part %d stalled; retrying", b.Digest[7:19], part.N))
+					// reset last updated
+					part.lastUpdated = time.Time{}
+					return errPartStalled
+				}
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	})
 
 
-	// return nil or context.Canceled or UnexpectedEOF (resumable)
-	return err
+	return g.Wait()
 }
 
 func (b *blobDownload) newPart(offset, size int64) error {
@@ -255,12 +297,6 @@ func (b *blobDownload) writePart(partName string, part *blobDownloadPart) error
 	return json.NewEncoder(partFile).Encode(part)
 }
 
-func (b *blobDownload) Write(p []byte) (n int, err error) {
-	n = len(p)
-	b.Completed.Add(int64(n))
-	return n, nil
-}
-
 func (b *blobDownload) acquire() {
 	b.references.Add(1)
 }
@@ -279,20 +315,19 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 	for {
 		select {
 		case <-ticker.C:
+			fn(api.ProgressResponse{
+				Status:    fmt.Sprintf("pulling %s", b.Digest[7:19]),
+				Digest:    b.Digest,
+				Total:     b.Total,
+				Completed: b.Completed.Load(),
+			})
+
+			if b.done || b.err != nil {
+				return b.err
+			}
 		case <-ctx.Done():
 			return ctx.Err()
 		}
-
-		fn(api.ProgressResponse{
-			Status:    fmt.Sprintf("pulling %s", b.Digest[7:19]),
-			Digest:    b.Digest,
-			Total:     b.Total,
-			Completed: b.Completed.Load(),
-		})
-
-		if b.done || b.err != nil {
-			return b.err
-		}
 	}
 }
 
@@ -303,10 +338,6 @@ type downloadOpts struct {
 	fn      func(api.ProgressResponse)
 }
 
-const maxRetries = 6
-
-var errMaxRetriesExceeded = errors.New("max retries exceeded")
-
 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) error {
 	fp, err := GetBlobsPath(opts.digest)
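
In short: each part now tracks lastUpdated via its own Write method, a watchdog goroutine in downloadChunk returns errPartStalled when no bytes arrive for over five seconds, and the retry loop decrements try for stalls so they do not count against maxRetries.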

+ 47 - 5
server/images.go

@@ -41,7 +41,7 @@ type Model struct {
 	Config         ConfigV2
 	ShortName      string
 	ModelPath      string
-	OriginalModel  string
+	ParentModel    string
 	AdapterPaths   []string
 	ProjectorPaths []string
 	Template       string
@@ -50,6 +50,12 @@ type Model struct {
 	Digest         string
 	Size           int64
 	Options        map[string]interface{}
+	Messages       []Message
+}
+
+type Message struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
 }
 }
 
 type PromptVars struct {
 		switch layer.MediaType {
 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
 			model.ModelPath = filename
+			model.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
 		case "application/vnd.ollama.image.embed":
 			// Deprecated in versions  > 0.1.2
 			// Deprecated in versions  > 0.1.2
 			// TODO: remove this warning in a future version
 			// TODO: remove this warning in a future version
@@ -374,6 +380,16 @@ func GetModel(name string) (*Model, error) {
 			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
 			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
 				return nil, err
 			}
+			msgs, err := os.Open(filename)
+			if err != nil {
+				return nil, err
+			}
+			defer msgs.Close()
+
+			if err = json.NewDecoder(msgs).Decode(&model.Messages); err != nil {
+				return nil, err
+			}
 		case "application/vnd.ollama.image.license":
 		case "application/vnd.ollama.image.license":
 			bts, err := os.ReadFile(filename)
 			bts, err := os.ReadFile(filename)
 			if err != nil {
 			if err != nil {
@@ -428,12 +444,12 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 	}
 	}
 
 	var layers Layers
 
 
 	params := make(map[string][]string)
 	params := make(map[string][]string)
 	fromParams := make(map[string]any)
 
 	for _, c := range commands {
 	for _, c := range commands {
 		mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
 		mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
 
 		switch c.Name {
 			}
 			}
 
 			layers.Replace(layer)
+			messages = append(messages, c.Args)
 		default:
 		default:
 			params[c.Name] = append(params[c.Name], c.Args)
 		}
 	}
 
+		fn(api.ProgressResponse{Status: "creating parameters layer"})
+
+		msgs := make([]api.Message, 0)
+
+		for _, m := range messages {
+			// todo: handle images
+			msg := strings.SplitN(m, ": ", 2)
+			msgs = append(msgs, api.Message{Role: msg[0], Content: msg[1]})
+		}
+
+		var b bytes.Buffer
+		if err := json.NewEncoder(&b).Encode(msgs); err != nil {
+			return err
+		}
+
+		layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
+		if err != nil {
+			return err
+		}
+
+		layers.Replace(layer)
+	}
+
 	if len(params) > 0 {
 		fn(api.ProgressResponse{Status: "creating parameters layer"})
 
@@ -908,8 +950,8 @@ func ShowModelfile(model *Model) (string, error) {
 	mt.Model = model
 	mt.From = model.ModelPath
 
-	if model.OriginalModel != "" {
-		mt.From = model.OriginalModel
+	if model.ParentModel != "" {
+		mt.From = model.ParentModel
 	}
 
 	modelFile := `# Modelfile generated by "ollama show"

+ 37 - 4
server/routes.go

@@ -186,7 +186,13 @@ func GenerateHandler(c *gin.Context) {
 		return
 	}
 
-	sessionDuration := defaultSessionDuration
+	var sessionDuration time.Duration
+	if req.KeepAlive == nil {
+		sessionDuration = defaultSessionDuration
+	} else {
+		sessionDuration = req.KeepAlive.Duration
+	}
+
 	if err := load(c, model, opts, sessionDuration); err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -378,7 +384,14 @@ func EmbeddingHandler(c *gin.Context) {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
-	sessionDuration := defaultSessionDuration
+
+	var sessionDuration time.Duration
+	if req.KeepAlive == nil {
+		sessionDuration = defaultSessionDuration
+	} else {
+		sessionDuration = req.KeepAlive.Duration
+	}
+
 	if err := load(c, model, opts, sessionDuration); err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -659,6 +672,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	}
 
 	modelDetails := api.ModelDetails{
+		ParentModel:       model.ParentModel,
 		Format:            model.Config.ModelFormat,
 		Family:            model.Config.ModelFamily,
 		Families:          model.Config.ModelFamilies,
@@ -674,11 +688,17 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		model.Template = req.Template
 	}
 
+	msgs := make([]api.Message, 0)
+	for _, msg := range model.Messages {
+		msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
+	}
+
 	resp := &api.ShowResponse{
 		License:  strings.Join(model.License, "\n"),
 		System:   model.System,
 		Template: model.Template,
 		Details:  modelDetails,
+		Messages: msgs,
 	}
 
 	var params []string
@@ -1067,7 +1087,14 @@ func ChatHandler(c *gin.Context) {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
-	sessionDuration := defaultSessionDuration
+
+	var sessionDuration time.Duration
+	if req.KeepAlive == nil {
+		sessionDuration = defaultSessionDuration
+	} else {
+		sessionDuration = req.KeepAlive.Duration
+	}
+
 	if err := load(c, model, opts, sessionDuration); err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1075,7 +1102,13 @@ func ChatHandler(c *gin.Context) {
 
 
 	// an empty request loads the model
 	if len(req.Messages) == 0 {
-		c.JSON(http.StatusOK, api.ChatResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true, Message: api.Message{Role: "assistant"}})
+		resp := api.ChatResponse{
+			CreatedAt: time.Now().UTC(),
+			Model:     req.Model,
+			Done:      true,
+			Message:   api.Message{Role: "assistant"},
+		}
+		c.JSON(http.StatusOK, resp)
 		return
 		return
 	}