
Merge branch 'ollama:main' into arm64static

Jeremy 1 year ago
parent · commit ea4c284a48

+ 60 - 0
.github/ISSUE_TEMPLATE/10_bug_report.yml

@@ -0,0 +1,60 @@
+name: Bug report
+labels: [bug]
+description: Something isn't working right.
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: What is the issue?
+      description: What happened? What did you expect to happen?
+    validations:
+      required: true
+  - type: dropdown
+    id: os
+    attributes:
+      label: OS
+      description: Which operating system are you using?
+      multiple: true
+      options:
+        - Linux
+        - macOS
+        - Windows
+        - Docker
+        - WSL2
+    validations:
+      required: false
+  - type: dropdown
+    id: gpu
+    attributes:
+      label: GPU
+      description: Which GPU are you using?
+      multiple: true
+      options:
+        - Nvidia
+        - AMD
+        - Intel
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: dropdown
+    id: cpu
+    attributes:
+      label: CPU
+      description: Which CPU are you using?
+      multiple: true
+      options:
+        - Intel
+        - AMD
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: input
+    id: version
+    attributes:
+      label: Ollama version
+      description: What version of Ollama are you using? (`ollama --version`)
+      placeholder: e.g., 0.1.32
+    validations:
+      required: false

+ 0 - 18
.github/ISSUE_TEMPLATE/10_model_request.yml

@@ -1,18 +0,0 @@
-name: Model request
-description: Request a new model for the library
-labels: [mr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your Model request is [already available](https://ollama.com/search) or that you cannot [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
-        Tell us about which Model you'd like to see in the library!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What model would you like?
-      description: Please provide a link to the model.
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a model request!

+ 3 - 33
.github/ISSUE_TEMPLATE/20_feature_request.yml

@@ -1,41 +1,11 @@
 name: Feature request
-description: Propose a new feature
-labels: [needs-triage, fr]
+labels: ['feature request']
+description: Request a new feature.
 body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
-        Tell us about your idea!
   - type: textarea
     id: problem
     attributes:
-      label: What are you trying to do?
+      label: What new feature would you like to see?
       description: Tell us about the problem you're trying to solve.
     validations:
       required: false
-  - type: textarea
-    id: solution
-    attributes:
-      label: How should we solve this?
-      description: If you have an idea of how you'd like to see this feature work, let us know.
-    validations:
-      required: false
-  - type: textarea
-    id: alternative
-    attributes:
-      label: What is the impact of not solving this?
-      description: (How) Are you currently working around the issue?
-    validations:
-      required: false
-  - type: textarea
-    id: context
-    attributes:
-      label: Anything else?
-      description: Any additional context to share, e.g., links
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a feature request!

+ 9 - 0
.github/ISSUE_TEMPLATE/30_model_request.yml

@@ -0,0 +1,9 @@
+name: Model request
+labels: ['model request']
+description: Request a new model.
+body:
+  - type: textarea
+    id: problem
+    attributes:
+      label: What model would you like?
+      description: Please provide a link to the model.

+ 0 - 125
.github/ISSUE_TEMPLATE/90_bug_report.yml

@@ -1,125 +0,0 @@
-name: Bug report
-description: File a bug report. If you need help, please join our Discord server.
-labels: [needs-triage, bug]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
-  - type: textarea
-    id: what-happened
-    attributes:
-      label: What is the issue?
-      description: What happened? What did you expect to happen?
-    validations:
-      required: true
-  - type: textarea
-    id: what-was-expected
-    attributes:
-      label: What did you expect to see?
-      description: What did you expect to see/happen instead?
-    validations:
-      required: false
-  - type: textarea
-    id: steps
-    attributes:
-      label: Steps to reproduce
-      description: What are the steps you took that hit this issue?
-    validations:
-      required: false
-  - type: textarea
-    id: changes
-    attributes:
-      label: Are there any recent changes that introduced the issue?
-      description: If so, what are those changes?
-    validations:
-      required: false
-  - type: dropdown
-    id: os
-    attributes:
-      label: OS
-      description: What OS are you using? You may select more than one.
-      multiple: true
-      options:
-        - Linux
-        - macOS
-        - Windows
-        - Other
-    validations:
-      required: false
-  - type: dropdown
-    id: architecture
-    attributes:
-      label: Architecture
-      description: What architecture are you using? You may select more than one.
-      multiple: true
-      options:
-        - arm64
-        - amd64
-        - x86
-        - Other
-  - type: dropdown
-    id: platform
-    attributes:
-      label: Platform
-      description: What platform are you using? You may select more than one.
-      multiple: true
-      options:
-        - Docker
-        - WSL
-        - WSL2
-    validations:
-      required: false
-  - type: input
-    id: ollama-version
-    attributes:
-      label: Ollama version
-      description: What Ollama version are you using? (`ollama --version`)
-      placeholder: e.g., 1.14.4
-    validations:
-      required: false
-  - type: dropdown
-    id: gpu
-    attributes:
-      label: GPU
-      description: What GPU, if any, are you using? You may select more than one.
-      multiple: true
-      options:
-        - Nvidia
-        - AMD
-        - Intel
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: gpu-info
-    attributes:
-      label: GPU info
-      description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
-    validations:
-      required: false
-  - type: dropdown
-    id: cpu
-    attributes:
-      label: CPU
-      description: What CPU are you using? You may select more than one.
-      multiple: true
-      options:
-        - Intel
-        - AMD
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: other-software
-    attributes:
-      label: Other software
-      description: What other software are you using that might be related to this issue?
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a bug report!

+ 3 - 1
README.md

@@ -60,7 +60,6 @@ Here are some example models that can be downloaded:
 | Llama 2 13B        | 13B        | 7.3GB | `ollama run llama2:13b`        |
 | Llama 2 70B        | 70B        | 39GB  | `ollama run llama2:70b`        |
 | Orca Mini          | 3B         | 1.9GB | `ollama run orca-mini`         |
-| Vicuna             | 7B         | 3.8GB | `ollama run vicuna`            |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
 | Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
 | Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
@@ -378,3 +377,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
+
+### Supported backends 
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 

+ 2 - 1
llm/ggml.go

@@ -164,7 +164,8 @@ func (ts Tensors) Layers() map[string]Layer {
 	for _, t := range ts {
 		parts := strings.Split(t.Name, ".")
 		if parts[0] == "blk" {
-			parts = parts[1:]
+			// join first and second part, e.g. blk.%d
+			parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
 		}
 
 		if _, ok := layers[parts[0]]; !ok {
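
A note on the change above: per-block tensors are now grouped under composite keys such as `blk.0` rather than the bare index `0`, which is what the matching lookup change in `llm/server.go` below relies on. A minimal sketch of the new keying rule, assuming tensor names of the form `blk.<index>.<suffix>` (the helper name here is hypothetical):

```go
package main

import (
	"fmt"
	"strings"
)

// layerKey mirrors the grouping rule from Tensors().Layers(): tensors named
// "blk.<n>.<suffix>" are keyed by "blk.<n>"; everything else keeps its first
// name component (e.g. "output", "token_embd").
func layerKey(tensorName string) string {
	parts := strings.Split(tensorName, ".")
	if parts[0] == "blk" && len(parts) > 1 {
		return fmt.Sprintf("%s.%s", parts[0], parts[1])
	}
	return parts[0]
}

func main() {
	fmt.Println(layerKey("blk.0.attn_q.weight")) // blk.0
	fmt.Println(layerKey("output.weight"))       // output
}
```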

+ 48 - 10
llm/server.go

@@ -97,7 +97,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var layerCount int
 	layers := ggml.Tensors().Layers()
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		memoryLayer := layers[fmt.Sprintf("%d", i)].size()
+		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
 
 		// KV is proportional to the number of layers
 		memoryLayer += kv / ggml.KV().BlockCount()
@@ -109,7 +109,14 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		}
 	}
 
-	memoryLayerOutput := layers["output"].size()
+	var memoryLayerOutput uint64
+	for k, v := range layers {
+		if !strings.HasPrefix(k, "blk.") {
+			slog.Info("non-repeating layer", "name", k, "size", format.HumanBytes2(v.size()))
+			memoryLayerOutput += v.size()
+		}
+	}
+
 	memoryRequiredTotal += memoryLayerOutput
 
 	if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
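
Rather than reading only the `output` entry, the new loop above sums every layer whose key does not start with `blk.`, so embeddings and norm tensors are also counted as non-repeating memory. A minimal sketch of that aggregation with illustrative sizes (names and byte counts are made up, not from a real model):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// hypothetical layer sizes in bytes, keyed the same way as Tensors().Layers()
	layerSizes := map[string]uint64{
		"blk.0":      110 << 20, // repeating transformer block
		"blk.1":      110 << 20,
		"output":     250 << 20, // output projection
		"token_embd": 250 << 20, // token embeddings
	}

	var memoryLayerOutput uint64
	for name, size := range layerSizes {
		if !strings.HasPrefix(name, "blk.") {
			memoryLayerOutput += size
		}
	}
	fmt.Println("non-repeating bytes:", memoryLayerOutput) // 524288000
}
```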
@@ -124,16 +131,47 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumGPU = layerCount
 	}
 
+	memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
+
 	slog.Info(
 		"offload to gpu",
-		"reallayers", opts.NumGPU,
-		"layers", layerCount,
-		"required", format.HumanBytes2(memoryRequiredTotal),
-		"used", format.HumanBytes2(memoryRequiredPartial),
-		"available", format.HumanBytes2(memoryAvailable),
-		"kv", format.HumanBytes2(kv),
-		"fulloffload", format.HumanBytes2(graphFullOffload),
-		"partialoffload", format.HumanBytes2(graphPartialOffload),
+		slog.Group(
+			"layers",
+			// actual number of layers offloaded
+			"real", opts.NumGPU,
+			// estimated number of layers that can be offloaded
+			"estimate", layerCount,
+		),
+		slog.Group(
+			"memory",
+			// memory available for offloading
+			"available", format.HumanBytes2(memoryAvailable),
+			slog.Group(
+				"required",
+				// memory required for full offloading
+				"full", format.HumanBytes2(memoryRequiredTotal),
+				// memory required to offload layers.estimate layers
+				"partial", format.HumanBytes2(memoryRequiredPartial),
+				// memory of KV cache
+				"kv", format.HumanBytes2(kv),
+			),
+			slog.Group(
+				"weights",
+				// memory of the weights
+				"total", format.HumanBytes2(memoryWeights),
+				// memory of repeating layers
+				"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
+				// memory of non-repeating layers
+				"nonrepeating", format.HumanBytes2(memoryLayerOutput),
+			),
+			slog.Group(
+				"graph",
+				// memory of graph when fully offloaded
+				"full", format.HumanBytes2(graphFullOffload),
+				// memory of graph when not fully offloaded
+				"partial", format.HumanBytes2(graphPartialOffload),
+			),
+		),
 	)
 
 	if len(adapters) > 1 {
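
The rewritten log call above replaces the flat key/value list with nested `slog.Group` attributes from the standard `log/slog` package, so related estimates are rendered under dot-qualified keys. A standalone sketch of how the default text handler prints such groups (the values here are placeholders, not real estimates):

```go
package main

import "log/slog"

func main() {
	slog.Info(
		"offload to gpu",
		slog.Group("layers",
			"real", 32, // actual number of layers offloaded
			"estimate", 33, // estimated number that could be offloaded
		),
		slog.Group("memory",
			"available", "21.3 GiB",
			slog.Group("required",
				"full", "5.2 GiB",
				"partial", "5.0 GiB",
			),
		),
	)
	// prints keys like layers.real=32 layers.estimate=33 memory.required.full="5.2 GiB"
}
```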