@@ -97,7 +97,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var layerCount int
 	layers := ggml.Tensors().Layers()
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		memoryLayer := layers[fmt.Sprintf("%d", i)].size()
+		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

 		// KV is proportional to the number of layers
 		memoryLayer += kv / ggml.KV().BlockCount()
@@ -109,7 +109,14 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	}

-	memoryLayerOutput := layers["output"].size()
+	var memoryLayerOutput uint64
+	for k, v := range layers {
+		if !strings.HasPrefix(k, "blk.") {
+			slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
+			memoryLayerOutput += v.size()
+		}
+	}
+
 	memoryRequiredTotal += memoryLayerOutput

 	if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
@@ -124,16 +131,47 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumGPU = layerCount
 	}

+	memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
+
 	slog.Info(
 		"offload to gpu",
-		"reallayers", opts.NumGPU,
-		"layers", layerCount,
-		"required", format.HumanBytes2(memoryRequiredTotal),
-		"used", format.HumanBytes2(memoryRequiredPartial),
-		"available", format.HumanBytes2(memoryAvailable),
-		"kv", format.HumanBytes2(kv),
-		"fulloffload", format.HumanBytes2(graphFullOffload),
-		"partialoffload", format.HumanBytes2(graphPartialOffload),
+		slog.Group(
+			"layers",
+			// actual number of layers offloaded
+			"real", opts.NumGPU,
+			// estimated number of layers that can be offloaded
+			"estimate", layerCount,
+		),
+		slog.Group(
+			"memory",
+			// memory available for offloading
+			"available", format.HumanBytes2(memoryAvailable),
+			slog.Group(
+				"required",
+				// memory required for full offloading
+				"full", format.HumanBytes2(memoryRequiredTotal),
+				// memory required to offload layers.estimate layers
+				"partial", format.HumanBytes2(memoryRequiredPartial),
+				// memory of KV cache
+				"kv", format.HumanBytes2(kv),
+			),
+			slog.Group(
+				"weights",
+				// memory of the weights
+				"total", format.HumanBytes2(memoryWeights),
+				// memory of repeating layers
+				"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
+				// memory of non-repeating layers
+				"nonrepeating", format.HumanBytes2(memoryLayerOutput),
+			),
+			slog.Group(
+				"graph",
+				// memory of graph when fully offloaded
+				"full", format.HumanBytes2(graphFullOffload),
+				// memory of graph when not fully offloaded
+				"partial", format.HumanBytes2(graphPartialOffload),
+			),
+		),
 	)

 	if len(adapters) > 1 {