
revert llm changes

jmorganca 11 months ago
commit e1dfc757b3
8 changed files with 103 additions and 15 deletions
  1. llama/example/main.go (+8 -5)
  2. llm/filetype.go (+5 -5)
  3. llm/generate/gen_darwin.sh (+1 -1)
  4. llm/generate/gen_linux.sh (+13 -0)
  5. llm/generate/gen_windows.ps1 (+34 -0)
  6. llm/ggml.go (+2 -2)
  7. llm/llm.go (+39 -0)
  8. server/images.go (+1 -2)

+ 8 - 5
llama/example/main.go

@@ -6,6 +6,7 @@ import (
 	"io"
 	"log"
 	"os"
+	"runtime"
 	"strings"
 
 	"github.com/ollama/ollama/llama"
@@ -28,9 +29,11 @@ func main() {
 
 	// load the model
 	llama.BackendInit()
-	params := llama.NewModelParams()
+	params := llama.NewModelParams(999, 0, func(p float32) {
+		fmt.Printf("loading... %f\n", p)
+	})
 	model := llama.LoadModelFromFile(*mpath, params)
-	ctxParams := llama.NewContextParams()
+	ctxParams := llama.NewContextParams(2048, runtime.NumCPU(), false)
 
 	// language model context
 	lc := llama.NewContextWithModel(model, ctxParams)
@@ -65,7 +68,7 @@ func main() {
 			panic("prompt must contain exactly one <image>")
 		}
 
-		beforeTokens, err := lc.Model().Tokenize(parts[0], 2048, true, true)
+		beforeTokens, err := lc.Model().Tokenize(parts[0], true, true)
 		if err != nil {
 			panic(err)
 		}
@@ -82,7 +85,7 @@ func main() {
 
 		llama.LlavaEvalImageEmbed(lc, embedding, 512, &nPast)
 
-		afterTokens, err := lc.Model().Tokenize(parts[1], 2048, true, true)
+		afterTokens, err := lc.Model().Tokenize(parts[1], true, true)
 		if err != nil {
 			panic(err)
 		}
@@ -92,7 +95,7 @@ func main() {
 			nPast++
 		}
 	} else {
-		tokens, err := lc.Model().Tokenize(*prompt, 2048, true, true)
+		tokens, err := lc.Model().Tokenize(*prompt, true, true)
 		if err != nil {
 			panic(err)
 		}

+ 5 - 5
llm/filetype.go

@@ -2,10 +2,10 @@ package llm
 
 import "fmt"
 
-type FileType uint32
+type fileType uint32
 
 const (
-	fileTypeF32 FileType = iota
+	fileTypeF32 fileType = iota
 	fileTypeF16
 	fileTypeQ4_0
 	fileTypeQ4_1
@@ -41,7 +41,7 @@ const (
 	fileTypeUnknown
 )
 
-func ParseFileType(s string) (FileType, error) {
+func ParseFileType(s string) (fileType, error) {
 	switch s {
 	case "F32":
 		return fileTypeF32, nil
@@ -108,7 +108,7 @@ func ParseFileType(s string) (FileType, error) {
 	}
 }
 
-func (t FileType) String() string {
+func (t fileType) String() string {
 	switch t {
 	case fileTypeF32:
 		return "F32"
@@ -175,6 +175,6 @@ func (t FileType) String() string {
 	}
 }
 
-func (t FileType) Value() uint32 {
+func (t fileType) Value() uint32 {
 	return uint32(t)
 }
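
This hunk un-exports `FileType` as `fileType`, so other packages can no longer name the type directly; they obtain values through `ParseFileType` and type inference. A minimal in-package sketch of the round trip, not part of this commit (the `"F32"` case is the one visible in the truncated switch above):

```go
package llm

import "fmt"

// exampleFileTypeRoundTrip is an illustrative helper, not part of this commit.
// It parses a quantization name and prints it back via String() and Value().
func exampleFileTypeRoundTrip() error {
	ft, err := ParseFileType("F32") // the "F32" case is shown in the diff above
	if err != nil {
		return err
	}
	fmt.Printf("%s = %d\n", ft.String(), ft.Value()) // prints "F32 = 0"
	return nil
}
```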

+ 1 - 1
llm/generate/gen_darwin.sh

@@ -100,4 +100,4 @@ esac
 
 cleanup
 wait_for_compress
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

+ 13 - 0
llm/generate/gen_linux.sh

@@ -58,6 +58,19 @@ init_vars
 git_module_setup
 apply_patches
 
+init_vars
+if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
+    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
+    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
+    # Static build for linking into the Go binary
+    init_vars
+    CMAKE_TARGETS="--target llama --target ggml"
+    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    BUILD_DIR="../build/linux/${ARCH}_static"
+    echo "Building static library"
+    build
+fi
+
 init_vars
 if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
     # Users building from source can tune the exact flags we pass to cmake for configuring

+ 34 - 0
llm/generate/gen_windows.ps1

@@ -177,6 +177,39 @@ function cleanup {
 # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
 
+
+function build_static() {
+    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
+        # GCC build for direct linking into the Go binary
+        init_vars
+        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
+        # as we need this to be compiled by gcc for golang to be able to link with it
+        write-host "Checking for MinGW..."
+        # error action ensures we exit on failure
+        get-command gcc
+        get-command mingw32-make
+        $oldTargets = $script:cmakeTargets
+        $script:cmakeTargets = @("llama", "ggml")
+        $script:cmakeDefs = @(
+            "-G", "MinGW Makefiles"
+            "-DCMAKE_C_COMPILER=gcc.exe",
+            "-DCMAKE_CXX_COMPILER=g++.exe",
+            "-DBUILD_SHARED_LIBS=off",
+            "-DLLAMA_NATIVE=off",
+            "-DLLAMA_AVX=off",
+            "-DLLAMA_AVX2=off",
+            "-DLLAMA_AVX512=off",
+            "-DLLAMA_F16C=off",
+            "-DLLAMA_FMA=off")
+        $script:buildDir="../build/windows/${script:ARCH}_static"
+        write-host "Building static library"
+        build
+        $script:cmakeTargets = $oldTargets
+    } else {
+        write-host "Skipping CPU generation step as requested"
+    }
+}
+
 function build_cpu($gen_arch) {
     if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
         # remaining llama.cpp builds use MSVC 
@@ -364,6 +397,7 @@ init_vars
 if ($($args.count) -eq 0) {
     git_module_setup
     apply_patches
+    build_static
     if ($script:ARCH -eq "arm64") {
         build_cpu("ARM64")
     } else { # amd64

+ 2 - 2
llm/ggml.go

@@ -55,9 +55,9 @@ func (kv KV) ParameterCount() uint64 {
 	return kv.u64("general.parameter_count")
 }
 
-func (kv KV) FileType() FileType {
+func (kv KV) FileType() fileType {
 	if u64 := kv.u64("general.file_type"); u64 > 0 {
-		return FileType(uint32(u64))
+		return fileType(uint32(u64))
 	}
 
 	return fileTypeUnknown

+ 39 - 0
llm/llm.go

@@ -0,0 +1,39 @@
+package llm
+
+// #cgo CFLAGS: -Illama.cpp
+// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
+// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
+// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
+// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
+// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
+// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
+// #include <stdlib.h>
+// #include "llama.h"
+import "C"
+import (
+	"fmt"
+	"unsafe"
+)
+
+// SystemInfo is an unused example of calling llama.cpp functions using CGo
+func SystemInfo() string {
+	return C.GoString(C.llama_print_system_info())
+}
+
+func Quantize(infile, outfile string, ftype fileType) error {
+	cinfile := C.CString(infile)
+	defer C.free(unsafe.Pointer(cinfile))
+
+	coutfile := C.CString(outfile)
+	defer C.free(unsafe.Pointer(coutfile))
+
+	params := C.llama_model_quantize_default_params()
+	params.nthread = -1
+	params.ftype = ftype.Value()
+
+	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
+		return fmt.Errorf("llama_model_quantize: %d", rc)
+	}
+
+	return nil
+}
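
The new `llm.Quantize` wrapper, linked against the static `libllama.a` produced by the generate scripts above, is what `server/images.go` switches to in the next hunk. A minimal usage sketch from another package, assuming the static libraries have been built and that `"Q4_0"` is one of the names accepted by `ParseFileType`; the file paths are placeholders:

```go
package main

import (
	"log"

	"github.com/ollama/ollama/llm"
)

func main() {
	// Placeholder paths; in the server, the input is a model blob and the
	// output is a temporary file that later replaces it.
	want, err := llm.ParseFileType("Q4_0")
	if err != nil {
		log.Fatal(err)
	}
	if err := llm.Quantize("model-f16.gguf", "model-q4_0.gguf", want); err != nil {
		log.Fatal(err)
	}
	log.Println("quantization complete")
}
```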

+ 1 - 2
server/images.go

@@ -26,7 +26,6 @@ import (
 	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
@@ -454,7 +453,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 						defer temp.Close()
 						defer os.Remove(temp.Name())
 
-						if err := llama.Quantize(blob, temp.Name(), want); err != nil {
+						if err := llm.Quantize(blob, temp.Name(), want); err != nil {
 							return err
 						}