4 meses atrás · 8c9fb8eb73
--- a/model/imageproc/images.go
+++ b/model/imageproc/images.go
@@ -0,0 +1,111 @@
 
				+package imageproc
			
 
				+
			
 
				+import (
			
 
				+	"image"
			
 
				+	"image/color"
			
 
				+
			
 
				+	"golang.org/x/image/draw"
			
 
				+)
			
 
				+
			
 
// Mean and standard deviation presets for Normalize. Each array holds one
// value per channel in r, g, b order; a *Mean value is meant to be passed as
// the mean argument and the matching *STD value as the std argument.
var (
	ImageNetDefaultMean  = [3]float32{0.485, 0.456, 0.406}
	ImageNetDefaultSTD   = [3]float32{0.229, 0.224, 0.225}
	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}
	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}
	ClipDefaultMean      = [3]float32{0.48145466, 0.4578275, 0.40821073}
	ClipDefaultSTD       = [3]float32{0.26862954, 0.26130258, 0.27577711}
)
			
 
				+
			
 
// Resize methods accepted by Resize. Each one selects the interpolator of the
// corresponding name from golang.org/x/image/draw.
const (
	// ResizeBilinear selects draw.BiLinear.
	ResizeBilinear = iota
	// ResizeNearestNeighbor selects draw.NearestNeighbor.
	ResizeNearestNeighbor
	// ResizeApproxBilinear selects draw.ApproxBiLinear.
	ResizeApproxBilinear
	// ResizeCatmullrom selects draw.CatmullRom.
	ResizeCatmullrom
)
			
 
				+
			
 
				+// Composite returns an image with the alpha channel removed by drawing over a white background.
			
 
				+func Composite(img image.Image) image.Image {
			
 
				+	dst := image.NewRGBA(img.Bounds())
			
 
				+
			
 
				+	white := color.RGBA{255, 255, 255, 255}
			
 
				+	draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
			
 
				+	draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
			
 
				+
			
 
				+	return dst
			
 
				+}
			
 
				+
			
 
				+// Resize returns an image which has been scaled to a new size.
			
 
				+func Resize(img image.Image, newSize image.Point, method int) image.Image {
			
 
				+	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
			
 
				+
			
 
				+	kernels := map[int]draw.Interpolator{
			
 
				+		ResizeBilinear:        draw.BiLinear,
			
 
				+		ResizeNearestNeighbor: draw.NearestNeighbor,
			
 
				+		ResizeApproxBilinear:  draw.ApproxBiLinear,
			
 
				+		ResizeCatmullrom:      draw.CatmullRom,
			
 
				+	}
			
 
				+
			
 
				+	kernel, ok := kernels[method]
			
 
				+	if !ok {
			
 
				+		panic("no resizing method found")
			
 
				+	}
			
 
				+
			
 
				+	kernel.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil)
			
 
				+
			
 
				+	return dst
			
 
				+}
			
 
				+
			
 
				+// Normalize returns a slice of float32 containing each of the r, g, b values for an image normalized around a value.
			
 
				+func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
			
 
				+	var pixelVals []float32
			
 
				+
			
 
				+	bounds := img.Bounds()
			
 
				+	if channelFirst {
			
 
				+		var rVals, gVals, bVals []float32
			
 
				+		for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
			
 
				+			for x := bounds.Min.X; x < bounds.Max.X; x++ {
			
 
				+				c := img.At(x, y)
			
 
				+				r, g, b, _ := c.RGBA()
			
 
				+				var rVal, gVal, bVal float32
			
 
				+				if rescale {
			
 
				+					rVal = float32(r>>8) / 255.0
			
 
				+					gVal = float32(g>>8) / 255.0
			
 
				+					bVal = float32(b>>8) / 255.0
			
 
				+				}
			
 
				+
			
 
				+				rVal = (rVal - mean[0]) / std[0]
			
 
				+				gVal = (gVal - mean[1]) / std[1]
			
 
				+				bVal = (bVal - mean[2]) / std[2]
			
 
				+
			
 
				+				rVals = append(rVals, rVal)
			
 
				+				gVals = append(gVals, gVal)
			
 
				+				bVals = append(bVals, bVal)
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		pixelVals = append(pixelVals, rVals...)
			
 
				+		pixelVals = append(pixelVals, gVals...)
			
 
				+		pixelVals = append(pixelVals, bVals...)
			
 
				+	} else {
			
 
				+		for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
			
 
				+			for x := bounds.Min.X; x < bounds.Max.X; x++ {
			
 
				+				c := img.At(x, y)
			
 
				+				r, g, b, _ := c.RGBA()
			
 
				+				var rVal, gVal, bVal float32
			
 
				+				if rescale {
			
 
				+					rVal = float32(r>>8) / 255.0
			
 
				+					gVal = float32(g>>8) / 255.0
			
 
				+					bVal = float32(b>>8) / 255.0
			
 
				+				}
			
 
				+
			
 
				+				rVal = (rVal - mean[0]) / std[0]
			
 
				+				gVal = (gVal - mean[1]) / std[1]
			
 
				+				bVal = (bVal - mean[2]) / std[2]
			
 
				+
			
 
				+				pixelVals = append(pixelVals, rVal, gVal, bVal)
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return pixelVals
			
 
				+}
			
--- a/model/imageproc/images_test.go
+++ b/model/imageproc/images_test.go
@@ -0,0 +1,177 @@
 
				+package imageproc
			
 
				+
			
 
				+import (
			
 
				+	"image"
			
 
				+	"image/color"
			
 
				+	"image/draw"
			
 
				+	"reflect"
			
 
				+	"testing"
			
 
				+)
			
 
				+
			
 
				+func createImage(width, height int, fillCol color.RGBA) image.Image {
			
 
				+	img := image.NewRGBA(image.Rect(0, 0, width, height))
			
 
				+	draw.Draw(img, img.Bounds(), &image.Uniform{fillCol}, image.Point{}, draw.Src)
			
 
				+	return img
			
 
				+}
			
 
				+
			
 
				+func TestComposite(t *testing.T) {
			
 
				+	tests := []struct {
			
 
				+		name         string
			
 
				+		img          image.Image
			
 
				+		expectedRGBA color.RGBA
			
 
				+	}{
			
 
				+		{
			
 
				+			name:         "Transparent image",
			
 
				+			img:          createImage(5, 5, color.RGBA{0, 0, 0, 0}),
			
 
				+			expectedRGBA: color.RGBA{255, 255, 255, 255},
			
 
				+		},
			
 
				+		{
			
 
				+			name:         "Solid red image",
			
 
				+			img:          createImage(5, 5, color.RGBA{255, 0, 0, 255}),
			
 
				+			expectedRGBA: color.RGBA{255, 0, 0, 255},
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, tt := range tests {
			
 
				+		t.Run(tt.name, func(t *testing.T) {
			
 
				+			resultImg := Composite(tt.img)
			
 
				+
			
 
				+			// Check the pixel values in the resulting image
			
 
				+			for x := range resultImg.Bounds().Dx() {
			
 
				+				for y := range resultImg.Bounds().Dy() {
			
 
				+					r, g, b, a := resultImg.At(x, y).RGBA()
			
 
				+					expectedR, expectedG, expectedB, expectedA := tt.expectedRGBA.RGBA()
			
 
				+
			
 
				+					if r != expectedR || g != expectedG || b != expectedB || a != expectedA {
			
 
				+						t.Errorf("Pixel mismatch at (%d, %d): got (%d, %d, %d, %d), want (%d, %d, %d, %d)",
			
 
				+							x, y, r, g, b, a, expectedR, expectedG, expectedB, expectedA)
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+		})
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestResize(t *testing.T) {
			
 
				+	tests := []struct {
			
 
				+		name     string
			
 
				+		img      image.Image
			
 
				+		newSize  image.Point
			
 
				+		method   int
			
 
				+		expected image.Point
			
 
				+	}{
			
 
				+		{
			
 
				+			name:     "Resize with bilinear interpolation",
			
 
				+			img:      createImage(5, 5, color.RGBA{255, 0, 0, 255}),
			
 
				+			newSize:  image.Point{10, 10},
			
 
				+			method:   ResizeBilinear,
			
 
				+			expected: image.Point{10, 10},
			
 
				+		},
			
 
				+		{
			
 
				+			name:     "Resize with nearest neighbor",
			
 
				+			img:      createImage(10, 10, color.RGBA{0, 255, 0, 255}),
			
 
				+			newSize:  image.Point{5, 5},
			
 
				+			method:   ResizeNearestNeighbor,
			
 
				+			expected: image.Point{5, 5},
			
 
				+		},
			
 
				+		{
			
 
				+			name:     "Resize with catmullrom",
			
 
				+			img:      createImage(1024, 1024, color.RGBA{0, 0, 255, 255}),
			
 
				+			newSize:  image.Point{10, 10},
			
 
				+			method:   ResizeCatmullrom,
			
 
				+			expected: image.Point{10, 10},
			
 
				+		},
			
 
				+		{
			
 
				+			name:     "Resize with approx bilinear",
			
 
				+			img:      createImage(1024, 768, color.RGBA{100, 100, 100, 255}),
			
 
				+			newSize:  image.Point{4, 3},
			
 
				+			method:   ResizeApproxBilinear,
			
 
				+			expected: image.Point{4, 3},
			
 
				+		},
			
 
				+	}
			
 
				+	for _, tt := range tests {
			
 
				+		t.Run(tt.name, func(t *testing.T) {
			
 
				+			resizedImg := Resize(tt.img, tt.newSize, tt.method)
			
 
				+
			
 
				+			if resizedImg.Bounds().Dx() != tt.expected.X || resizedImg.Bounds().Dy() != tt.expected.Y {
			
 
				+				t.Errorf("Unexpected size for resized image: got (%d, %d), want (%d, %d)",
			
 
				+					resizedImg.Bounds().Dx(), resizedImg.Bounds().Dy(), tt.expected.X, tt.expected.Y)
			
 
				+			}
			
 
				+		})
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestResizeInvalidMethod(t *testing.T) {
			
 
				+	defer func() {
			
 
				+		if r := recover(); r == nil {
			
 
				+			t.Errorf("Expected panic for invalid resizing method, but did not panic")
			
 
				+		}
			
 
				+	}()
			
 
				+
			
 
				+	img := createImage(10, 10, color.RGBA{0, 0, 0, 255})
			
 
				+	Resize(img, image.Point{5, 5}, -1)
			
 
				+}
			
 
				+
			
 
				+func TestNormalize(t *testing.T) {
			
 
				+	tests := []struct {
			
 
				+		name         string
			
 
				+		img          image.Image
			
 
				+		mean         [3]float32
			
 
				+		std          [3]float32
			
 
				+		rescale      bool
			
 
				+		channelFirst bool
			
 
				+		expected     []float32
			
 
				+	}{
			
 
				+		{
			
 
				+			name:         "Rescale with channel first",
			
 
				+			img:          createImage(2, 2, color.RGBA{128, 128, 128, 255}),
			
 
				+			mean:         ImageNetStandardMean,
			
 
				+			std:          ImageNetStandardSTD,
			
 
				+			rescale:      true,
			
 
				+			channelFirst: true,
			
 
				+			expected: []float32{
			
 
				+				0.003921628, 0.003921628, 0.003921628, 0.003921628, // R values
			
 
				+				0.003921628, 0.003921628, 0.003921628, 0.003921628, // G values
			
 
				+				0.003921628, 0.003921628, 0.003921628, 0.003921628, // B values
			
 
				+			},
			
 
				+		},
			
 
				+		{
			
 
				+			name:         "Rescale without channel first",
			
 
				+			img:          createImage(2, 2, color.RGBA{255, 0, 0, 255}),
			
 
				+			mean:         [3]float32{0.0, 0.0, 0.0},
			
 
				+			std:          [3]float32{1.0, 1.0, 1.0},
			
 
				+			rescale:      true,
			
 
				+			channelFirst: false,
			
 
				+			expected: []float32{
			
 
				+				1.0, 0.0, 0.0,
			
 
				+				1.0, 0.0, 0.0,
			
 
				+				1.0, 0.0, 0.0,
			
 
				+				1.0, 0.0, 0.0,
			
 
				+			},
			
 
				+		},
			
 
				+		{
			
 
				+			name:         "No rescale with mean/std adjustment",
			
 
				+			img:          createImage(2, 2, color.RGBA{100, 150, 200, 255}),
			
 
				+			mean:         ClipDefaultMean,
			
 
				+			std:          ClipDefaultSTD,
			
 
				+			rescale:      false,
			
 
				+			channelFirst: false,
			
 
				+			expected: []float32{
			
 
				+				-1.7922626, -1.7520971, -1.4802198,
			
 
				+				-1.7922626, -1.7520971, -1.4802198,
			
 
				+				-1.7922626, -1.7520971, -1.4802198,
			
 
				+				-1.7922626, -1.7520971, -1.4802198,
			
 
				+			},
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, tt := range tests {
			
 
				+		t.Run(tt.name, func(t *testing.T) {
			
 
				+			result := Normalize(tt.img, tt.mean, tt.std, tt.rescale, tt.channelFirst)
			
 
				+
			
 
				+			if !reflect.DeepEqual(result, tt.expected) {
			
 
				+				t.Errorf("Test %s failed: got %v, want %v", tt.name, result, tt.expected)
			
 
				+			}
			
 
				+		})
			
 
				+	}
			
 
				+}
			
--- a/server/imageproc/images.go
+++ b/server/imageproc/images.go
@@ -1,19 +1,20 @@
 
				-package imageproc
			
 
				+package mllama
			
 
				 
			
 
				 import (
			
 
				-	"bytes"
			
 
				 	"fmt"
			
 
				 	"image"
			
 
				-	"image/color"
			
 
				 	_ "image/jpeg"
			
 
				 	_ "image/png"
			
 
				+	"io"
			
 
				 	"math"
			
 
				 	"slices"
			
 
				 
			
 
				 	"golang.org/x/image/draw"
			
 
				+
			
 
				+	"github.com/ollama/ollama/model/imageproc"
			
 
				 )
			
 
				 
			
 
				-func GetSupportedAspectRatios(maxTiles int) []image.Point {
			
 
				+func getSupportedAspectRatios(maxTiles int) []image.Point {
			
 
				 	ratios := []image.Point{}
			
 
				 
			
 
				 	for w := range maxTiles {
			
@@ -37,28 +38,8 @@ func clip(a, a_min, a_max int) int {
 
				 	return a
			
 
				 }
			
 
				 
			
 
				-func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
			
 
				-	targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
			
 
				-	targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
			
 
				-
			
 
				-	scaleWidth := float64(targetWidth) / float64(imageSize.X)
			
 
				-	scaleHeight := float64(targetHeight) / float64(imageSize.Y)
			
 
				-
			
 
				-	var w, h int
			
 
				-
			
 
				-	if scaleWidth < scaleHeight {
			
 
				-		w = targetWidth
			
 
				-		h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
			
 
				-	} else {
			
 
				-		w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
			
 
				-		h = targetHeight
			
 
				-	}
			
 
				-
			
 
				-	return image.Point{w, h}
			
 
				-}
			
 
				-
			
 
				 func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
			
 
				-	possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
			
 
				+	possibleTileArrangements := getSupportedAspectRatios(maxImageTiles)
			
 
				 	possibleCanvasSizes := []image.Point{}
			
 
				 	for _, pta := range possibleTileArrangements {
			
 
				 		possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
			
@@ -113,41 +94,29 @@ func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
 
				 	return selectedCanvas
			
 
				 }
			
 
				 
			
 
				-func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
			
 
				-	b := img.Bounds()
			
 
				-	width := b.Max.X - b.Min.X
			
 
				-	height := b.Max.Y - b.Min.Y
			
 
				-	tileHeight := height / numTilesSize.Y
			
 
				-	tileWidth := width / numTilesSize.X
			
 
				-
			
 
				-	images := []image.Image{}
			
 
				-
			
 
				-	for h := range numTilesSize.Y {
			
 
				-		for w := range numTilesSize.X {
			
 
				-			rect := image.Rect(tileWidth*w, tileHeight*h, tileWidth*(w+1), tileHeight*(h+1))
			
 
				-			images = append(images, img.(interface {
			
 
				-				SubImage(image.Rectangle) image.Image
			
 
				-			}).SubImage(rect))
			
 
				-		}
			
 
				-	}
			
 
				+func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
			
 
				+	targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
			
 
				+	targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
			
 
				 
			
 
				-	return images
			
 
				-}
			
 
				+	scaleWidth := float64(targetWidth) / float64(imageSize.X)
			
 
				+	scaleHeight := float64(targetHeight) / float64(imageSize.Y)
			
 
				 
			
 
				-// remove the "alpha" channel by drawing over a prefilled image
			
 
				-func compositeImage(img image.Image) image.Image {
			
 
				-	dst := image.NewRGBA(img.Bounds())
			
 
				+	var w, h int
			
 
				 
			
 
				-	white := color.RGBA{255, 255, 255, 255}
			
 
				-	draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
			
 
				-	draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
			
 
				+	if scaleWidth < scaleHeight {
			
 
				+		w = targetWidth
			
 
				+		h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
			
 
				+	} else {
			
 
				+		w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
			
 
				+		h = targetHeight
			
 
				+	}
			
 
				 
			
 
				-	return dst
			
 
				+	return image.Point{w, h}
			
 
				 }
			
 
				 
			
 
				-func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
			
 
				+func resizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
			
 
				 	if format == "png" {
			
 
				-		img = compositeImage(img)
			
 
				+		img = imageproc.Composite(img)
			
 
				 	}
			
 
				 
			
 
				 	b := img.Bounds()
			
@@ -157,19 +126,10 @@ func ResizeImage(img image.Image, format string, outputSize image.Point, maxImag
 
				 	aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
			
 
				 	newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
			
 
				 
			
 
				-	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
			
 
				-
			
 
				-	// scaling choices:
			
 
				-	//   NearestNeighbor	fast, blocky output
			
 
				-	//   ApproxBiLinear	fast, medium quality
			
 
				-	//   BiLinear		slow, high quality
			
 
				-	//   CatmullRom		very slow, very high quality
			
 
				-	draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
			
 
				-
			
 
				-	return dst, aspectRatio
			
 
				+	return imageproc.Resize(img, newSize, imageproc.ResizeBilinear), aspectRatio
			
 
				 }
			
 
				 
			
 
				-func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
			
 
				+func padImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
			
 
				 	paddedSize := image.Point{
			
 
				 		X: outputSize.X * aspectRatio.X,
			
 
				 		Y: outputSize.Y * aspectRatio.Y,
			
@@ -181,60 +141,61 @@ func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image
 
				 	return dst
			
 
				 }
			
 
				 
			
 
				-func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
			
 
				+func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
			
 
				+	b := img.Bounds()
			
 
				+	width := b.Max.X - b.Min.X
			
 
				+	height := b.Max.Y - b.Min.Y
			
 
				+	tileHeight := height / numTilesSize.Y
			
 
				+	tileWidth := width / numTilesSize.X
			
 
				+
			
 
				+	images := []image.Image{}
			
 
				+
			
 
				+	for h := range numTilesSize.Y {
			
 
				+		for w := range numTilesSize.X {
			
 
				+			rect := image.Rect(tileWidth*w, tileHeight*h, tileWidth*(w+1), tileHeight*(h+1))
			
 
				+			images = append(images, img.(interface {
			
 
				+				SubImage(image.Rectangle) image.Image
			
 
				+			}).SubImage(rect))
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return images
			
 
				+}
			
 
				+
			
 
				+func packImages(img image.Image, aspectRatio image.Point) []float32 {
			
 
				 	subImages := splitToTiles(img, aspectRatio)
			
 
				 
			
 
				 	var pixelVals []float32
			
 
				 
			
 
				+	rescale := true
			
 
				+	channelFirst := true
			
 
				+
			
 
				 	for _, subImg := range subImages {
			
 
				-		bounds := subImg.Bounds()
			
 
				-		var rVals, gVals, bVals []float32
			
 
				-		for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
			
 
				-			for x := bounds.Min.X; x < bounds.Max.X; x++ {
			
 
				-				c := subImg.At(x, y)
			
 
				-				r, g, b, _ := c.RGBA()
			
 
				-				rVal := float32(r>>8) / 255.0
			
 
				-				gVal := float32(g>>8) / 255.0
			
 
				-				bVal := float32(b>>8) / 255.0
			
 
				-
			
 
				-				rVal = (rVal - mean[0]) / std[0]
			
 
				-				gVal = (gVal - mean[1]) / std[1]
			
 
				-				bVal = (bVal - mean[2]) / std[2]
			
 
				-
			
 
				-				rVals = append(rVals, rVal)
			
 
				-				gVals = append(gVals, gVal)
			
 
				-				bVals = append(bVals, bVal)
			
 
				-			}
			
 
				-		}
			
 
				-		pixelVals = append(pixelVals, rVals...)
			
 
				-		pixelVals = append(pixelVals, gVals...)
			
 
				-		pixelVals = append(pixelVals, bVals...)
			
 
				+		vals := imageproc.Normalize(subImg, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, rescale, channelFirst)
			
 
				+		pixelVals = append(pixelVals, vals...)
			
 
				 	}
			
 
				 
			
 
				 	return pixelVals
			
 
				 }
			
 
				 
			
 
				-func Preprocess(imageData []byte) ([]float32, int, error) {
			
 
				-	// todo: need guard in here for bad image data
			
 
				-
			
 
				-	// mllama values
			
 
				+func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
			
 
				 	outputSize := image.Point{560, 560}
			
 
				 	maxTiles := 4
			
 
				 
			
 
				-	// clip values
			
 
				-	mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
			
 
				-	std := [3]float32{0.26862954, 0.26130258, 0.27577711}
			
 
				-
			
 
				-	img, format, err := image.Decode(bytes.NewReader(imageData))
			
 
				+	img, format, err := image.Decode(imageData)
			
 
				 	if err != nil {
			
 
				-		return nil, 0, fmt.Errorf("failed to decode image: %w", err)
			
 
				+		return nil, nil, fmt.Errorf("failed to decode image: %w", err)
			
 
				 	}
			
 
				 
			
 
				-	newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles)
			
 
				-	newImage = PadImage(newImage, outputSize, aspectRatio)
			
 
				+	newImage, aspectRatio := resizeImage(img, format, outputSize, maxTiles)
			
 
				+	newImage = padImage(newImage, outputSize, aspectRatio)
			
 
				 
			
 
				-	data := PackImages(newImage, aspectRatio, mean, std)
			
 
				-	aspectRatioIndex := slices.Index(GetSupportedAspectRatios(maxTiles), aspectRatio) + 1
			
 
				+	data := packImages(newImage, aspectRatio)
			
 
				+	aspectRatioIndex := slices.Index(getSupportedAspectRatios(maxTiles), aspectRatio) + 1
			
 
				+
			
 
				+	opts := map[string]any{
			
 
				+		"aspectRatioIndex": aspectRatioIndex,
			
 
				+	}
			
 
				 
			
 
				-	return data, aspectRatioIndex, nil
			
 
				+	return data, opts, nil
			
 
				 }
			
--- a/server/imageproc/images_test.go
+++ b/server/imageproc/images_test.go
@@ -1,4 +1,4 @@
 
				-package imageproc
			
 
				+package mllama
			
 
				 
			
 
				 import (
			
 
				 	"bytes"
			
@@ -35,7 +35,7 @@ func TestAspectRatios(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actual := GetSupportedAspectRatios(c.MaxTiles)
			
 
				+		actual := getSupportedAspectRatios(c.MaxTiles)
			
 
				 
			
 
				 		if diff := cmp.Diff(actual, c.Expected); diff != "" {
			
 
				 			t.Errorf("mismatch (-got +want):\n%s", diff)
			
@@ -299,7 +299,7 @@ func TestResize(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actualImage, actualAspectRatio := ResizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
			
 
				+		actualImage, actualAspectRatio := resizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
			
 
				 
			
 
				 		if actualImage.Bounds() != c.ExpectedImage.Bounds() {
			
 
				 			t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds())
			
@@ -329,7 +329,7 @@ func TestPad(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actual := PadImage(c.TestImage, c.OutputSize, c.AspectRatio)
			
 
				+		actual := padImage(c.TestImage, c.OutputSize, c.AspectRatio)
			
 
				 
			
 
				 		if actual.Bounds() != c.Expected.Bounds() {
			
 
				 			t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
			
@@ -344,9 +344,6 @@ func TestPackImages(t *testing.T) {
 
				 		ExpectedVals int
			
 
				 	}
			
 
				 
			
 
				-	mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
			
 
				-	std := [3]float32{0.26862954, 0.26130258, 0.27577711}
			
 
				-
			
 
				 	cases := []packCase{
			
 
				 		{
			
 
				 			TestImage:    image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
			
@@ -366,7 +363,7 @@ func TestPackImages(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std)
			
 
				+		actualVals := packImages(c.TestImage, c.AspectRatio)
			
 
				 		if len(actualVals) != c.ExpectedVals {
			
 
				 			t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
			
 
				 		}
			
@@ -400,7 +397,7 @@ func TestPreprocess(t *testing.T) {
 
				 			t.Fatal(err)
			
 
				 		}
			
 
				 
			
 
				-		imgData, aspectRatioID, err := Preprocess(buf.Bytes())
			
 
				+		imgData, opts, err := Preprocess(&buf)
			
 
				 		if err != nil {
			
 
				 			t.Fatalf("error processing: %q", err)
			
 
				 		}
			
@@ -409,6 +406,13 @@ func TestPreprocess(t *testing.T) {
 
				 			t.Errorf("no image data returned")
			
 
				 		}
			
 
				 
			
 
				+		ar, ok := opts["aspectRatioIndex"]
			
 
				+		if !ok {
			
 
				+			t.Fatalf("no aspect ratio found")
			
 
				+		}
			
 
				+
			
 
				+		aspectRatioID := ar.(int)
			
 
				+
			
 
				 		if aspectRatioID != c.ExpectedAspectRatioID {
			
 
				 			t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
			
 
				 		}
			
--- a/model/pixtral/imageproc.go
+++ b/model/pixtral/imageproc.go
@@ -0,0 +1,68 @@
 
				+package pixtral
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"image"
			
 
				+	_ "image/jpeg"
			
 
				+	_ "image/png"
			
 
				+	"io"
			
 
				+	"math"
			
 
				+
			
 
				+	"github.com/ollama/ollama/model/imageproc"
			
 
				+)
			
 
				+
			
 
				+func getNumImageTokens(imageSize, patchSize image.Point) image.Point {
			
 
				+	return image.Point{
			
 
				+		(imageSize.X-1)/patchSize.X + 1,
			
 
				+		(imageSize.Y-1)/patchSize.Y + 1,
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func getResizeOutputImageSize(img image.Image, longestEdge int, patchSize image.Point) image.Point {
			
 
				+	b := img.Bounds()
			
 
				+	le := float64(longestEdge)
			
 
				+	ratio := math.Max(float64(b.Max.Y)/le, float64(b.Max.X)/le)
			
 
				+
			
 
				+	newSize := img.Bounds().Max
			
 
				+
			
 
				+	if ratio > 1.0 {
			
 
				+		newSize = image.Point{
			
 
				+			int(math.Ceil(float64(b.Max.X) / ratio)),
			
 
				+			int(math.Ceil(float64(b.Max.Y) / ratio)),
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	tokens := getNumImageTokens(newSize, patchSize)
			
 
				+	return image.Point{
			
 
				+		tokens.X * patchSize.X,
			
 
				+		tokens.Y * patchSize.Y,
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func resizeImage(img image.Image, format string, longestEdge int, patchSize image.Point) image.Image {
			
 
				+	if format == "png" {
			
 
				+		img = imageproc.Composite(img)
			
 
				+	}
			
 
				+
			
 
				+	newSize := getResizeOutputImageSize(img, longestEdge, patchSize)
			
 
				+
			
 
				+	// todo should be ResizeBicubic, but it doesn't exist
			
 
				+	return imageproc.Resize(img, newSize, imageproc.ResizeBilinear)
			
 
				+}
			
 
				+
			
 
				+func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
			
 
				+	img, format, err := image.Decode(imageData)
			
 
				+	if err != nil {
			
 
				+		return nil, nil, fmt.Errorf("failed to decode image: %w", err)
			
 
				+	}
			
 
				+
			
 
				+	longestEdge := 1024
			
 
				+	patchSize := image.Point{16, 16}
			
 
				+
			
 
				+	img = resizeImage(img, format, longestEdge, patchSize)
			
 
				+
			
 
				+	data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
			
 
				+
			
 
				+	opts := map[string]any{}
			
 
				+	return data, opts, nil
			
 
				+}
			
--- a/model/pixtral/imageproc_test.go
+++ b/model/pixtral/imageproc_test.go
@@ -0,0 +1,219 @@
 
				+package pixtral
			
 
				+
			
 
				+import (
			
 
				+	"bytes"
			
 
				+	"encoding/binary"
			
 
				+	"image"
			
 
				+	"image/png"
			
 
				+	"math"
			
 
				+	"os"
			
 
				+	"testing"
			
 
				+
			
 
				+	"github.com/google/go-cmp/cmp"
			
 
				+)
			
 
				+
			
 
				+func TestGetNumImageTokens(t *testing.T) {
			
 
				+	type numImageTokensCase struct {
			
 
				+		ImageSize image.Point
			
 
				+		PatchSize image.Point
			
 
				+		Expected  image.Point
			
 
				+	}
			
 
				+
			
 
				+	cases := []numImageTokensCase{
			
 
				+		{
			
 
				+			ImageSize: image.Point{1024, 764},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{64, 48},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{800, 600},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{50, 38},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{640, 480},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{40, 30},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{320, 200},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{20, 13},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{1320, 200},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{83, 13},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{2000, 200},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{125, 13},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{10000, 200},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{625, 13},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{1131, 577},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{71, 37},
			
 
				+		},
			
 
				+		{
			
 
				+			ImageSize: image.Point{16, 16},
			
 
				+			PatchSize: image.Point{16, 16},
			
 
				+			Expected:  image.Point{1, 1},
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		actual := getNumImageTokens(c.ImageSize, c.PatchSize)
			
 
				+
			
 
				+		if diff := cmp.Diff(actual, c.Expected); diff != "" {
			
 
				+			t.Errorf("mismatch (-got +want):\n%s", diff)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestGetResizeOutputImageSize(t *testing.T) {
			
 
				+	type resizeCase struct {
			
 
				+		Image       image.Image
			
 
				+		LongestEdge int
			
 
				+		PatchSize   image.Point
			
 
				+		Expected    image.Point
			
 
				+	}
			
 
				+
			
 
				+	cases := []resizeCase{
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 1024, 768)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.Point{1024, 768},
			
 
				+		},
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 1162, 690)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.Point{1024, 624},
			
 
				+		},
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 300, 200)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.Point{304, 208},
			
 
				+		},
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 1862, 522)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.Point{1024, 288},
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		actual := getResizeOutputImageSize(c.Image, c.LongestEdge, c.PatchSize)
			
 
				+
			
 
				+		if diff := cmp.Diff(actual, c.Expected); diff != "" {
			
 
				+			t.Errorf("mismatch (-got +want):\n%s", diff)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestResize(t *testing.T) {
			
 
				+	type resizeCase struct {
			
 
				+		Image       image.Image
			
 
				+		LongestEdge int
			
 
				+		PatchSize   image.Point
			
 
				+		Expected    image.Image
			
 
				+	}
			
 
				+
			
 
				+	cases := []resizeCase{
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 1862, 522)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.NewRGBA(image.Rect(0, 0, 1024, 288)),
			
 
				+		},
			
 
				+		{
			
 
				+			Image:       image.NewRGBA(image.Rect(0, 0, 10, 10)),
			
 
				+			LongestEdge: 1024,
			
 
				+			PatchSize:   image.Point{16, 16},
			
 
				+			Expected:    image.NewRGBA(image.Rect(0, 0, 16, 16)),
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		actual := resizeImage(c.Image, "png", c.LongestEdge, c.PatchSize)
			
 
				+
			
 
				+		if actual.Bounds() != c.Expected.Bounds() {
			
 
				+			t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestPreprocess(t *testing.T) {
			
 
				+	type preprocessCase struct {
			
 
				+		TestImage   image.Image
			
 
				+		ExpectedLen int
			
 
				+	}
			
 
				+
			
 
				+	cases := []preprocessCase{
			
 
				+		{
			
 
				+			TestImage:   image.NewRGBA(image.Rect(0, 0, 10, 10)),
			
 
				+			ExpectedLen: 16 * 16 * 3 * 1,
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage:   image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
			
 
				+			ExpectedLen: 1024 * 1024 * 3 * 1,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		var buf bytes.Buffer
			
 
				+		err := png.Encode(&buf, c.TestImage)
			
 
				+		if err != nil {
			
 
				+			t.Fatal(err)
			
 
				+		}
			
 
				+
			
 
				+		imgData, _, err := Preprocess(&buf)
			
 
				+		if err != nil {
			
 
				+			t.Fatalf("error processing: %q", err)
			
 
				+		}
			
 
				+
			
 
				+		switch len(imgData) {
			
 
				+		case 0:
			
 
				+			t.Errorf("no image data returned")
			
 
				+		case c.ExpectedLen:
			
 
				+			// ok
			
 
				+		default:
			
 
				+			t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestPreprocessImages(t *testing.T) {
			
 
				+	for _, testFile := range []string{"flight.png", "sportsball.png"} {
			
 
				+		f, err := os.Open(testFile)
			
 
				+		if err != nil {
			
 
				+			t.Skipf("skipping test, no test image found at %s", testFile)
			
 
				+		}
			
 
				+		defer f.Close()
			
 
				+
			
 
				+		imgData, _, err := Preprocess(f)
			
 
				+		if err != nil {
			
 
				+			t.Fatalf("error processing: %q", err)
			
 
				+		}
			
 
				+
			
 
				+		byteData := make([]byte, len(imgData)*4) // float32 is 4 bytes
			
 
				+		for i, f := range imgData {
			
 
				+			binary.LittleEndian.PutUint32(byteData[i*4:], math.Float32bits(f))
			
 
				+		}
			
 
				+
			
 
				+		outputPath := "processed_" + testFile + ".bin"
			
 
				+		err = os.WriteFile(outputPath, byteData, 0o644)
			
 
				+		if err != nil {
			
 
				+			t.Fatalf("error writing processed image: %q", err)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/model/qwen2vl/imageproc.go
+++ b/model/qwen2vl/imageproc.go
@@ -0,0 +1,74 @@
 
				+package qwen2vl
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"image"
			
 
				+	_ "image/jpeg"
			
 
				+	_ "image/png"
			
 
				+	"io"
			
 
				+	"math"
			
 
				+
			
 
				+	"github.com/ollama/ollama/model/imageproc"
			
 
				+)
			
 
				+
			
 
				+// Default parameters that Preprocess passes to smartResize.
				+const (
			
 
				+	// DefaultFactor is the value both output dimensions are made a multiple of.
				+	DefaultFactor    = 28
			
 
				+	// DefaultMinPixels is the lower bound on the resized image area, in pixels.
				+	DefaultMinPixels = 56 * 56
			
 
				+	// DefaultMaxPixels is the upper bound on the resized image area, in pixels.
				+	DefaultMaxPixels = 14 * 14 * 4 * 1280
			
 
				+)
			
 
				+
			
 
				// smartResize calculates the size an image of the given size should be
				// resized to, based on factor, minPixels, and maxPixels.
				//
				// The returned size is chosen so that:
				//  1. Both dimensions are multiples of factor.
				//  2. The area is between minPixels and maxPixels.
				//  3. The aspect ratio of the input is preserved as closely as possible.
				//
				// It panics if either dimension is smaller than factor, or if the longer
				// side is more than 200 times the shorter side.
				func smartResize(size image.Point, factor, minPixels, maxPixels int) image.Point {
					long, short := size.X, size.Y
					if long < short {
						long, short = short, long
					}

					if short < factor {
						panic("image is too small to resize")
					}
					// Compare by multiplication: integer division truncates and would
					// let ratios such as 200.9:1 slip through.
					if long > 200*short {
						panic("aspect ratio must be less than 200:1")
					}

					f := float64(factor)
					width, height := float64(size.X), float64(size.Y)

					// Snap each side to the nearest multiple of factor.
					xBar := math.Round(width/f) * f
					yBar := math.Round(height/f) * f

					switch area := xBar * yBar; {
					case area > float64(maxPixels):
						// Too large: scale both sides down by the same ratio and round
						// down. Clamp to one factor so a side can never collapse to zero.
						beta := math.Sqrt(height * width / float64(maxPixels))
						xBar = math.Max(f, math.Floor(width/beta/f)*f)
						yBar = math.Max(f, math.Floor(height/beta/f)*f)
					case area < float64(minPixels):
						// Too small: scale both sides up by the same ratio and round up.
						beta := math.Sqrt(float64(minPixels) / (height * width))
						xBar = math.Ceil(width*beta/f) * f
						yBar = math.Ceil(height*beta/f) * f
					}

					return image.Point{X: int(xBar), Y: int(yBar)}
				}
			
 
				+
			
 
				+func resizeImage(img image.Image, format string, size image.Point) image.Image {
			
 
				+	if format == "png" {
			
 
				+		img = imageproc.Composite(img)
			
 
				+	}
			
 
				+
			
 
				+	return imageproc.Resize(img, size, imageproc.ResizeBilinear)
			
 
				+}
			
 
				+
			
 
				+func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
			
 
				+	img, format, err := image.Decode(imageData)
			
 
				+	if err != nil {
			
 
				+		return nil, nil, fmt.Errorf("failed to decode image: %w", err)
			
 
				+	}
			
 
				+
			
 
				+	size := smartResize(img.Bounds().Max, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
			
 
				+	img = resizeImage(img, format, size)
			
 
				+
			
 
				+	data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
			
 
				+
			
 
				+	opts := map[string]any{}
			
 
				+	return data, opts, nil
			
 
				+}
			
--- a/model/qwen2vl/imageproc_test.go
+++ b/model/qwen2vl/imageproc_test.go
@@ -0,0 +1,78 @@
 
				+package qwen2vl
			
 
				+
			
 
				+import (
			
 
				+	"bytes"
			
 
				+	"image"
			
 
				+	"image/png"
			
 
				+	"testing"
			
 
				+)
			
 
				+
			
 
				+func TestSmartResize(t *testing.T) {
			
 
				+	type smartResizeCase struct {
			
 
				+		TestImage image.Image
			
 
				+		Expected  image.Point
			
 
				+	}
			
 
				+
			
 
				+	cases := []smartResizeCase{
			
 
				+		{
			
 
				+			TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 1024)),
			
 
				+			Expected:  image.Point{980, 980},
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
			
 
				+			Expected:  image.Point{1036, 756},
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
			
 
				+			Expected:  image.Point{980, 980},
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		b := c.TestImage.Bounds().Max
			
 
				+		actual := smartResize(b, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
			
 
				+		if actual != c.Expected {
			
 
				+			t.Errorf("expected: %v, actual: %v", c.Expected, actual)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestPreprocess(t *testing.T) {
			
 
				+	type preprocessCase struct {
			
 
				+		TestImage   image.Image
			
 
				+		ExpectedLen int
			
 
				+	}
			
 
				+
			
 
				+	cases := []preprocessCase{
			
 
				+		{
			
 
				+			TestImage:   image.NewRGBA(image.Rect(0, 0, 256, 256)),
			
 
				+			ExpectedLen: 252 * 252 * 3 * 1,
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage:   image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
			
 
				+			ExpectedLen: 980 * 980 * 3 * 1,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		var buf bytes.Buffer
			
 
				+		err := png.Encode(&buf, c.TestImage)
			
 
				+		if err != nil {
			
 
				+			t.Fatal(err)
			
 
				+		}
			
 
				+
			
 
				+		imgData, _, err := Preprocess(&buf)
			
 
				+		if err != nil {
			
 
				+			t.Fatalf("error processing: %q", err)
			
 
				+		}
			
 
				+
			
 
				+		switch len(imgData) {
			
 
				+		case 0:
			
 
				+			t.Errorf("no image data returned")
			
 
				+		case c.ExpectedLen:
			
 
				+			// ok
			
 
				+		default:
			
 
				+			t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -11,7 +11,7 @@ import (
 
				 
			
 
				 	"github.com/ollama/ollama/api"
			
 
				 	"github.com/ollama/ollama/llm"
			
 
				-	"github.com/ollama/ollama/server/imageproc"
			
 
				+	"github.com/ollama/ollama/model/mllama"
			
 
				 	"github.com/ollama/ollama/template"
			
 
				 )
			
 
				 
			
@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 
				 			var imgData llm.ImageData
			
 
				 
			
 
				 			if isMllama {
			
 
				-				data, aspectRatioID, err := imageproc.Preprocess(i)
			
 
				+				data, opts, err := mllama.Preprocess(bytes.NewReader(i))
			
 
				 				if err != nil {
			
 
				 					return "", nil, err
			
 
				 				}
			
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 
				 					return "", nil, err
			
 
				 				}
			
 
				 
			
 
				+				ar, ok := opts["aspectRatioIndex"].(int)
			
 
				+				if !ok {
			
 
				+					return "", nil, fmt.Errorf("missing aspect ratio for image")
			
 
				+				}
			
 
				+
			
 
				 				imgData = llm.ImageData{
			
 
				 					ID:            len(images),
			
 
				 					Data:          buf.Bytes(),
			
 
				-					AspectRatioID: aspectRatioID,
			
 
				+					AspectRatioID: ar,
			
 
				 				}
			
 
				 				imgPrompt = "<|image|>"
			
 
				 			} else {
			
--- a/server/routes.go
+++ b/server/routes.go
@@ -31,10 +31,10 @@ import (
 
				 	"github.com/ollama/ollama/discover"
			
 
				 	"github.com/ollama/ollama/envconfig"
			
 
				 	"github.com/ollama/ollama/llm"
			
 
				+	"github.com/ollama/ollama/model/mllama"
			
 
				 	"github.com/ollama/ollama/openai"
			
 
				 	"github.com/ollama/ollama/parser"
			
 
				 	"github.com/ollama/ollama/runners"
			
 
				-	"github.com/ollama/ollama/server/imageproc"
			
 
				 	"github.com/ollama/ollama/template"
			
 
				 	"github.com/ollama/ollama/types/errtypes"
			
 
				 	"github.com/ollama/ollama/types/model"
			
@@ -205,12 +205,18 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 
				 	images := make([]llm.ImageData, len(req.Images))
			
 
				 	for i := range req.Images {
			
 
				 		if isMllama {
			
 
				-			data, aspectRatioID, err := imageproc.Preprocess(req.Images[i])
			
 
				+			data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
			
 
				 			if err != nil {
			
 
				 				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
			
 
				 				return
			
 
				 			}
			
 
				 
			
 
				+			ar, ok := opts["aspectRatioIndex"].(int)
			
 
				+			if !ok {
			
 
				+				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
			
 
				+				return
			
 
				+			}
			
 
				+
			
 
				 			buf := new(bytes.Buffer)
			
 
				 			err = binary.Write(buf, binary.LittleEndian, data)
			
 
				 			if err != nil {
			
@@ -218,7 +224,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 
				 				return
			
 
				 			}
			
 
				 
			
 
				-			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: aspectRatioID}
			
 
				+			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
			
 
				 		} else {
			
 
				 			images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
			
 
				 		}