7 months ago · 5486c57364
--- a/server/imageproc/images.go
+++ b/server/imageproc/images.go
@@ -42,7 +42,7 @@ func min(a, b int) int {
 
				 	return b
			
 
				 }
			
 
				 
			
 
				-func GetImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
			
 
				+func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
			
 
				 	targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
			
 
				 	targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
			
 
				 
			
@@ -62,7 +62,7 @@ func GetImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) im
 
				 	return image.Point{w, h}
			
 
				 }
			
 
				 
			
 
				-func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
			
 
				+func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
			
 
				 	possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
			
 
				 	possibleCanvasSizes := []image.Point{}
			
 
				 	for _, pta := range possibleTileArrangements {
			
@@ -104,11 +104,13 @@ func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
 
				 		selectedScale = minUpscale
			
 
				 	}
			
 
				 
			
 
				-	selectedCanvas := possibleCanvasSizes[0]
			
 
				+	var selectedCanvas image.Point
			
 
				 	for n, pcs := range possibleCanvasSizes {
			
 
				 		if scales[n] == selectedScale {
			
 
				-			// choose the largest possible canvas
			
 
				-			if pcs.X*pcs.Y > selectedCanvas.X*selectedCanvas.Y {
			
 
				+			// choose the smallest possible canvas
			
 
				+			if selectedCanvas.X == 0 && selectedCanvas.Y == 0 {
			
 
				+				selectedCanvas = pcs
			
 
				+			} else if pcs.X*pcs.Y < selectedCanvas.X*selectedCanvas.Y {
			
 
				 				selectedCanvas = pcs
			
 
				 			}
			
 
				 		}
			
@@ -116,7 +118,7 @@ func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
 
				 	return selectedCanvas
			
 
				 }
			
 
				 
			
 
				-func SplitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
			
 
				+func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
			
 
				 	b := img.Bounds()
			
 
				 	width := b.Max.X - b.Min.X
			
 
				 	height := b.Max.Y - b.Min.Y
			
@@ -141,10 +143,9 @@ func ResizeImage(img image.Image, outputSize image.Point, maxImageTiles int) (im
 
				 	b := img.Bounds()
			
 
				 	tileSize := outputSize.Y
			
 
				 
			
 
				-	canvasSize := GetOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
			
 
				+	canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
			
 
				 	aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
			
 
				-
			
 
				-	newSize := GetImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
			
 
				+	newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
			
 
				 
			
 
				 	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
			
 
				 	draw.ApproxBiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
			
@@ -165,7 +166,7 @@ func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image
 
				 }
			
 
				 
			
 
				 func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
			
 
				-	subImages := SplitToTiles(img, aspectRatio)
			
 
				+	subImages := splitToTiles(img, aspectRatio)
			
 
				 
			
 
				 	var pixelVals []float32
			
 
				 
			
@@ -218,8 +219,6 @@ func Preprocess(imageData []byte) ([]float32, int, error) {
 
				 	newImage, aspectRatio := ResizeImage(img, outputSize, maxTiles)
			
 
				 	newImage = PadImage(newImage, outputSize, aspectRatio)
			
 
				 
			
 
				-	// todo: need to scale (dim) by 1/256
			
 
				-
			
 
				 	data := PackImages(newImage, aspectRatio, mean, std)
			
 
				 	supportedRatios := GetSupportedAspectRatios(maxTiles)
			
 
				 	var aspectRatioIndex int
			
--- a/server/imageproc/images_test.go
+++ b/server/imageproc/images_test.go
@@ -1,7 +1,9 @@
 
				 package imageproc
			
 
				 
			
 
				 import (
			
 
				+	"bytes"
			
 
				 	"image"
			
 
				+	"image/png"
			
 
				 	"reflect"
			
 
				 	"testing"
			
 
				 )
			
@@ -27,12 +29,12 @@ func testEq(a, b any) bool {
 
				 }
			
 
				 
			
 
				 func TestAspectRatios(t *testing.T) {
			
 
				-	type AspectCase struct {
			
 
				+	type aspectCase struct {
			
 
				 		MaxTiles int
			
 
				 		Expected []image.Point
			
 
				 	}
			
 
				 
			
 
				-	cases := []AspectCase{
			
 
				+	cases := []aspectCase{
			
 
				 		{
			
 
				 			MaxTiles: 1,
			
 
				 			Expected: []image.Point{{1, 1}},
			
@@ -61,14 +63,14 @@ func TestAspectRatios(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestGetImageSizeFitToCanvas(t *testing.T) {
			
 
				-	type ImageSizeCase struct {
			
 
				+	type imageSizeCase struct {
			
 
				 		ImageRect  image.Point
			
 
				 		CanvasRect image.Point
			
 
				 		TileSize   int
			
 
				 		Expected   image.Point
			
 
				 	}
			
 
				 
			
 
				-	cases := []ImageSizeCase{
			
 
				+	cases := []imageSizeCase{
			
 
				 		{
			
 
				 			ImageRect:  image.Point{400, 400},
			
 
				 			CanvasRect: image.Point{640, 480},
			
@@ -108,7 +110,7 @@ func TestGetImageSizeFitToCanvas(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actual := GetImageSizeFitToCanvas(c.ImageRect, c.CanvasRect, c.TileSize)
			
 
				+		actual := getImageSizeFitToCanvas(c.ImageRect, c.CanvasRect, c.TileSize)
			
 
				 
			
 
				 		if actual != c.Expected {
			
 
				 			t.Errorf("incorrect image rect: '%#v'. expected: '%#v'", actual, c.Expected)
			
@@ -117,19 +119,19 @@ func TestGetImageSizeFitToCanvas(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestGetOptimalTiledCanvas(t *testing.T) {
			
 
				-	type TiledCanvasSizeCase struct {
			
 
				+	type tiledCanvasSizeCase struct {
			
 
				 		ImageSize     image.Point
			
 
				 		MaxImageTiles int
			
 
				 		TileSize      int
			
 
				 		Expected      image.Point
			
 
				 	}
			
 
				 
			
 
				-	cases := []TiledCanvasSizeCase{
			
 
				+	cases := []tiledCanvasSizeCase{
			
 
				 		{
			
 
				 			ImageSize:     image.Point{1024, 768},
			
 
				 			MaxImageTiles: 4,
			
 
				 			TileSize:      1000,
			
 
				-			Expected:      image.Point{4000, 1000},
			
 
				+			Expected:      image.Point{2000, 1000},
			
 
				 		},
			
 
				 		{
			
 
				 			ImageSize:     image.Point{1024, 768},
			
@@ -140,7 +142,7 @@ func TestGetOptimalTiledCanvas(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actual := GetOptimalTiledCanvas(c.ImageSize, c.MaxImageTiles, c.TileSize)
			
 
				+		actual := getOptimalTiledCanvas(c.ImageSize, c.MaxImageTiles, c.TileSize)
			
 
				 
			
 
				 		if actual != c.Expected {
			
 
				 			t.Errorf("incorrect tiled canvas: '%#v'. expected: '%#v'", actual, c.Expected)
			
@@ -149,13 +151,13 @@ func TestGetOptimalTiledCanvas(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestSplitToTiles(t *testing.T) {
			
 
				-	type SplitCase struct {
			
 
				+	type splitCase struct {
			
 
				 		TestImage    image.Image
			
 
				 		NumTilesSize image.Point
			
 
				 		Expected     []image.Image
			
 
				 	}
			
 
				 
			
 
				-	cases := []SplitCase{
			
 
				+	cases := []splitCase{
			
 
				 		{
			
 
				 			TestImage:    image.NewRGBA(image.Rect(0, 0, 1024, 768)),
			
 
				 			NumTilesSize: image.Point{1, 1},
			
@@ -182,7 +184,7 @@ func TestSplitToTiles(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		actual := SplitToTiles(c.TestImage, c.NumTilesSize)
			
 
				+		actual := splitToTiles(c.TestImage, c.NumTilesSize)
			
 
				 
			
 
				 		if len(actual) != len(c.Expected) {
			
 
				 			t.Errorf("incorrect number of images '%d': expected: '%d'", len(actual), len(c.Expected))
			
@@ -197,7 +199,7 @@ func TestSplitToTiles(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestResize(t *testing.T) {
			
 
				-	type ResizeCase struct {
			
 
				+	type resizeCase struct {
			
 
				 		TestImage           image.Image
			
 
				 		OutputSize          image.Point
			
 
				 		MaxImageTiles       int
			
@@ -205,7 +207,7 @@ func TestResize(t *testing.T) {
 
				 		ExpectedAspectRatio image.Point
			
 
				 	}
			
 
				 
			
 
				-	cases := []ResizeCase{
			
 
				+	cases := []resizeCase{
			
 
				 		{
			
 
				 			TestImage:           image.NewRGBA(image.Rect(0, 0, 200, 200)),
			
 
				 			OutputSize:          image.Point{100, 100},
			
@@ -218,7 +220,14 @@ func TestResize(t *testing.T) {
 
				 			OutputSize:          image.Point{100, 100},
			
 
				 			MaxImageTiles:       2,
			
 
				 			ExpectedImage:       image.NewRGBA(image.Rect(0, 0, 100, 100)),
			
 
				-			ExpectedAspectRatio: image.Point{1, 2},
			
 
				+			ExpectedAspectRatio: image.Point{1, 1},
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage:           image.NewRGBA(image.Rect(0, 0, 10, 10)),
			
 
				+			OutputSize:          image.Point{560, 560},
			
 
				+			MaxImageTiles:       4,
			
 
				+			ExpectedImage:       image.NewRGBA(image.Rect(0, 0, 560, 560)),
			
 
				+			ExpectedAspectRatio: image.Point{1, 1},
			
 
				 		},
			
 
				 		{
			
 
				 			TestImage:           image.NewRGBA(image.Rect(0, 0, 2560, 1920)),
			
@@ -244,20 +253,20 @@ func TestResize(t *testing.T) {
 
				 		}
			
 
				 
			
 
				 		if actualAspectRatio != c.ExpectedAspectRatio {
			
 
				-			t.Errorf("canvas size incorrect: '%#v': expected: '%#v'", actualAspectRatio, c.ExpectedAspectRatio)
			
 
				+			t.Errorf("aspect ratio incorrect: '%#v': expected: '%#v'", actualAspectRatio, c.ExpectedAspectRatio)
			
 
				 		}
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 func TestPad(t *testing.T) {
			
 
				-	type PadCase struct {
			
 
				+	type padCase struct {
			
 
				 		TestImage   image.Image
			
 
				 		OutputSize  image.Point
			
 
				 		AspectRatio image.Point
			
 
				 		Expected    image.Image
			
 
				 	}
			
 
				 
			
 
				-	cases := []PadCase{
			
 
				+	cases := []padCase{
			
 
				 		{
			
 
				 			TestImage:   image.NewRGBA(image.Rect(0, 0, 1000, 667)),
			
 
				 			OutputSize:  image.Point{560, 560},
			
@@ -276,30 +285,79 @@ func TestPad(t *testing.T) {
 
				 }
			
 
				 
			
 
				 func TestPackImages(t *testing.T) {
			
 
				-	type PackCase struct {
			
 
				-		TestImage   image.Image
			
 
				-		AspectRatio image.Point
			
 
				+	type packCase struct {
			
 
				+		TestImage    image.Image
			
 
				+		AspectRatio  image.Point
			
 
				+		ExpectedVals int
			
 
				 	}
			
 
				 
			
 
				 	mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
			
 
				 	std := [3]float32{0.26862954, 0.26130258, 0.27577711}
			
 
				 
			
 
				-	cases := []PackCase{
			
 
				+	cases := []packCase{
			
 
				 		{
			
 
				-			TestImage:   image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
			
 
				-			AspectRatio: image.Point{2, 2},
			
 
				+			TestImage:    image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
			
 
				+			AspectRatio:  image.Point{2, 2},
			
 
				+			ExpectedVals: 2 * 2 * 3 * 560 * 560,
			
 
				+		},
			
 
				+		{
			
 
				+			TestImage:    image.NewRGBA(image.Rect(0, 0, 560, 560)),
			
 
				+			AspectRatio:  image.Point{1, 1},
			
 
				+			ExpectedVals: 1 * 1 * 3 * 560 * 560,
			
 
				 		},
			
 
				 		{
			
 
				-			TestImage:   image.NewRGBA(image.Rect(0, 0, 560, 560)),
			
 
				-			AspectRatio: image.Point{1, 1},
			
 
				+			TestImage:    image.NewRGBA(image.Rect(0, 0, 1120, 560)),
			
 
				+			AspectRatio:  image.Point{1, 2},
			
 
				+			ExpectedVals: 1 * 2 * 3 * 560 * 560,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	for _, c := range cases {
			
 
				+		actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std)
			
 
				+		if len(actualVals) != c.ExpectedVals {
			
 
				+			t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestPreprocess(t *testing.T) {
			
 
				+	type preprocessCase struct {
			
 
				+		TestImage             image.Image
			
 
				+		ExpectedVals          int
			
 
				+		ExpectedAspectRatioID int
			
 
				+	}
			
 
				+
			
 
				+	cases := []preprocessCase{
			
 
				+		{
			
 
				+			TestImage:             image.NewRGBA(image.Rect(0, 0, 10, 10)),
			
 
				+			ExpectedVals:          0,
			
 
				+			ExpectedAspectRatioID: 1,
			
 
				 		},
			
 
				 		{
			
 
				-			TestImage:   image.NewRGBA(image.Rect(0, 0, 1120, 560)),
			
 
				-			AspectRatio: image.Point{1, 2},
			
 
				+			TestImage:             image.NewRGBA(image.Rect(0, 0, 1024, 768)),
			
 
				+			ExpectedVals:          0,
			
 
				+			ExpectedAspectRatioID: 6,
			
 
				 		},
			
 
				 	}
			
 
				 
			
 
				 	for _, c := range cases {
			
 
				-		PackImages(c.TestImage, c.AspectRatio, mean, std)
			
 
				+		var buf bytes.Buffer
			
 
				+		err := png.Encode(&buf, c.TestImage)
			
 
				+		if err != nil {
			
 
				+			t.Fatal(err)
			
 
				+		}
			
 
				+
			
 
				+		imgData, aspectRatioID, err := Preprocess(buf.Bytes())
			
 
				+		if err != nil {
			
 
				+			t.Fatalf("error processing: %q", err)
			
 
				+		}
			
 
				+
			
 
				+		if len(imgData) == 0 {
			
 
				+			t.Errorf("no image data returned")
			
 
				+		}
			
 
				+
			
 
				+		if aspectRatioID != c.ExpectedAspectRatioID {
			
 
				+			t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
			
 
				+		}
			
 
				 	}
			
 
				 }
			
--- a/server/prompt_test.go
+++ b/server/prompt_test.go
@@ -3,6 +3,8 @@ package server
 
				 import (
			
 
				 	"bytes"
			
 
				 	"context"
			
 
				+	"image"
			
 
				+	"image/png"
			
 
				 	"testing"
			
 
				 
			
 
				 	"github.com/google/go-cmp/cmp"
			
@@ -13,18 +15,41 @@ import (
 
				 
			
 
				 func TestChatPrompt(t *testing.T) {
			
 
				 	type expect struct {
			
 
				-		prompt string
			
 
				-		images [][]byte
			
 
				+		prompt        string
			
 
				+		images        [][]byte
			
 
				+		aspectRatioID int
			
 
				 	}
			
 
				 
			
 
				+	tmpl, err := template.Parse(`
			
 
				+{{- if .System }}{{ .System }} {{ end }}
			
 
				+{{- if .Prompt }}{{ .Prompt }} {{ end }}
			
 
				+{{- if .Response }}{{ .Response }} {{ end }}`)
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+	visionModel := Model{Template: tmpl, ProjectorPaths: []string{"vision"}}
			
 
				+	mllamaModel := Model{Template: tmpl, ProjectorPaths: []string{"vision"}, Config: ConfigV2{ModelFamilies: []string{"mllama"}}}
			
 
				+
			
 
				+	img := image.NewRGBA(image.Rect(0, 0, 5, 5))
			
 
				+	var buf bytes.Buffer
			
 
				+
			
 
				+	err = png.Encode(&buf, img)
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	imgBuf := buf.Bytes()
			
 
				+
			
 
				 	cases := []struct {
			
 
				 		name  string
			
 
				+		model Model
			
 
				 		limit int
			
 
				 		msgs  []api.Message
			
 
				 		expect
			
 
				 	}{
			
 
				 		{
			
 
				 			name:  "messages",
			
 
				+			model: visionModel,
			
 
				 			limit: 64,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -37,6 +62,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "truncate messages",
			
 
				+			model: visionModel,
			
 
				 			limit: 1,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -49,6 +75,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "truncate messages with image",
			
 
				+			model: visionModel,
			
 
				 			limit: 64,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -64,6 +91,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "truncate messages with images",
			
 
				+			model: visionModel,
			
 
				 			limit: 64,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
			
@@ -79,6 +107,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "messages with images",
			
 
				+			model: visionModel,
			
 
				 			limit: 2048,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
			
@@ -95,6 +124,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "message with image tag",
			
 
				+			model: visionModel,
			
 
				 			limit: 2048,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}},
			
@@ -111,6 +141,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "messages with interleaved images",
			
 
				+			model: visionModel,
			
 
				 			limit: 2048,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -129,6 +160,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "truncate message with interleaved images",
			
 
				+			model: visionModel,
			
 
				 			limit: 1024,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -146,6 +178,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "message with system prompt",
			
 
				+			model: visionModel,
			
 
				 			limit: 2048,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "system", Content: "You are the Test Who Lived."},
			
@@ -159,6 +192,7 @@ func TestChatPrompt(t *testing.T) {
 
				 		},
			
 
				 		{
			
 
				 			name:  "out of order system",
			
 
				+			model: visionModel,
			
 
				 			limit: 2048,
			
 
				 			msgs: []api.Message{
			
 
				 				{Role: "user", Content: "You're a test, Harry!"},
			
@@ -170,19 +204,39 @@ func TestChatPrompt(t *testing.T) {
 
				 				prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
			
 
				 			},
			
 
				 		},
			
 
				-	}
			
 
				-
			
 
				-	tmpl, err := template.Parse(`
			
 
				-{{- if .System }}{{ .System }} {{ end }}
			
 
				-{{- if .Prompt }}{{ .Prompt }} {{ end }}
			
 
				-{{- if .Response }}{{ .Response }} {{ end }}`)
			
 
				-	if err != nil {
			
 
				-		t.Fatal(err)
			
 
				+		{
			
 
				+			name:  "messages with mllama (no images)",
			
 
				+			model: mllamaModel,
			
 
				+			limit: 2048,
			
 
				+			msgs: []api.Message{
			
 
				+				{Role: "user", Content: "You're a test, Harry!"},
			
 
				+				{Role: "assistant", Content: "I-I'm a what?"},
			
 
				+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
			
 
				+			},
			
 
				+			expect: expect{
			
 
				+				prompt: "You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
			
 
				+			},
			
 
				+		},
			
 
				+		{
			
 
				+			name:  "messages with mllama",
			
 
				+			model: mllamaModel,
			
 
				+			limit: 2048,
			
 
				+			msgs: []api.Message{
			
 
				+				{Role: "user", Content: "You're a test, Harry!"},
			
 
				+				{Role: "assistant", Content: "I-I'm a what?"},
			
 
				+				{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{imgBuf}},
			
 
				+			},
			
 
				+			expect: expect{
			
 
				+				prompt:        "You're a test, Harry! I-I'm a what? <|image|>A test. And a thumping good one at that, I'd wager. ",
			
 
				+				images:        [][]byte{imgBuf},
			
 
				+				aspectRatioID: 1,
			
 
				+			},
			
 
				+		},
			
 
				 	}
			
 
				 
			
 
				 	for _, tt := range cases {
			
 
				 		t.Run(tt.name, func(t *testing.T) {
			
 
				-			model := Model{Template: tmpl, ProjectorPaths: []string{"vision"}}
			
 
				+			model := tt.model
			
 
				 			opts := api.Options{Runner: api.Runner{NumCtx: tt.limit}}
			
 
				 			prompt, images, err := chatPrompt(context.TODO(), &model, mockRunner{}.Tokenize, &opts, tt.msgs, nil)
			
 
				 			if err != nil {
			
@@ -202,8 +256,14 @@ func TestChatPrompt(t *testing.T) {
 
				 					t.Errorf("expected ID %d, got %d", i, images[i].ID)
			
 
				 				}
			
 
				 
			
 
				-				if !bytes.Equal(images[i].Data, tt.images[i]) {
			
 
				-					t.Errorf("expected %q, got %q", tt.images[i], images[i].Data)
			
 
				+				if len(model.Config.ModelFamilies) == 0 {
			
 
				+					if !bytes.Equal(images[i].Data, tt.images[i]) {
			
 
				+						t.Errorf("expected %q, got %q", tt.images[i], images[i].Data)
			
 
				+					}
			
 
				+				} else {
			
 
				+					if images[i].AspectRatioID != tt.aspectRatioID {
			
 
				+						t.Errorf("expected aspect ratio %d, got %d", tt.aspectRatioID, images[i].AspectRatioID)
			
 
				+					}
			
 
				 				}
			
 
				 			}
			
 
				 		})
			
--- a/template/template_test.go
+++ b/template/template_test.go
@@ -317,45 +317,6 @@ What is your name?<|im_end|>
 
				 <|im_start|>assistant
			
 
				 `,
			
 
				 		},
			
 
				-		{
			
 
				-			"moondream",
			
 
				-			[]template{
			
 
				-				// this does not have a "no response" test because it's impossible to render the same output
			
 
				-				{"response", `{{ if .Prompt }}Question: {{ .Prompt }}
			
 
				-
			
 
				-{{ end }}Answer: {{ .Response }}
			
 
				-
			
 
				-`},
			
 
				-				{"messages", `
			
 
				-{{- range .Messages }}
			
 
				-{{- if eq .Role "user" }}Question: {{ .Content }}
			
 
				-
			
 
				-{{ else if eq .Role "assistant" }}Answer: {{ .Content }}
			
 
				-
			
 
				-{{ end }}
			
 
				-{{- end }}Answer: `},
			
 
				-			},
			
 
				-			Values{
			
 
				-				Messages: []api.Message{
			
 
				-					{Role: "user", Content: "What's in this image?", Images: []api.ImageData{[]byte("")}},
			
 
				-					{Role: "assistant", Content: "It's a hot dog."},
			
 
				-					{Role: "user", Content: "What's in _this_ image?"},
			
 
				-					{Role: "user", Images: []api.ImageData{[]byte("")}},
			
 
				-					{Role: "user", Content: "Is it a hot dog?"},
			
 
				-				},
			
 
				-			},
			
 
				-			`Question: [img-0] What's in this image?
			
 
				-
			
 
				-Answer: It's a hot dog.
			
 
				-
			
 
				-Question: What's in _this_ image?
			
 
				-
			
 
				-[img-1]
			
 
				-
			
 
				-Is it a hot dog?
			
 
				-
			
 
				-Answer: `,
			
 
				-		},
			
 
				 	}
			
 
				 
			
 
				 	for _, tt := range cases {