1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- package qwen2vl
- import (
- "fmt"
- "image"
- _ "image/jpeg"
- _ "image/png"
- "io"
- "math"
- "github.com/ollama/ollama/model/imageproc"
- )
// Default parameters that Preprocess passes to smartResize.
const (
	// DefaultFactor is the value both output dimensions are made a multiple of.
	DefaultFactor = 28

	// DefaultMinPixels is the lower bound on the area (width * height) of the
	// resized image.
	DefaultMinPixels = 56 * 56

	// DefaultMaxPixels is the upper bound on the area (width * height) of the
	// resized image.
	DefaultMaxPixels = 14 * 14 * 4 * 1280
)
// smartResize returns the dimensions an image of the given size should be
// resized to so that:
//
//  1. both dimensions are multiples of factor,
//  2. the area lies between minPixels and maxPixels where possible, and
//  3. the original aspect ratio is preserved as closely as possible.
//
// Each dimension of the result is at least factor, even if that means the
// area exceeds maxPixels for very small maxPixels values.
//
// It panics if factor is not positive, if either dimension of size is smaller
// than factor, or if the aspect ratio of size exceeds 200:1.
func smartResize(size image.Point, factor, minPixels, maxPixels int) image.Point {
	if factor <= 0 {
		// A non-positive factor would turn the arithmetic below into NaN/Inf.
		panic("factor must be positive")
	}
	if size.X < factor || size.Y < factor {
		panic("image is too small to resize")
	}
	// Compare in floating point: integer division truncates, which would let
	// ratios such as 200.9:1 slip through the check.
	if float64(max(size.X, size.Y))/float64(min(size.X, size.Y)) > 200 {
		panic("aspect ratio must be less than 200:1")
	}

	f := float64(factor)
	width, height := float64(size.X), float64(size.Y)

	// Start from the nearest multiples of factor.
	xBar := math.Round(width/f) * f
	yBar := math.Round(height/f) * f

	switch area := xBar * yBar; {
	case area > float64(maxPixels):
		// Too large: scale both sides down by the same ratio and round down.
		// Clamp to factor so a dimension can never collapse to zero.
		beta := math.Sqrt(height * width / float64(maxPixels))
		xBar = max(f, math.Floor(width/beta/f)*f)
		yBar = max(f, math.Floor(height/beta/f)*f)
	case area < float64(minPixels):
		// Too small: scale both sides up by the same ratio and round up.
		beta := math.Sqrt(float64(minPixels) / (height * width))
		xBar = math.Ceil(width*beta/f) * f
		yBar = math.Ceil(height*beta/f) * f
	}

	return image.Point{X: int(xBar), Y: int(yBar)}
}
- func resizeImage(img image.Image, format string, size image.Point) image.Image {
- if format == "png" {
- img = imageproc.Composite(img)
- }
- return imageproc.Resize(img, size, imageproc.ResizeBilinear)
- }
- func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
- img, format, err := image.Decode(imageData)
- if err != nil {
- return nil, nil, fmt.Errorf("failed to decode image: %w", err)
- }
- size := smartResize(img.Bounds().Max, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
- img = resizeImage(img, format, size)
- data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
- opts := map[string]any{}
- return data, opts, nil
- }
|