images.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. package imageproc
  2. import (
  3. "bytes"
  4. "fmt"
  5. "image"
  6. "image/color"
  7. _ "image/jpeg"
  8. _ "image/png"
  9. "math"
  10. "slices"
  11. "golang.org/x/image/draw"
  12. )
  // GetSupportedAspectRatios lists every tile arrangement, as a point of
  // X tiles by Y tiles, whose total tile count X*Y does not exceed maxTiles.
  //
  // Arrangements are ordered by X first and then by Y. The order is
  // significant: Preprocess reports the position of the chosen arrangement in
  // this list. A maxTiles below 1 yields an empty, non-nil slice.
  func GetSupportedAspectRatios(maxTiles int) []image.Point {
  	arrangements := make([]image.Point, 0)
  	for cols := 1; cols <= maxTiles; cols++ {
  		for rows := 1; rows <= maxTiles; rows++ {
  			if cols*rows > maxTiles {
  				continue
  			}
  			arrangements = append(arrangements, image.Point{X: cols, Y: rows})
  		}
  	}
  	return arrangements
  }
  // clip limits a to the range [a_min, a_max]. The lower bound is checked
  // first, so when a is below a_min the result is a_min even if a_min is
  // greater than a_max.
  func clip(a, a_min, a_max int) int {
  	switch {
  	case a < a_min:
  		return a_min
  	case a > a_max:
  		return a_max
  	default:
  		return a
  	}
  }
  32. func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
  33. targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
  34. targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
  35. scaleWidth := float64(targetWidth) / float64(imageSize.X)
  36. scaleHeight := float64(targetHeight) / float64(imageSize.Y)
  37. var w, h int
  38. if scaleWidth < scaleHeight {
  39. w = targetWidth
  40. h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
  41. } else {
  42. w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
  43. h = targetHeight
  44. }
  45. return image.Point{w, h}
  46. }
  47. func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
  48. possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
  49. possibleCanvasSizes := []image.Point{}
  50. for _, pta := range possibleTileArrangements {
  51. possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
  52. }
  53. scales := []float64{}
  54. for _, pcs := range possibleCanvasSizes {
  55. scaleHeight := float64(pcs.Y) / float64(imageSize.Y)
  56. scaleWidth := float64(pcs.X) / float64(imageSize.X)
  57. if scaleWidth > scaleHeight {
  58. scales = append(scales, scaleHeight)
  59. } else {
  60. scales = append(scales, scaleWidth)
  61. }
  62. }
  63. var minUpscale float64
  64. var maxDownscale float64
  65. var upscale bool
  66. for _, s := range scales {
  67. if s > 1.0 {
  68. upscale = true
  69. if minUpscale == 0 {
  70. minUpscale = s
  71. } else {
  72. minUpscale = math.Min(minUpscale, s)
  73. }
  74. } else {
  75. maxDownscale = math.Max(maxDownscale, s)
  76. }
  77. }
  78. selectedScale := maxDownscale
  79. if upscale {
  80. selectedScale = minUpscale
  81. }
  82. var selectedCanvas image.Point
  83. for n, pcs := range possibleCanvasSizes {
  84. if scales[n] == selectedScale {
  85. // choose the smallest possible canvas
  86. if selectedCanvas.X == 0 && selectedCanvas.Y == 0 {
  87. selectedCanvas = pcs
  88. } else if pcs.X*pcs.Y < selectedCanvas.X*selectedCanvas.Y {
  89. selectedCanvas = pcs
  90. }
  91. }
  92. }
  93. return selectedCanvas
  94. }
  // splitToTiles cuts img into numTilesSize.X by numTilesSize.Y equally sized
  // tiles and returns them row by row (left to right, then top to bottom).
  //
  // Tile rectangles are positioned relative to img.Bounds().Min, so images
  // whose bounds do not start at the origin are split correctly. Each tile is
  // Dx/X by Dy/Y pixels using integer division; any remainder on the right or
  // bottom edge is not covered by a tile.
  //
  // Tiles are obtained through the image's SubImage method. An image that does
  // not provide SubImage is first copied into an RGBA image with the same
  // bounds. A non-positive tile count on either axis yields an empty slice.
  func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
  	if numTilesSize.X <= 0 || numTilesSize.Y <= 0 {
  		// Nothing to split into; also avoids dividing by zero below.
  		return []image.Image{}
  	}

  	type subImager interface {
  		SubImage(image.Rectangle) image.Image
  	}

  	b := img.Bounds()
  	src, ok := img.(subImager)
  	if !ok {
  		// Fall back to a copy that supports SubImage instead of panicking.
  		rgba := image.NewRGBA(b)
  		draw.Draw(rgba, b, img, b.Min, draw.Src)
  		src = rgba
  	}

  	tileWidth := b.Dx() / numTilesSize.X
  	tileHeight := b.Dy() / numTilesSize.Y

  	tiles := make([]image.Image, 0, numTilesSize.X*numTilesSize.Y)
  	for row := 0; row < numTilesSize.Y; row++ {
  		for col := 0; col < numTilesSize.X; col++ {
  			origin := image.Point{
  				X: b.Min.X + col*tileWidth,
  				Y: b.Min.Y + row*tileHeight,
  			}
  			rect := image.Rectangle{
  				Min: origin,
  				Max: origin.Add(image.Point{X: tileWidth, Y: tileHeight}),
  			}
  			tiles = append(tiles, src.SubImage(rect))
  		}
  	}
  	return tiles
  }
  // compositeImage removes the "alpha" channel from img by drawing it over an
  // opaque white background. The result is a new RGBA image with the same
  // bounds as img; img itself is not modified.
  func compositeImage(img image.Image) image.Image {
  	bounds := img.Bounds()
  	canvas := image.NewRGBA(bounds)

  	// Prefill with white, then blend the source on top of it.
  	background := image.NewUniform(color.RGBA{R: 255, G: 255, B: 255, A: 255})
  	draw.Draw(canvas, canvas.Bounds(), background, image.Point{}, draw.Src)
  	draw.Draw(canvas, canvas.Bounds(), img, bounds.Min, draw.Over)

  	return canvas
  }
  120. func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
  121. if format == "png" {
  122. img = compositeImage(img)
  123. }
  124. b := img.Bounds()
  125. tileSize := outputSize.Y
  126. canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
  127. aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
  128. newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
  129. dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
  130. // scaling choices:
  131. // NearestNeighbor fast, blocky output
  132. // ApproxBiLinear fast, medium quality
  133. // BiLinear slow, high quality
  134. // CatmullRom very slow, very high quality
  135. draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
  136. return dst, aspectRatio
  137. }
  // PadImage places img in the top-left corner of a new RGBA canvas measuring
  // outputSize.X*aspectRatio.X by outputSize.Y*aspectRatio.Y pixels. Canvas
  // pixels not covered by img stay at the zero value (transparent black), and
  // any part of img that does not fit on the canvas is cropped.
  //
  // The source is read starting at img.Bounds().Min, so an image whose bounds
  // do not start at the origin is still anchored at the canvas origin.
  func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
  	paddedSize := image.Point{
  		X: outputSize.X * aspectRatio.X,
  		Y: outputSize.Y * aspectRatio.Y,
  	}
  	dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))

  	src := img.Bounds()
  	target := image.Rect(0, 0, src.Dx(), src.Dy())
  	draw.Draw(dst, target, img, src.Min, draw.Over)

  	return dst
  }
  147. func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
  148. subImages := splitToTiles(img, aspectRatio)
  149. var pixelVals []float32
  150. for _, subImg := range subImages {
  151. bounds := subImg.Bounds()
  152. var rVals, gVals, bVals []float32
  153. for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
  154. for x := bounds.Min.X; x < bounds.Max.X; x++ {
  155. c := subImg.At(x, y)
  156. r, g, b, _ := c.RGBA()
  157. rVal := float32(r>>8) / 255.0
  158. gVal := float32(g>>8) / 255.0
  159. bVal := float32(b>>8) / 255.0
  160. rVal = (rVal - mean[0]) / std[0]
  161. gVal = (gVal - mean[1]) / std[1]
  162. bVal = (bVal - mean[2]) / std[2]
  163. rVals = append(rVals, rVal)
  164. gVals = append(gVals, gVal)
  165. bVals = append(bVals, bVal)
  166. }
  167. }
  168. pixelVals = append(pixelVals, rVals...)
  169. pixelVals = append(pixelVals, gVals...)
  170. pixelVals = append(pixelVals, bVals...)
  171. }
  172. return pixelVals
  173. }
  174. func Preprocess(imageData []byte) ([]float32, int, error) {
  175. // todo: need guard in here for bad image data
  176. // mllama values
  177. outputSize := image.Point{560, 560}
  178. maxTiles := 4
  179. // clip values
  180. mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
  181. std := [3]float32{0.26862954, 0.26130258, 0.27577711}
  182. img, format, err := image.Decode(bytes.NewReader(imageData))
  183. if err != nil {
  184. return nil, 0, fmt.Errorf("failed to decode image: %w", err)
  185. }
  186. newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles)
  187. newImage = PadImage(newImage, outputSize, aspectRatio)
  188. data := PackImages(newImage, aspectRatio, mean, std)
  189. aspectRatioIndex := slices.Index(GetSupportedAspectRatios(maxTiles), aspectRatio) + 1
  190. return data, aspectRatioIndex, nil
  191. }