images.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. package imageproc
  2. import (
  3. "bytes"
  4. "fmt"
  5. "image"
  6. _ "image/jpeg"
  7. _ "image/png"
  8. "math"
  9. "golang.org/x/image/draw"
  10. )
  11. func GetSupportedAspectRatios(maxTiles int) []image.Point {
  12. ratios := []image.Point{}
  13. for w := range maxTiles {
  14. for h := range maxTiles {
  15. if (w+1)*(h+1) <= maxTiles {
  16. ratios = append(ratios, image.Point{w + 1, h + 1})
  17. }
  18. }
  19. }
  20. return ratios
  21. }
  22. func clip(a, a_min, a_max int) int {
  23. if a < a_min {
  24. return a_min
  25. } else if a > a_max {
  26. return a_max
  27. }
  28. return a
  29. }
  30. func min(a, b int) int {
  31. if a < b {
  32. return a
  33. }
  34. return b
  35. }
  36. func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
  37. targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
  38. targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
  39. scaleWidth := float64(targetWidth) / float64(imageSize.X)
  40. scaleHeight := float64(targetHeight) / float64(imageSize.Y)
  41. var w, h int
  42. if scaleWidth < scaleHeight {
  43. w = targetWidth
  44. h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
  45. } else {
  46. w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
  47. h = targetHeight
  48. }
  49. return image.Point{w, h}
  50. }
  51. func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
  52. possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
  53. possibleCanvasSizes := []image.Point{}
  54. for _, pta := range possibleTileArrangements {
  55. possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
  56. }
  57. scales := []float64{}
  58. for _, pcs := range possibleCanvasSizes {
  59. scaleHeight := float64(pcs.Y) / float64(imageSize.Y)
  60. scaleWidth := float64(pcs.X) / float64(imageSize.X)
  61. if scaleWidth > scaleHeight {
  62. scales = append(scales, scaleHeight)
  63. } else {
  64. scales = append(scales, scaleWidth)
  65. }
  66. }
  67. var minUpscale float64
  68. var maxDownscale float64
  69. var upscale bool
  70. for _, s := range scales {
  71. if s > 1.0 {
  72. upscale = true
  73. if minUpscale == 0 {
  74. minUpscale = s
  75. } else {
  76. minUpscale = math.Min(minUpscale, s)
  77. }
  78. } else {
  79. maxDownscale = math.Max(maxDownscale, s)
  80. }
  81. }
  82. selectedScale := maxDownscale
  83. if upscale {
  84. selectedScale = minUpscale
  85. }
  86. var selectedCanvas image.Point
  87. for n, pcs := range possibleCanvasSizes {
  88. if scales[n] == selectedScale {
  89. // choose the smallest possible canvas
  90. if selectedCanvas.X == 0 && selectedCanvas.Y == 0 {
  91. selectedCanvas = pcs
  92. } else if pcs.X*pcs.Y < selectedCanvas.X*selectedCanvas.Y {
  93. selectedCanvas = pcs
  94. }
  95. }
  96. }
  97. return selectedCanvas
  98. }
  99. func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
  100. b := img.Bounds()
  101. width := b.Max.X - b.Min.X
  102. height := b.Max.Y - b.Min.Y
  103. tileHeight := height / numTilesSize.Y
  104. tileWidth := width / numTilesSize.X
  105. images := []image.Image{}
  106. for h := range numTilesSize.Y {
  107. for w := range numTilesSize.X {
  108. rect := image.Rect(tileWidth*w, tileHeight*h, tileWidth*(w+1), tileHeight*(h+1))
  109. images = append(images, img.(interface {
  110. SubImage(image.Rectangle) image.Image
  111. }).SubImage(rect))
  112. }
  113. }
  114. return images
  115. }
  116. func ResizeImage(img image.Image, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
  117. b := img.Bounds()
  118. tileSize := outputSize.Y
  119. canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
  120. aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
  121. newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
  122. dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
  123. draw.ApproxBiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
  124. return dst, aspectRatio
  125. }
  126. func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
  127. paddedSize := image.Point{
  128. X: outputSize.X * aspectRatio.X,
  129. Y: outputSize.Y * aspectRatio.Y,
  130. }
  131. dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
  132. draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
  133. return dst
  134. }
  135. func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
  136. subImages := splitToTiles(img, aspectRatio)
  137. var pixelVals []float32
  138. for _, subImg := range subImages {
  139. bounds := subImg.Bounds()
  140. rVals := []float32{}
  141. gVals := []float32{}
  142. bVals := []float32{}
  143. for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
  144. for x := bounds.Min.X; x < bounds.Max.X; x++ {
  145. c := subImg.At(x, y)
  146. r, g, b, _ := c.RGBA()
  147. rVal := float32(r>>8) / 255.0
  148. gVal := float32(g>>8) / 255.0
  149. bVal := float32(b>>8) / 255.0
  150. rVal = (rVal - mean[0]) / std[0]
  151. gVal = (gVal - mean[1]) / std[1]
  152. bVal = (bVal - mean[2]) / std[2]
  153. rVals = append(rVals, rVal)
  154. gVals = append(gVals, gVal)
  155. bVals = append(bVals, bVal)
  156. }
  157. }
  158. pixelVals = append(pixelVals, rVals...)
  159. pixelVals = append(pixelVals, gVals...)
  160. pixelVals = append(pixelVals, bVals...)
  161. }
  162. return pixelVals
  163. }
  164. func Preprocess(imageData []byte) ([]float32, int, error) {
  165. // todo: need guard in here for bad image data
  166. // mllama values
  167. outputSize := image.Point{560, 560}
  168. maxTiles := 4
  169. // clip values
  170. mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
  171. std := [3]float32{0.26862954, 0.26130258, 0.27577711}
  172. img, _, err := image.Decode(bytes.NewReader(imageData))
  173. if err != nil {
  174. return nil, 0, fmt.Errorf("failed to decode image: %w", err)
  175. }
  176. newImage, aspectRatio := ResizeImage(img, outputSize, maxTiles)
  177. newImage = PadImage(newImage, outputSize, aspectRatio)
  178. data := PackImages(newImage, aspectRatio, mean, std)
  179. supportedRatios := GetSupportedAspectRatios(maxTiles)
  180. var aspectRatioIndex int
  181. for n, r := range supportedRatios {
  182. if r == aspectRatio {
  183. aspectRatioIndex = n + 1
  184. break
  185. }
  186. }
  187. return data, aspectRatioIndex, nil
  188. }