images.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. package imageproc
  2. import (
  3. "bytes"
  4. "fmt"
  5. "image"
  6. _ "image/jpeg"
  7. _ "image/png"
  8. "math"
  9. "golang.org/x/image/draw"
  10. )
  11. func GetSupportedAspectRatios(maxTiles int) []image.Point {
  12. ratios := []image.Point{}
  13. for w := range maxTiles {
  14. for h := range maxTiles {
  15. if (w+1)*(h+1) <= maxTiles {
  16. ratios = append(ratios, image.Point{w + 1, h + 1})
  17. }
  18. }
  19. }
  20. return ratios
  21. }
  22. func clip(a, a_min, a_max int) int {
  23. if a < a_min {
  24. return a_min
  25. } else if a > a_max {
  26. return a_max
  27. }
  28. return a
  29. }
  30. func min(a, b int) int {
  31. if a < b {
  32. return a
  33. }
  34. return b
  35. }
  36. func GetImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
  37. targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
  38. targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
  39. scaleWidth := float64(targetWidth) / float64(imageSize.X)
  40. scaleHeight := float64(targetHeight) / float64(imageSize.Y)
  41. var w, h int
  42. if scaleWidth < scaleHeight {
  43. w = targetWidth
  44. h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
  45. } else {
  46. w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
  47. h = targetHeight
  48. }
  49. return image.Point{w, h}
  50. }
  51. func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
  52. possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
  53. possibleCanvasSizes := []image.Point{}
  54. for _, pta := range possibleTileArrangements {
  55. possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
  56. }
  57. scales := []float64{}
  58. for _, pcs := range possibleCanvasSizes {
  59. scaleHeight := float64(pcs.Y) / float64(imageSize.Y)
  60. scaleWidth := float64(pcs.X) / float64(imageSize.X)
  61. if scaleWidth > scaleHeight {
  62. scales = append(scales, scaleHeight)
  63. } else {
  64. scales = append(scales, scaleWidth)
  65. }
  66. }
  67. var minUpscale float64
  68. var maxDownscale float64
  69. var upscale bool
  70. for _, s := range scales {
  71. if s > 1.0 {
  72. upscale = true
  73. if minUpscale == 0 {
  74. minUpscale = s
  75. } else {
  76. minUpscale = math.Min(minUpscale, s)
  77. }
  78. } else {
  79. maxDownscale = math.Max(maxDownscale, s)
  80. }
  81. }
  82. selectedScale := maxDownscale
  83. if upscale {
  84. selectedScale = minUpscale
  85. }
  86. selectedCanvas := possibleCanvasSizes[0]
  87. for n, pcs := range possibleCanvasSizes {
  88. if scales[n] == selectedScale {
  89. // choose the largest possible canvas
  90. if pcs.X*pcs.Y > selectedCanvas.X*selectedCanvas.Y {
  91. selectedCanvas = pcs
  92. }
  93. }
  94. }
  95. return selectedCanvas
  96. }
  97. func SplitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
  98. b := img.Bounds()
  99. width := b.Max.X - b.Min.X
  100. height := b.Max.Y - b.Min.Y
  101. tileHeight := height / numTilesSize.Y
  102. tileWidth := width / numTilesSize.X
  103. images := []image.Image{}
  104. for h := range numTilesSize.Y {
  105. for w := range numTilesSize.X {
  106. rect := image.Rect(tileWidth*w, tileHeight*h, tileWidth*(w+1), tileHeight*(h+1))
  107. images = append(images, img.(interface {
  108. SubImage(image.Rectangle) image.Image
  109. }).SubImage(rect))
  110. }
  111. }
  112. return images
  113. }
  114. func ResizeImage(img image.Image, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
  115. b := img.Bounds()
  116. tileSize := outputSize.Y
  117. canvasSize := GetOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
  118. aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
  119. newSize := GetImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
  120. dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
  121. draw.ApproxBiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
  122. return dst, aspectRatio
  123. }
  124. func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
  125. paddedSize := image.Point{
  126. X: outputSize.X * aspectRatio.X,
  127. Y: outputSize.Y * aspectRatio.Y,
  128. }
  129. dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
  130. centerX := (paddedSize.X - img.Bounds().Max.X) / 2
  131. centerY := (paddedSize.Y - img.Bounds().Max.Y) / 2
  132. pos := image.Rect(centerX, centerY, centerX+img.Bounds().Max.X, centerY+img.Bounds().Max.Y)
  133. draw.Draw(dst, pos, img, image.Point{0, 0}, draw.Over)
  134. return dst
  135. }
  136. func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
  137. subImages := SplitToTiles(img, aspectRatio)
  138. var pixelVals []float32
  139. for _, subImg := range subImages {
  140. bounds := subImg.Bounds()
  141. rVals := []float32{}
  142. gVals := []float32{}
  143. bVals := []float32{}
  144. for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
  145. for x := bounds.Min.X; x < bounds.Max.X; x++ {
  146. c := subImg.At(x, y)
  147. r, g, b, _ := c.RGBA()
  148. rVal := float32(r>>8) / 255.0
  149. gVal := float32(g>>8) / 255.0
  150. bVal := float32(b>>8) / 255.0
  151. rVal = (rVal - mean[0]) / std[0]
  152. gVal = (gVal - mean[1]) / std[1]
  153. bVal = (bVal - mean[2]) / std[2]
  154. rVals = append(rVals, rVal)
  155. gVals = append(gVals, gVal)
  156. bVals = append(bVals, bVal)
  157. }
  158. }
  159. pixelVals = append(pixelVals, rVals...)
  160. pixelVals = append(pixelVals, gVals...)
  161. pixelVals = append(pixelVals, bVals...)
  162. }
  163. return pixelVals
  164. }
  165. func Preprocess(imageData []byte) ([]float32, int, error) {
  166. // todo: need guard in here for bad image data
  167. // mllama values
  168. outputSize := image.Point{560, 560}
  169. maxTiles := 4
  170. // clip values
  171. mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
  172. std := [3]float32{0.26862954, 0.26130258, 0.27577711}
  173. img, _, err := image.Decode(bytes.NewReader(imageData))
  174. if err != nil {
  175. return nil, 0, fmt.Errorf("failed to decode image: %w", err)
  176. }
  177. newImage, aspectRatio := ResizeImage(img, outputSize, maxTiles)
  178. newImage = PadImage(newImage, outputSize, aspectRatio)
  179. // todo: need to scale (dim) by 1/256
  180. data := PackImages(newImage, aspectRatio, mean, std)
  181. supportedRatios := GetSupportedAspectRatios(maxTiles)
  182. var aspectRatioIndex int
  183. for n, r := range supportedRatios {
  184. if r == aspectRatio {
  185. aspectRatioIndex = n+1
  186. break
  187. }
  188. }
  189. return data, aspectRatioIndex, nil
  190. }