images.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. package imageproc
  2. import (
  3. "bytes"
  4. "fmt"
  5. "image"
  6. "image/color"
  7. _ "image/jpeg"
  8. _ "image/png"
  9. "math"
  10. "golang.org/x/image/draw"
  11. )
  12. func GetSupportedAspectRatios(maxTiles int) []image.Point {
  13. ratios := []image.Point{}
  14. for w := range maxTiles {
  15. for h := range maxTiles {
  16. if (w+1)*(h+1) <= maxTiles {
  17. ratios = append(ratios, image.Point{w + 1, h + 1})
  18. }
  19. }
  20. }
  21. return ratios
  22. }
  23. func clip(a, a_min, a_max int) int {
  24. if a < a_min {
  25. return a_min
  26. } else if a > a_max {
  27. return a_max
  28. }
  29. return a
  30. }
  31. func min(a, b int) int {
  32. if a < b {
  33. return a
  34. }
  35. return b
  36. }
  37. func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
  38. targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
  39. targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
  40. scaleWidth := float64(targetWidth) / float64(imageSize.X)
  41. scaleHeight := float64(targetHeight) / float64(imageSize.Y)
  42. var w, h int
  43. if scaleWidth < scaleHeight {
  44. w = targetWidth
  45. h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
  46. } else {
  47. w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
  48. h = targetHeight
  49. }
  50. return image.Point{w, h}
  51. }
  52. func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
  53. possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
  54. possibleCanvasSizes := []image.Point{}
  55. for _, pta := range possibleTileArrangements {
  56. possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
  57. }
  58. scales := []float64{}
  59. for _, pcs := range possibleCanvasSizes {
  60. scaleHeight := float64(pcs.Y) / float64(imageSize.Y)
  61. scaleWidth := float64(pcs.X) / float64(imageSize.X)
  62. if scaleWidth > scaleHeight {
  63. scales = append(scales, scaleHeight)
  64. } else {
  65. scales = append(scales, scaleWidth)
  66. }
  67. }
  68. var minUpscale float64
  69. var maxDownscale float64
  70. var upscale bool
  71. for _, s := range scales {
  72. if s > 1.0 {
  73. upscale = true
  74. if minUpscale == 0 {
  75. minUpscale = s
  76. } else {
  77. minUpscale = math.Min(minUpscale, s)
  78. }
  79. } else {
  80. maxDownscale = math.Max(maxDownscale, s)
  81. }
  82. }
  83. selectedScale := maxDownscale
  84. if upscale {
  85. selectedScale = minUpscale
  86. }
  87. var selectedCanvas image.Point
  88. for n, pcs := range possibleCanvasSizes {
  89. if scales[n] == selectedScale {
  90. // choose the smallest possible canvas
  91. if selectedCanvas.X == 0 && selectedCanvas.Y == 0 {
  92. selectedCanvas = pcs
  93. } else if pcs.X*pcs.Y < selectedCanvas.X*selectedCanvas.Y {
  94. selectedCanvas = pcs
  95. }
  96. }
  97. }
  98. return selectedCanvas
  99. }
  100. func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
  101. b := img.Bounds()
  102. width := b.Max.X - b.Min.X
  103. height := b.Max.Y - b.Min.Y
  104. tileHeight := height / numTilesSize.Y
  105. tileWidth := width / numTilesSize.X
  106. images := []image.Image{}
  107. for h := range numTilesSize.Y {
  108. for w := range numTilesSize.X {
  109. rect := image.Rect(tileWidth*w, tileHeight*h, tileWidth*(w+1), tileHeight*(h+1))
  110. images = append(images, img.(interface {
  111. SubImage(image.Rectangle) image.Image
  112. }).SubImage(rect))
  113. }
  114. }
  115. return images
  116. }
  117. // remove the "alpha" channel by drawing over a prefilled image
  118. func compositeImage(img image.Image) image.Image {
  119. dst := image.NewRGBA(img.Bounds())
  120. white := color.RGBA{255, 255, 255, 255}
  121. draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
  122. draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
  123. return dst
  124. }
  125. func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
  126. if format == "png" {
  127. img = compositeImage(img)
  128. }
  129. b := img.Bounds()
  130. tileSize := outputSize.Y
  131. canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
  132. aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
  133. newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
  134. dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
  135. // scaling choices:
  136. // NearestNeighbor fast, blocky output
  137. // ApproxBiLinear fast, medium quality
  138. // BiLinear slow, high quality
  139. // CatmullRom very slow, very high quality
  140. draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
  141. return dst, aspectRatio
  142. }
  143. func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
  144. paddedSize := image.Point{
  145. X: outputSize.X * aspectRatio.X,
  146. Y: outputSize.Y * aspectRatio.Y,
  147. }
  148. dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
  149. draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
  150. return dst
  151. }
  152. func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
  153. subImages := splitToTiles(img, aspectRatio)
  154. var pixelVals []float32
  155. for _, subImg := range subImages {
  156. bounds := subImg.Bounds()
  157. rVals := []float32{}
  158. gVals := []float32{}
  159. bVals := []float32{}
  160. for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
  161. for x := bounds.Min.X; x < bounds.Max.X; x++ {
  162. c := subImg.At(x, y)
  163. r, g, b, _ := c.RGBA()
  164. rVal := float32(r>>8) / 255.0
  165. gVal := float32(g>>8) / 255.0
  166. bVal := float32(b>>8) / 255.0
  167. rVal = (rVal - mean[0]) / std[0]
  168. gVal = (gVal - mean[1]) / std[1]
  169. bVal = (bVal - mean[2]) / std[2]
  170. rVals = append(rVals, rVal)
  171. gVals = append(gVals, gVal)
  172. bVals = append(bVals, bVal)
  173. }
  174. }
  175. pixelVals = append(pixelVals, rVals...)
  176. pixelVals = append(pixelVals, gVals...)
  177. pixelVals = append(pixelVals, bVals...)
  178. }
  179. return pixelVals
  180. }
  181. func Preprocess(imageData []byte) ([]float32, int, error) {
  182. // todo: need guard in here for bad image data
  183. // mllama values
  184. outputSize := image.Point{560, 560}
  185. maxTiles := 4
  186. // clip values
  187. mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
  188. std := [3]float32{0.26862954, 0.26130258, 0.27577711}
  189. img, format, err := image.Decode(bytes.NewReader(imageData))
  190. if err != nil {
  191. return nil, 0, fmt.Errorf("failed to decode image: %w", err)
  192. }
  193. newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles)
  194. newImage = PadImage(newImage, outputSize, aspectRatio)
  195. data := PackImages(newImage, aspectRatio, mean, std)
  196. supportedRatios := GetSupportedAspectRatios(maxTiles)
  197. var aspectRatioIndex int
  198. for n, r := range supportedRatios {
  199. if r == aspectRatio {
  200. aspectRatioIndex = n + 1
  201. break
  202. }
  203. }
  204. return data, aspectRatioIndex, nil
  205. }