0004-metal.patch 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. From 29411d9a9d2b6a0af6425ffe88498f17f71f7d5d Mon Sep 17 00:00:00 2001
  2. From: Michael Yang <mxyng@pm.me>
  3. Date: Mon, 16 Sep 2024 15:53:12 -0700
  4. Subject: [PATCH] 04-metal.diff
  5. ---
  6. ggml/src/ggml-metal.m | 30 +++++++++++++-----------------
  7. 1 file changed, 13 insertions(+), 17 deletions(-)
  8. diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
  9. index 91b5e61b..9cfa72ac 100644
  10. --- a/ggml/src/ggml-metal.m
  11. +++ b/ggml/src/ggml-metal.m
  12. @@ -1734,27 +1734,23 @@ static enum ggml_status ggml_metal_graph_compute(
  13. // to the matrix-vector kernel
  14. int ne11_mm_min = 1;
  15. -#if 0
  16. // the numbers below are measured on M2 Ultra for 7B and 13B models
  17. // these numbers do not translate to other devices or model sizes
  18. // TODO: need to find a better approach
  19. - if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
  20. - switch (src0t) {
  21. - case GGML_TYPE_F16: ne11_mm_min = 2; break;
  22. - case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
  23. - case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
  24. - case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
  25. - case GGML_TYPE_Q4_0:
  26. - case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
  27. - case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
  28. - case GGML_TYPE_Q5_0: // not tested yet
  29. - case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
  30. - case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
  31. - case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
  32. - default: ne11_mm_min = 1; break;
  33. - }
  34. + switch (src0t) {
  35. + case GGML_TYPE_F16: ne11_mm_min = 2; break;
  36. + case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
  37. + case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
  38. + case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
  39. + case GGML_TYPE_Q4_0:
  40. + case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
  41. + case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
  42. + case GGML_TYPE_Q5_0: // not tested yet
  43. + case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
  44. + case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
  45. + case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
  46. + default: ne11_mm_min = 1; break;
  47. }
  48. -#endif
  49. // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
  50. // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
  51. --
  52. 2.46.0