04-metal.diff 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
  2. index 0207b787..b5e9884b 100644
  3. --- a/ggml/src/ggml-metal.m
  4. +++ b/ggml/src/ggml-metal.m
  5. @@ -1396,27 +1396,23 @@ static enum ggml_status ggml_metal_graph_compute(
  6. // to the matrix-vector kernel
  7. int ne11_mm_min = 1;
  8. -#if 0
  9. // the numbers below are measured on M2 Ultra for 7B and 13B models
  10. // these numbers do not translate to other devices or model sizes
  11. // TODO: need to find a better approach
  12. - if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
  13. - switch (src0t) {
  14. - case GGML_TYPE_F16: ne11_mm_min = 2; break;
  15. - case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
  16. - case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
  17. - case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
  18. - case GGML_TYPE_Q4_0:
  19. - case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
  20. - case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
  21. - case GGML_TYPE_Q5_0: // not tested yet
  22. - case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
  23. - case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
  24. - case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
  25. - default: ne11_mm_min = 1; break;
  26. - }
  27. + switch (src0t) {
  28. + case GGML_TYPE_F16: ne11_mm_min = 2; break;
  29. + case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
  30. + case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
  31. + case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
  32. + case GGML_TYPE_Q4_0:
  33. + case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
  34. + case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
  35. + case GGML_TYPE_Q5_0: // not tested yet
  36. + case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
  37. + case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
  38. + case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
  39. + default: ne11_mm_min = 1; break;
  40. }
  41. -#endif
  42. // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
  43. // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel