quantize.cuh 717 B

1234567891011121314151617181920
  1. #pragma once
  2. #include "common.cuh"
  3. #include "mmq.cuh"
  4. #include <cstdint>
  // Block-size constant for the CUDA quantization code declared in this header.
  // NOTE(review): presumably the number of threads per block used when launching the
  // quantization kernels -- the use site is not visible in this header; confirm there.
  #define CUDA_QUANTIZE_BLOCK_SIZE 256
  6. typedef void (*quantize_cuda_t)(
  7. const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
  8. const ggml_type type_x, cudaStream_t stream);
  9. void quantize_row_q8_1_cuda(
  10. const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
  11. const ggml_type type_x, cudaStream_t stream);
  12. void quantize_mmq_q8_1_cuda(
  13. const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
  14. const ggml_type type_x, cudaStream_t stream);