123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- /**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #define GGML_COMMON_DECL_C
- #include "ggml-common.h"
- #include "ggml-aarch64.h"
- #include "ggml-impl.h"
- #include "ggml-quants.h"
- #include <assert.h>
- #define UNUSED GGML_UNUSED
- static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
- block_q4_0x4 out;
- for (int i = 0; i < 4; i++) {
- out.d[i] = in[i].d;
- }
- const int end = QK4_0 * 2 / blck_size_interleave;
- if (blck_size_interleave == 8) {
- const uint64_t xor_mask = 0x8888888888888888ULL;
- for (int i = 0; i < end; ++i) {
- int src_id = i % 4;
- int src_offset = (i / 4) * blck_size_interleave;
- int dst_offset = i * blck_size_interleave;
- uint64_t elems;
- // Using memcpy to avoid unaligned memory accesses
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
- elems ^= xor_mask;
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
- }
- } else if (blck_size_interleave == 4) {
- const uint32_t xor_mask = 0x88888888;
- for (int i = 0; i < end; ++i) {
- int src_id = i % 4;
- int src_offset = (i / 4) * blck_size_interleave;
- int dst_offset = i * blck_size_interleave;
- uint32_t elems;
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
- elems ^= xor_mask;
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
- }
- } else {
- GGML_ASSERT(false);
- }
- return out;
- }
- // interleave 8 block_q4_0s in blocks of blck_size_interleave
- // returns an interleaved block_q4_0x8
- // in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
- // first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
- static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
- block_q4_0x8 out;
- for (int i = 0; i < 8; i++) {
- out.d[i] = in[i].d;
- }
- const int end = QK4_0 * 4 / blck_size_interleave;
- const uint64_t xor_mask = 0x8888888888888888ULL;
- for (int i = 0; i < end; ++i) {
- int src_id = i % 8;
- int src_offset = (i / 8) * blck_size_interleave;
- int dst_offset = i * blck_size_interleave;
- uint64_t elems;
- memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
- elems ^= xor_mask;
- memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
- }
- return out;
- }
- static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, int nrows_interleaved, int blck_size_interleave) {
- assert(n_per_row % QK4_0 == 0);
- const int nb = n_per_row / QK4_0;
- void * out_ptr = NULL;
- if (nrows_interleaved == 8) {
- out_ptr = (block_q4_0x8 *) dst;
- }
- else if (nrows_interleaved == 4) {
- out_ptr = (block_q4_0x4 *) dst;
- }
- assert(nrows_interleaved <= 8);
- block_q4_0 dst_tmp[8];
- for (int b = 0; b < (nrow * n_per_row); b += nrows_interleaved * n_per_row) {
- for (int64_t x = 0; x < nb; x++) {
- for (int i = 0; i < nrows_interleaved; i++ ) {
- quantize_row_q4_0_ref(src + b + i * n_per_row + x * QK4_0, (block_q4_0 *) dst_tmp + i, QK4_0);
- }
- if (nrows_interleaved == 8) {
- *(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
- out_ptr = (block_q4_0x8 *) out_ptr + 1;
- }
- else if (nrows_interleaved == 4) {
- *(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
- out_ptr = (block_q4_0x4 *) out_ptr + 1;
- }
- }
- }
- return ((nrow * n_per_row) / QK4_0 * sizeof(block_q4_0));
- }
// Quantize src to q4_0 with 4 rows interleaved and an interleave block size of 4.
// quant_weights is accepted but not used.
// Returns the value reported by quantize_q4_0_nr_bl.
size_t quantize_q4_0_4x4(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const int rows_interleaved = 4;
    const int interleave_size  = 4;

    UNUSED(quant_weights);

    const size_t result = quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, rows_interleaved, interleave_size);
    return result;
}
// Quantize src to q4_0 with 4 rows interleaved and an interleave block size of 8.
// quant_weights is accepted but not used.
// Returns the value reported by quantize_q4_0_nr_bl.
size_t quantize_q4_0_4x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const int rows_interleaved = 4;
    const int interleave_size  = 8;

    UNUSED(quant_weights);

    const size_t result = quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, rows_interleaved, interleave_size);
    return result;
}
// Quantize src to q4_0 with 8 rows interleaved and an interleave block size of 8.
// quant_weights is accepted but not used.
// Returns the value reported by quantize_q4_0_nr_bl.
size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const int rows_interleaved = 8;
    const int interleave_size  = 8;

    UNUSED(quant_weights);

    const size_t result = quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, rows_interleaved, interleave_size);
    return result;
}
|