123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- /**
- * llama.cpp - commit 3f1ae2e32cde00c39b96be6d01c2997c29bae555 - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #pragma once
- #include "llama.h"
- #include <string>
- #include <vector>
- #include <stdexcept>
// LLAMA_ATTRIBUTE_FORMAT(fmt_index, first_arg_index)
//
// Annotates a variadic function so that GNU-compatible compilers type-check the
// variadic arguments against the printf-style format string:
//   - GNU-compatible compiler targeting MinGW : uses the `gnu_printf` archetype
//   - any other GNU-compatible compiler       : uses the `printf` archetype
//   - everything else                         : expands to nothing
#if defined(__GNUC__) && defined(__MINGW32__)
#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#elif defined(__GNUC__)
#    define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#else
#    define LLAMA_ATTRIBUTE_FORMAT(...)
#endif
- //
- // logging
- //
- LLAMA_ATTRIBUTE_FORMAT(2, 3)
- void llama_log_internal (ggml_log_level level, const char * format, ...);
- void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
- #define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
- #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
- #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
- #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
- #define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
- #define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
- //
- // helpers
- //
- struct time_meas {
- time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
- ~time_meas() {
- if (t_start_us >= 0) {
- t_acc += ggml_time_us() - t_start_us;
- }
- }
- const int64_t t_start_us;
- int64_t & t_acc;
- };
// Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
// scanning left to right. The text inserted by a replacement is never rescanned.
//   s       - string modified in place
//   search  - substring to look for; an empty needle leaves `s` unchanged
//   replace - text substituted for each match (may be empty)
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }

    // assemble the result in a scratch string so `s` stays intact while scanning
    std::string result;
    result.reserve(s.length());

    size_t cursor = 0; // start of the not-yet-copied tail of `s`
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        result.append(s, cursor, hit - cursor); // untouched text before the match
        result.append(replace);
        cursor = hit + search.length();         // continue right after the match
    }

    // whatever follows the last match
    result.append(s, cursor, std::string::npos);

    s = std::move(result);
}
// Returns a read-only reference to a list of (string, ggml_tensor *) pairs
// associated with `ctx`.
// NOTE(review): presumably the string is the tensor name - the definition is
// not in this header, confirm there.
const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(struct llama_context * ctx);
// Fixed-capacity FIFO ring buffer; works similarly to std::deque, but never grows.
//
// Storage is a std::vector<T> pre-sized to `capacity` slots, so T must be
// default-constructible and copy-assignable. Once the buffer is full,
// push_back() overwrites the oldest element.
//
// Invariant kept by every mutator (for capacity > 0):
//     pos == (first + sz) % capacity
// i.e. `pos` is the slot the NEXT push_back() will write, not the newest element.
template<typename T>
struct ring_buffer {
    // Creates an empty buffer with room for `cap` elements.
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // Oldest element. Throws std::runtime_error if the buffer is empty.
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // Newest element (same element as rat(0)).
    // Throws std::runtime_error if the buffer is empty.
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` already points one past the newest element, so index relative
        // to `first` instead; sz > 0 guarantees capacity > 0 and no underflow
        return data[(first + sz - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // see the non-const overload
        return data[(first + sz - 1) % capacity];
    }

    // Appends `value`; when the buffer is full the oldest element is overwritten.
    // Throws std::runtime_error if the buffer was created with capacity 0.
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // advance the start when buffer is full
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // Removes the oldest element and returns a copy of it.
    // Throws std::runtime_error if the buffer is empty.
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    //T & operator[](size_t i) {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    //const T & at(size_t i) const {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    // Reverse access: rat(0) is the newest element, rat(size() - 1) the oldest.
    // Throws std::runtime_error if i >= size().
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // Copies the stored elements into a vector, ordered oldest to newest.
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    // Empties the buffer. The capacity is kept.
    void clear() {
        // here only reset the status of the buffer
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0; // number of slots in `data`
    size_t sz = 0;       // number of elements currently stored
    size_t first = 0;    // index of the oldest element
    size_t pos = 0;      // index the next push_back() writes to
    std::vector<T> data; // backing storage, always `capacity` slots long
};
|