
Convert Safetensors to an Ollama model (#2824)

Patrick Devine, 1 year ago
commit 2c017ca441

9 changed files with 3083 additions and 153 deletions
  1. cmd/cmd.go (+89 −8)
  2. convert/convert.go (+331 −0)
  3. convert/sentencepiece/sentencepiece_model.pb.go (+1497 −0)
  4. convert/sentencepiece_model.proto (+333 −0)
  5. go.mod (+22 −3)
  6. go.sum (+148 −2)
  7. llm/ggml.go (+2 −2)
  8. llm/gguf.go (+574 −137)
  9. server/images.go (+87 −1)
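
For orientation, here is a minimal sketch (not part of the commit) of how the pieces added in convert/convert.go below fit together. It uses only the exported functions and the module path that appear in the diff; the model directory and model name are hypothetical.

	package main

	import (
		"fmt"
		"log"

		"github.com/jmorganca/ollama/convert"
	)

	func main() {
		// Hypothetical directory holding model-*.safetensors, config.json,
		// tokenizer.model and (optionally) added_tokens.json.
		dir := "./my-safetensors-model"

		params, err := convert.GetParams(dir) // parse config.json
		if err != nil {
			log.Fatal(err)
		}

		tensors, err := convert.GetSafeTensors(dir) // index each model-*.safetensors shard
		if err != nil {
			log.Fatal(err)
		}

		vocab, err := convert.LoadTokens(dir) // read tokenizer.model and added_tokens.json
		if err != nil {
			log.Fatal(err)
		}

		ggufPath, err := convert.WriteGGUF("mymodel", tensors, params, vocab)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println("wrote temporary GGUF to", ggufPath)
	}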

+ 89 - 8
cmd/cmd.go

@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"archive/zip"
 	"bytes"
 	"context"
 	"crypto/ed25519"
@@ -87,22 +88,82 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = filepath.Join(filepath.Dir(filename), path)
 			}
 
-			bin, err := os.Open(path)
+			fi, err := os.Stat(path)
 			if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
 				continue
 			} else if err != nil {
 				return err
 			}
-			defer bin.Close()
 
-			hash := sha256.New()
-			if _, err := io.Copy(hash, bin); err != nil {
-				return err
+			// TODO make this work w/ adapters
+			if fi.IsDir() {
+				tf, err := os.CreateTemp("", "ollama-tf")
+				if err != nil {
+					return err
+				}
+				defer os.RemoveAll(tf.Name())
+
+				zf := zip.NewWriter(tf)
+
+				files, err := filepath.Glob(filepath.Join(path, "model-*.safetensors"))
+				if err != nil {
+					return err
+				}
+
+				if len(files) == 0 {
+					return fmt.Errorf("no safetensors files were found in '%s'", path)
+				}
+
+				// add the safetensor config file + tokenizer
+				files = append(files, filepath.Join(path, "config.json"))
+				files = append(files, filepath.Join(path, "added_tokens.json"))
+				files = append(files, filepath.Join(path, "tokenizer.model"))
+
+				for _, fn := range files {
+					f, err := os.Open(fn)
+					if os.IsNotExist(err) && strings.HasSuffix(fn, "added_tokens.json") {
+						continue
+					} else if err != nil {
+						return err
+					}
+
+					fi, err := f.Stat()
+					if err != nil {
+						return err
+					}
+
+					h, err := zip.FileInfoHeader(fi)
+					if err != nil {
+						return err
+					}
+
+					h.Name = filepath.Base(fn)
+					h.Method = zip.Store
+
+					w, err := zf.CreateHeader(h)
+					if err != nil {
+						return err
+					}
+
+					_, err = io.Copy(w, f)
+					if err != nil {
+						return err
+					}
+
+				}
+
+				if err := zf.Close(); err != nil {
+					return err
+				}
+
+				if err := tf.Close(); err != nil {
+					return err
+				}
+				path = tf.Name()
 			}
-			bin.Seek(0, io.SeekStart)
 
-			digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-			if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
+			digest, err := createBlob(cmd, client, path)
+			if err != nil {
 				return err
 			}
 
@@ -141,6 +202,26 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }
 
+func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
+	bin, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer bin.Close()
+
+	hash := sha256.New()
+	if _, err := io.Copy(hash, bin); err != nil {
+		return "", err
+	}
+	bin.Seek(0, io.SeekStart)
+
+	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
+	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
+		return "", err
+	}
+	return digest, nil
+}
+
 func RunHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {

+ 331 - 0
convert/convert.go

@@ -0,0 +1,331 @@
+package convert
+
+import (
+	"bytes"
+	"cmp"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"regexp"
+	"slices"
+
+	"github.com/mitchellh/mapstructure"
+	"google.golang.org/protobuf/proto"
+
+	"github.com/jmorganca/ollama/convert/sentencepiece"
+	"github.com/jmorganca/ollama/llm"
+)
+
+type Params struct {
+	Architectures    []string `json:"architectures"`
+	VocabSize        int      `json:"vocab_size"`
+	HiddenSize       int      `json:"hidden_size"`       // n_embd
+	HiddenLayers     int      `json:"num_hidden_layers"` // n_layer
+	ContextSize      int      `json:"max_position_embeddings"`
+	IntermediateSize int      `json:"intermediate_size"`
+	AttentionHeads   int      `json:"num_attention_heads"` // n_head
+	KeyValHeads      int      `json:"num_key_value_heads"`
+	NormEPS          float64  `json:"rms_norm_eps"`
+	RopeFreqBase     float64  `json:"rope_theta"`
+	BoSTokenID       int      `json:"bos_token_id"`
+	EoSTokenID       int      `json:"eos_token_id"`
+}
+
+type MetaData struct {
+	Type    string `mapstructure:"dtype"`
+	Shape   []int  `mapstructure:"shape"`
+	Offsets []int  `mapstructure:"data_offsets"`
+}
+
+func ReadSafeTensors(fn string, offset uint64) ([]llm.Tensor, uint64, error) {
+	f, err := os.Open(fn)
+	if err != nil {
+		return []llm.Tensor{}, 0, err
+	}
+	defer f.Close()
+
+	var jsonSize uint64
+	binary.Read(f, binary.LittleEndian, &jsonSize)
+
+	buf := make([]byte, jsonSize)
+	_, err = io.ReadFull(f, buf)
+	if err != nil {
+		return []llm.Tensor{}, 0, err
+	}
+
+	d := json.NewDecoder(bytes.NewBuffer(buf))
+	d.UseNumber()
+	var parsed map[string]interface{}
+	if err = d.Decode(&parsed); err != nil {
+		return []llm.Tensor{}, 0, err
+	}
+
+	var keys []string
+	for k := range parsed {
+		keys = append(keys, k)
+	}
+
+	slices.Sort(keys)
+
+	slog.Info("converting layers")
+
+	var tensors []llm.Tensor
+	for _, k := range keys {
+		vals := parsed[k].(map[string]interface{})
+		var data MetaData
+		if err = mapstructure.Decode(vals, &data); err != nil {
+			return []llm.Tensor{}, 0, err
+		}
+
+		var size uint64
+		var kind uint32
+		switch len(data.Shape) {
+		case 0:
+			// metadata
+			continue
+		case 1:
+			// convert to float32
+			kind = 0
+			size = uint64(data.Shape[0] * 4)
+		case 2:
+			// convert to float16
+			kind = 1
+			size = uint64(data.Shape[0] * data.Shape[1] * 2)
+		}
+
+		ggufName, err := GetTensorName(k)
+		if err != nil {
+			slog.Error("%v", err)
+			return []llm.Tensor{}, 0, err
+		}
+
+		shape := [4]uint64{0, 0, 0, 0}
+		for cnt, s := range data.Shape {
+			shape[cnt] = uint64(s)
+		}
+
+		t := llm.Tensor{
+			Name:          ggufName,
+			Kind:          kind,
+			Offset:        offset,
+			Shape:         shape,
+			FileName:      fn,
+			OffsetPadding: 8 + jsonSize,
+			FileOffsets:   []uint64{uint64(data.Offsets[0]), uint64(data.Offsets[1])},
+		}
+		slog.Debug(fmt.Sprintf("%v", t))
+		tensors = append(tensors, t)
+		offset += size
+	}
+	return tensors, offset, nil
+}
+
+func GetSafeTensors(dirpath string) ([]llm.Tensor, error) {
+	var tensors []llm.Tensor
+	files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
+	if err != nil {
+		return []llm.Tensor{}, err
+	}
+
+	var offset uint64
+	for _, f := range files {
+		var t []llm.Tensor
+		var err error
+		t, offset, err = ReadSafeTensors(f, offset)
+		if err != nil {
+			slog.Error("%v", err)
+			return []llm.Tensor{}, err
+		}
+		tensors = append(tensors, t...)
+	}
+	return tensors, nil
+}
+
+func GetParams(dirpath string) (*Params, error) {
+	f, err := os.Open(filepath.Join(dirpath, "config.json"))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var params Params
+
+	d := json.NewDecoder(f)
+	err = d.Decode(&params)
+	if err != nil {
+		return nil, err
+	}
+
+	return &params, nil
+}
+
+// Details on gguf's tokenizer can be found at:
+// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
+type Vocab struct {
+	Tokens []string
+	Scores []float32
+	Types  []int32
+}
+
+func LoadTokens(dirpath string) (*Vocab, error) {
+	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
+	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
+	if err != nil {
+		return nil, err
+	}
+
+	// To regenerate sentencepiece from the protobufs use:
+	// protoc -I=./ --go_out=./ sentencepiece_model.proto
+	modelProto := &sentencepiece.ModelProto{}
+	if err := proto.Unmarshal(in, modelProto); err != nil {
+		return nil, err
+	}
+
+	v := &Vocab{
+		Tokens: make([]string, 0),
+		Scores: make([]float32, 0),
+		Types:  make([]int32, 0),
+	}
+
+	pieces := modelProto.GetPieces()
+	for _, p := range pieces {
+		v.Tokens = append(v.Tokens, p.GetPiece())
+		v.Scores = append(v.Scores, p.GetScore())
+		t := p.GetType()
+		v.Types = append(v.Types, int32(t))
+	}
+
+	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
+
+	// add any additional tokens
+	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
+	if os.IsNotExist(err) {
+		return v, nil
+	} else if err != nil {
+		return nil, err
+	}
+
+	slog.Info("reading user defined tokens")
+
+	var extraTokenData map[string]int
+	if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
+		return nil, err
+	}
+
+	type token struct {
+		key string
+		pos int
+	}
+
+	extraTokens := make([]token, 0)
+	for k, id := range extraTokenData {
+		extraTokens = append(extraTokens, token{k, id})
+	}
+
+	slices.SortFunc(extraTokens, func(a, b token) int {
+		return cmp.Compare(a.pos, b.pos)
+	})
+
+	numToks := len(v.Tokens)
+
+	for cnt, t := range extraTokens {
+		// the token id should match the specific index for the total number of tokens
+		if t.pos != cnt+numToks {
+			return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
+		}
+		v.Tokens = append(v.Tokens, t.key)
+		v.Scores = append(v.Scores, -1000.0)
+		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+	}
+	slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
+
+	return v, nil
+}
+
+func GetTensorName(n string) (string, error) {
+	tMap := map[string]string{
+		"model.embed_tokens.weight":                           "token_embd.weight",
+		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
+		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
+		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
+		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
+		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
+		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
+		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
+		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
+		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
+		"lm_head.weight":    "output.weight",
+		"model.norm.weight": "output_norm.weight",
+	}
+
+	v, ok := tMap[n]
+	if ok {
+		return v, nil
+	}
+
+	// quick hack to rename the layers to gguf format
+	for k, v := range tMap {
+		re := regexp.MustCompile(k)
+		newName := re.ReplaceAllString(n, v)
+		if newName != n {
+			return newName, nil
+		}
+	}
+
+	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
+}
+
+func WriteGGUF(name string, tensors []llm.Tensor, params *Params, vocab *Vocab) (string, error) {
+	c := llm.ContainerGGUF{
+		ByteOrder: binary.LittleEndian,
+	}
+
+	m := llm.NewGGUFModel(&c)
+	m.Tensors = tensors
+	m.KV["general.architecture"] = "llama"
+	m.KV["general.name"] = name
+	m.KV["llama.context_length"] = uint32(params.ContextSize)
+	m.KV["llama.embedding_length"] = uint32(params.HiddenSize)
+	m.KV["llama.block_count"] = uint32(params.HiddenLayers)
+	m.KV["llama.feed_forward_length"] = uint32(params.IntermediateSize)
+	m.KV["llama.rope.dimension_count"] = uint32(128)
+	m.KV["llama.attention.head_count"] = uint32(params.AttentionHeads)
+	m.KV["llama.attention.head_count_kv"] = uint32(params.KeyValHeads)
+	m.KV["llama.attention.layer_norm_rms_epsilon"] = float32(params.NormEPS)
+	m.KV["llama.rope.freq_base"] = float32(params.RopeFreqBase)
+	m.KV["general.file_type"] = uint32(1)
+	m.KV["tokenizer.ggml.model"] = "llama"
+
+	m.KV["tokenizer.ggml.tokens"] = vocab.Tokens
+	m.KV["tokenizer.ggml.scores"] = vocab.Scores
+	m.KV["tokenizer.ggml.token_type"] = vocab.Types
+
+	m.KV["tokenizer.ggml.bos_token_id"] = uint32(params.BoSTokenID)
+	m.KV["tokenizer.ggml.eos_token_id"] = uint32(params.EoSTokenID)
+	m.KV["tokenizer.ggml.unknown_token_id"] = uint32(0)
+	m.KV["tokenizer.ggml.add_bos_token"] = true
+	m.KV["tokenizer.ggml.add_eos_token"] = false
+
+	// llamacpp sets the chat template, however we don't need to set it since we pass it in through a layer
+	// m.KV["tokenizer.chat_template"] = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" // XXX removeme
+
+	c.V3.NumTensor = uint64(len(tensors))
+	c.V3.NumKV = uint64(len(m.KV))
+
+	f, err := os.CreateTemp("", "ollama-gguf")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	err = m.Encode(f)
+	if err != nil {
+		return "", err
+	}
+
+	return f.Name(), nil
+}
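
A small standalone sketch (not from the commit) of the on-disk layout that ReadSafeTensors above relies on: each .safetensors file begins with an 8-byte little-endian header length, followed by that many bytes of JSON mapping tensor names to their dtype, shape, and data_offsets. The filename is hypothetical.

	package main

	import (
		"encoding/binary"
		"fmt"
		"io"
		"log"
		"os"
	)

	func main() {
		f, err := os.Open("model-00001-of-00002.safetensors") // hypothetical shard name
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()

		// First 8 bytes: little-endian uint64 giving the JSON header size.
		var headerSize uint64
		if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
			log.Fatal(err)
		}

		// Next headerSize bytes: JSON map of tensor name -> {dtype, shape, data_offsets}.
		header := make([]byte, headerSize)
		if _, err := io.ReadFull(f, header); err != nil {
			log.Fatal(err)
		}
		fmt.Println(string(header))
	}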

+ 1497 - 0
convert/sentencepiece/sentencepiece_model.pb.go

@@ -0,0 +1,1497 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.!
+
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.32.0
+// 	protoc        v4.25.2
+// source: sentencepiece_model.proto
+
+package sentencepiece
+
+import (
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+// Model type. only have UNIGRAM now.
+type TrainerSpec_ModelType int32
+
+const (
+	TrainerSpec_UNIGRAM TrainerSpec_ModelType = 1 // Unigram language model with dynamic algorithm
+	TrainerSpec_BPE     TrainerSpec_ModelType = 2 // Byte Pair Encoding
+	TrainerSpec_WORD    TrainerSpec_ModelType = 3 // Delimitered by whitespace.
+	TrainerSpec_CHAR    TrainerSpec_ModelType = 4 // tokenizes into character sequence
+)
+
+// Enum value maps for TrainerSpec_ModelType.
+var (
+	TrainerSpec_ModelType_name = map[int32]string{
+		1: "UNIGRAM",
+		2: "BPE",
+		3: "WORD",
+		4: "CHAR",
+	}
+	TrainerSpec_ModelType_value = map[string]int32{
+		"UNIGRAM": 1,
+		"BPE":     2,
+		"WORD":    3,
+		"CHAR":    4,
+	}
+)
+
+func (x TrainerSpec_ModelType) Enum() *TrainerSpec_ModelType {
+	p := new(TrainerSpec_ModelType)
+	*p = x
+	return p
+}
+
+func (x TrainerSpec_ModelType) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (TrainerSpec_ModelType) Descriptor() protoreflect.EnumDescriptor {
+	return file_sentencepiece_model_proto_enumTypes[0].Descriptor()
+}
+
+func (TrainerSpec_ModelType) Type() protoreflect.EnumType {
+	return &file_sentencepiece_model_proto_enumTypes[0]
+}
+
+func (x TrainerSpec_ModelType) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Do not use.
+func (x *TrainerSpec_ModelType) UnmarshalJSON(b []byte) error {
+	num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
+	if err != nil {
+		return err
+	}
+	*x = TrainerSpec_ModelType(num)
+	return nil
+}
+
+// Deprecated: Use TrainerSpec_ModelType.Descriptor instead.
+func (TrainerSpec_ModelType) EnumDescriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{0, 0}
+}
+
+type ModelProto_SentencePiece_Type int32
+
+const (
+	ModelProto_SentencePiece_NORMAL       ModelProto_SentencePiece_Type = 1 // normal symbol
+	ModelProto_SentencePiece_UNKNOWN      ModelProto_SentencePiece_Type = 2 // unknown symbol. only <unk> for now.
+	ModelProto_SentencePiece_CONTROL      ModelProto_SentencePiece_Type = 3 // control symbols. </s>, <s>, <2ja> etc.
+	ModelProto_SentencePiece_USER_DEFINED ModelProto_SentencePiece_Type = 4 // user defined symbols.
+	// Typical usage of USER_DEFINED symbol
+	// is placeholder.
+	ModelProto_SentencePiece_BYTE   ModelProto_SentencePiece_Type = 6 // byte symbols. Used when `byte_fallback` is true.
+	ModelProto_SentencePiece_UNUSED ModelProto_SentencePiece_Type = 5 // this piece is not used.
+)
+
+// Enum value maps for ModelProto_SentencePiece_Type.
+var (
+	ModelProto_SentencePiece_Type_name = map[int32]string{
+		1: "NORMAL",
+		2: "UNKNOWN",
+		3: "CONTROL",
+		4: "USER_DEFINED",
+		6: "BYTE",
+		5: "UNUSED",
+	}
+	ModelProto_SentencePiece_Type_value = map[string]int32{
+		"NORMAL":       1,
+		"UNKNOWN":      2,
+		"CONTROL":      3,
+		"USER_DEFINED": 4,
+		"BYTE":         6,
+		"UNUSED":       5,
+	}
+)
+
+func (x ModelProto_SentencePiece_Type) Enum() *ModelProto_SentencePiece_Type {
+	p := new(ModelProto_SentencePiece_Type)
+	*p = x
+	return p
+}
+
+func (x ModelProto_SentencePiece_Type) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (ModelProto_SentencePiece_Type) Descriptor() protoreflect.EnumDescriptor {
+	return file_sentencepiece_model_proto_enumTypes[1].Descriptor()
+}
+
+func (ModelProto_SentencePiece_Type) Type() protoreflect.EnumType {
+	return &file_sentencepiece_model_proto_enumTypes[1]
+}
+
+func (x ModelProto_SentencePiece_Type) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Do not use.
+func (x *ModelProto_SentencePiece_Type) UnmarshalJSON(b []byte) error {
+	num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b)
+	if err != nil {
+		return err
+	}
+	*x = ModelProto_SentencePiece_Type(num)
+	return nil
+}
+
+// Deprecated: Use ModelProto_SentencePiece_Type.Descriptor instead.
+func (ModelProto_SentencePiece_Type) EnumDescriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{3, 0, 0}
+}
+
+// TrainerSpec encodes a various parameters for SentencePiece training.
+// Next id: 55
+type TrainerSpec struct {
+	state           protoimpl.MessageState
+	sizeCache       protoimpl.SizeCache
+	unknownFields   protoimpl.UnknownFields
+	extensionFields protoimpl.ExtensionFields
+
+	// /////////////////////////////////////////////////////////////////
+	// General parameters
+	//
+	// Input corpus files.
+	//
+	//	Trainer accepts the following two formats:
+	//	A) Monolingual: plain text, one sentence per line.
+	//	B) Bilingual:   TSV, source sentence <tab> target sentence
+	//	When bilingual data is passed, shared vocabulary model is built.
+	//	Note that the input file must be raw corpus, not a preprocessed corpus.
+	//	Trainer only loads the first `input_sentence_size` sentences specified
+	//	with this parameter.
+	Input []string `protobuf:"bytes,1,rep,name=input" json:"input,omitempty"`
+	// Input corpus format:
+	// "text": one-sentence-per-line text format (default)
+	// "tsv":  sentence <tab> freq
+	InputFormat *string `protobuf:"bytes,7,opt,name=input_format,json=inputFormat" json:"input_format,omitempty"`
+	// Output model file prefix.
+	// <model_prefix>.model and <model_prefix>.vocab are generated.
+	ModelPrefix *string                `protobuf:"bytes,2,opt,name=model_prefix,json=modelPrefix" json:"model_prefix,omitempty"`
+	ModelType   *TrainerSpec_ModelType `protobuf:"varint,3,opt,name=model_type,json=modelType,enum=sentencepiece.TrainerSpec_ModelType,def=1" json:"model_type,omitempty"`
+	// Vocabulary size. 8k is the default size.
+	VocabSize *int32 `protobuf:"varint,4,opt,name=vocab_size,json=vocabSize,def=8000" json:"vocab_size,omitempty"`
+	// List of the languages this model can accept.
+	// Since the model is language-agnostic, this field is used as a reference.
+	AcceptLanguage []string `protobuf:"bytes,5,rep,name=accept_language,json=acceptLanguage" json:"accept_language,omitempty"`
+	// Size of self-test samples, which are encoded in the model file.
+	SelfTestSampleSize *int32 `protobuf:"varint,6,opt,name=self_test_sample_size,json=selfTestSampleSize,def=0" json:"self_test_sample_size,omitempty"`
+	// Whether to use DP version of sentencepiece. Use it with TSV input format
+	// (requires precomputed word tab counts to work).
+	EnableDifferentialPrivacy *bool `protobuf:"varint,50,opt,name=enable_differential_privacy,json=enableDifferentialPrivacy,def=0" json:"enable_differential_privacy,omitempty"`
+	// Set these parameters if you need DP version of sentencepiece.
+	// std of noise to add.
+	DifferentialPrivacyNoiseLevel *float32 `protobuf:"fixed32,51,opt,name=differential_privacy_noise_level,json=differentialPrivacyNoiseLevel,def=0" json:"differential_privacy_noise_level,omitempty"`
+	// Clipping threshold to apply after adding noise. All the words with
+	// frequency less than this value are dropped.
+	DifferentialPrivacyClippingThreshold *uint64 `protobuf:"varint,52,opt,name=differential_privacy_clipping_threshold,json=differentialPrivacyClippingThreshold,def=0" json:"differential_privacy_clipping_threshold,omitempty"`
+	// /////////////////////////////////////////////////////////////////
+	// Training parameters.
+	//
+	// Uses characters which cover the corpus with the ratio of `chars_coverage`.
+	// This parameter determines the set of basic Alphabet of sentence piece.
+	// 1.0 - `chars_coverage` characters are treated as UNK.
+	// See also required_chars field.
+	CharacterCoverage *float32 `protobuf:"fixed32,10,opt,name=character_coverage,json=characterCoverage,def=0.9995" json:"character_coverage,omitempty"`
+	// Maximum size of sentences the trainer loads from `input` parameter.
+	// Trainer simply loads the `input` files in sequence.
+	// It is better to shuffle the input corpus randomly.
+	InputSentenceSize    *uint64 `protobuf:"varint,11,opt,name=input_sentence_size,json=inputSentenceSize,def=0" json:"input_sentence_size,omitempty"`
+	ShuffleInputSentence *bool   `protobuf:"varint,19,opt,name=shuffle_input_sentence,json=shuffleInputSentence,def=1" json:"shuffle_input_sentence,omitempty"`
+	// Maximum size of sentences to make seed sentence pieces.
+	// Extended suffix array is constructed to extract frequent
+	// sub-strings from the corpus. This uses 20N working space,
+	// where N is the size of corpus.
+	//
+	// Deprecated: Marked as deprecated in sentencepiece_model.proto.
+	MiningSentenceSize *int32 `protobuf:"varint,12,opt,name=mining_sentence_size,json=miningSentenceSize" json:"mining_sentence_size,omitempty"`
+	// Maximum size of sentences to train sentence pieces.
+	//
+	// Deprecated: Marked as deprecated in sentencepiece_model.proto.
+	TrainingSentenceSize *int32 `protobuf:"varint,13,opt,name=training_sentence_size,json=trainingSentenceSize" json:"training_sentence_size,omitempty"`
+	// The size of seed sentencepieces.
+	// `seed_sentencepiece_size` must be larger than `vocab_size`.
+	SeedSentencepieceSize *int32 `protobuf:"varint,14,opt,name=seed_sentencepiece_size,json=seedSentencepieceSize,def=1000000" json:"seed_sentencepiece_size,omitempty"`
+	// In every EM sub-iterations, keeps top
+	// `shrinking_factor` * `current sentencepieces size` with respect to
+	// the loss of the sentence piece. This value should be smaller than 1.0.
+	ShrinkingFactor *float32 `protobuf:"fixed32,15,opt,name=shrinking_factor,json=shrinkingFactor,def=0.75" json:"shrinking_factor,omitempty"`
+	// The maximum sentence length in byte. The sentences with the length
+	// larger than `max_sentence_length` is simply ignored.
+	// Longer input tends to bring the following risks:
+	//   - Overflow during EM training (unigram language model only)
+	//   - Performance drop because of O(n log n) cost in BPE.
+	MaxSentenceLength *int32 `protobuf:"varint,18,opt,name=max_sentence_length,json=maxSentenceLength,def=4192" json:"max_sentence_length,omitempty"`
+	// Number of threads in the training.
+	NumThreads *int32 `protobuf:"varint,16,opt,name=num_threads,json=numThreads,def=16" json:"num_threads,omitempty"`
+	// Number of EM sub iterations.
+	NumSubIterations *int32 `protobuf:"varint,17,opt,name=num_sub_iterations,json=numSubIterations,def=2" json:"num_sub_iterations,omitempty"`
+	// /////////////////////////////////////////////////////////////////
+	// SentencePiece parameters which control the shapes of sentence piece.
+	//
+	// Maximum length of sentencepiece.
+	MaxSentencepieceLength *int32 `protobuf:"varint,20,opt,name=max_sentencepiece_length,json=maxSentencepieceLength,def=16" json:"max_sentencepiece_length,omitempty"`
+	// Uses Unicode script to split sentence pieces.
+	// When `split_by_unicode_script` is true, we do not allow sentence piece to
+	// include multiple Unicode scripts, e.g. "F1" is not a valid piece.
+	// Exception: CJ characters (Hiragana/Katakana/Han) are all handled
+	// as one script type, since Japanese word can consist of multiple scripts.
+	// This exception is always applied regardless of the accept-language
+	// parameter.
+	SplitByUnicodeScript *bool `protobuf:"varint,21,opt,name=split_by_unicode_script,json=splitByUnicodeScript,def=1" json:"split_by_unicode_script,omitempty"`
+	// When `split_by_number` is true, put a boundary between number and
+	// non-number transition. If we want to treat "F1" is one token, set this flag
+	// to be false.
+	SplitByNumber *bool `protobuf:"varint,23,opt,name=split_by_number,json=splitByNumber,def=1" json:"split_by_number,omitempty"`
+	// Use a white space to split sentence pieces.
+	// When `split_by_whitespace` is false, we may have the piece containing
+	// a white space in the middle. e.g., "in_the".
+	SplitByWhitespace *bool `protobuf:"varint,22,opt,name=split_by_whitespace,json=splitByWhitespace,def=1" json:"split_by_whitespace,omitempty"`
+	// Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
+	// hello_. When `treat_whitespace_as_suffix` is true,
+	// NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
+	// of sentence.
+	TreatWhitespaceAsSuffix *bool `protobuf:"varint,24,opt,name=treat_whitespace_as_suffix,json=treatWhitespaceAsSuffix,def=0" json:"treat_whitespace_as_suffix,omitempty"`
+	// Allows pieces that only contain whitespaces instead of appearing only as
+	// prefix or suffix of other pieces.
+	AllowWhitespaceOnlyPieces *bool `protobuf:"varint,26,opt,name=allow_whitespace_only_pieces,json=allowWhitespaceOnlyPieces,def=0" json:"allow_whitespace_only_pieces,omitempty"`
+	// Split all digits (0-9) into separate pieces.
+	SplitDigits *bool `protobuf:"varint,25,opt,name=split_digits,json=splitDigits,def=0" json:"split_digits,omitempty"`
+	// Defines the pre-tokenization delimiter.
+	// When specified, no pieces crossing this delimiter is not included
+	// in the vocab. Then the delimiter string is virtually ignored
+	// during the training. This field can allows constraints on the vocabulary
+	// selection. Note that this field is available on unigram mode.
+	PretokenizationDelimiter *string `protobuf:"bytes,53,opt,name=pretokenization_delimiter,json=pretokenizationDelimiter,def=" json:"pretokenization_delimiter,omitempty"`
+	// /////////////////////////////////////////////////////////////////
+	// Vocabulary management
+	//
+	// Defines control symbols used as an indicator to
+	// change the behavior of the decoder. <s> and </s> are pre-defined.
+	// We can use this field to encode various meta information,
+	// including language indicator in multilingual model.
+	// These symbols are not visible to users, but visible to
+	// the decoder. Note that when the input sentence contains control symbols,
+	// they are not treated as one token, but segmented into normal pieces.
+	// Control symbols must be inserted independently from the segmentation.
+	ControlSymbols []string `protobuf:"bytes,30,rep,name=control_symbols,json=controlSymbols" json:"control_symbols,omitempty"`
+	// Defines user defined symbols.
+	// These symbols are added with extremely high score
+	// so they are always treated as one unique symbol in any context.
+	// Typical usage of user_defined_symbols is placeholder for named entities.
+	UserDefinedSymbols []string `protobuf:"bytes,31,rep,name=user_defined_symbols,json=userDefinedSymbols" json:"user_defined_symbols,omitempty"`
+	// Defines required characters. Each UTF8 character in this string is included
+	// in the character set regardless of character_coverage value. Unlike
+	// user_defined_symbols, these characters have scores based on the frequency
+	// on input sentences, and the model can form subwords using characters
+	// in this field.
+	RequiredChars *string `protobuf:"bytes,36,opt,name=required_chars,json=requiredChars" json:"required_chars,omitempty"`
+	// Decomposes unknown pieces into UTF-8 bytes.
+	ByteFallback *bool `protobuf:"varint,35,opt,name=byte_fallback,json=byteFallback,def=0" json:"byte_fallback,omitempty"`
+	// When creating the vocabulary file, defines whether or not to additionally
+	// output the score for each piece.
+	VocabularyOutputPieceScore *bool `protobuf:"varint,32,opt,name=vocabulary_output_piece_score,json=vocabularyOutputPieceScore,def=1" json:"vocabulary_output_piece_score,omitempty"`
+	// `vocab_size` is treated as hard limit. Crash if
+	// the model can not produce the vocab of size `vocab_size`,
+	// When `hard_vocab_limit` is false, vocab_size is treated
+	// as soft limit. Note that when model_type=char,
+	// always assumes hard_vocab_limit = false.
+	HardVocabLimit *bool `protobuf:"varint,33,opt,name=hard_vocab_limit,json=hardVocabLimit,def=1" json:"hard_vocab_limit,omitempty"`
+	// use all symbols for vocab extraction. This flag is valid
+	// if model type is either CHAR or WORD
+	UseAllVocab *bool `protobuf:"varint,34,opt,name=use_all_vocab,json=useAllVocab,def=0" json:"use_all_vocab,omitempty"`
+	// /////////////////////////////////////////////////////////////////
+	// Reserved special meta tokens.
+	// * -1 is not used.
+	// * unk_id must not be -1.
+	// Id must starts with 0 and be contigous.
+	UnkId    *int32  `protobuf:"varint,40,opt,name=unk_id,json=unkId,def=0" json:"unk_id,omitempty"`  // <unk>
+	BosId    *int32  `protobuf:"varint,41,opt,name=bos_id,json=bosId,def=1" json:"bos_id,omitempty"`  // <s>
+	EosId    *int32  `protobuf:"varint,42,opt,name=eos_id,json=eosId,def=2" json:"eos_id,omitempty"`  // </s>
+	PadId    *int32  `protobuf:"varint,43,opt,name=pad_id,json=padId,def=-1" json:"pad_id,omitempty"` // <pad> (padding)
+	UnkPiece *string `protobuf:"bytes,45,opt,name=unk_piece,json=unkPiece,def=<unk>" json:"unk_piece,omitempty"`
+	BosPiece *string `protobuf:"bytes,46,opt,name=bos_piece,json=bosPiece,def=<s>" json:"bos_piece,omitempty"`
+	EosPiece *string `protobuf:"bytes,47,opt,name=eos_piece,json=eosPiece,def=</s>" json:"eos_piece,omitempty"`
+	PadPiece *string `protobuf:"bytes,48,opt,name=pad_piece,json=padPiece,def=<pad>" json:"pad_piece,omitempty"`
+	// Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
+	// since this character can be useful both for user and
+	// developer. We can easily figure out that <unk> is emitted.
+	UnkSurface *string `protobuf:"bytes,44,opt,name=unk_surface,json=unkSurface,def= ⁇ " json:"unk_surface,omitempty"`
+	// Increase bit depth to allow unigram model training on large
+	// (>10M sentences) corpora. A Side-effect of enabling this flag
+	// is increased memory usage.
+	TrainExtremelyLargeCorpus *bool `protobuf:"varint,49,opt,name=train_extremely_large_corpus,json=trainExtremelyLargeCorpus,def=0" json:"train_extremely_large_corpus,omitempty"`
+	// Path to a seed sentencepieces file, with one tab-separated
+	// seed sentencepiece <tab> frequency per line.
+	SeedSentencepiecesFile *string `protobuf:"bytes,54,opt,name=seed_sentencepieces_file,json=seedSentencepiecesFile,def=" json:"seed_sentencepieces_file,omitempty"`
+}
+
+// Default values for TrainerSpec fields.
+const (
+	Default_TrainerSpec_ModelType                            = TrainerSpec_UNIGRAM
+	Default_TrainerSpec_VocabSize                            = int32(8000)
+	Default_TrainerSpec_SelfTestSampleSize                   = int32(0)
+	Default_TrainerSpec_EnableDifferentialPrivacy            = bool(false)
+	Default_TrainerSpec_DifferentialPrivacyNoiseLevel        = float32(0)
+	Default_TrainerSpec_DifferentialPrivacyClippingThreshold = uint64(0)
+	Default_TrainerSpec_CharacterCoverage                    = float32(0.9994999766349792)
+	Default_TrainerSpec_InputSentenceSize                    = uint64(0)
+	Default_TrainerSpec_ShuffleInputSentence                 = bool(true)
+	Default_TrainerSpec_SeedSentencepieceSize                = int32(1000000)
+	Default_TrainerSpec_ShrinkingFactor                      = float32(0.75)
+	Default_TrainerSpec_MaxSentenceLength                    = int32(4192)
+	Default_TrainerSpec_NumThreads                           = int32(16)
+	Default_TrainerSpec_NumSubIterations                     = int32(2)
+	Default_TrainerSpec_MaxSentencepieceLength               = int32(16)
+	Default_TrainerSpec_SplitByUnicodeScript                 = bool(true)
+	Default_TrainerSpec_SplitByNumber                        = bool(true)
+	Default_TrainerSpec_SplitByWhitespace                    = bool(true)
+	Default_TrainerSpec_TreatWhitespaceAsSuffix              = bool(false)
+	Default_TrainerSpec_AllowWhitespaceOnlyPieces            = bool(false)
+	Default_TrainerSpec_SplitDigits                          = bool(false)
+	Default_TrainerSpec_PretokenizationDelimiter             = string("")
+	Default_TrainerSpec_ByteFallback                         = bool(false)
+	Default_TrainerSpec_VocabularyOutputPieceScore           = bool(true)
+	Default_TrainerSpec_HardVocabLimit                       = bool(true)
+	Default_TrainerSpec_UseAllVocab                          = bool(false)
+	Default_TrainerSpec_UnkId                                = int32(0)
+	Default_TrainerSpec_BosId                                = int32(1)
+	Default_TrainerSpec_EosId                                = int32(2)
+	Default_TrainerSpec_PadId                                = int32(-1)
+	Default_TrainerSpec_UnkPiece                             = string("<unk>")
+	Default_TrainerSpec_BosPiece                             = string("<s>")
+	Default_TrainerSpec_EosPiece                             = string("</s>")
+	Default_TrainerSpec_PadPiece                             = string("<pad>")
+	Default_TrainerSpec_UnkSurface                           = string(" ⁇ ")
+	Default_TrainerSpec_TrainExtremelyLargeCorpus            = bool(false)
+	Default_TrainerSpec_SeedSentencepiecesFile               = string("")
+)
+
+func (x *TrainerSpec) Reset() {
+	*x = TrainerSpec{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[0]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *TrainerSpec) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*TrainerSpec) ProtoMessage() {}
+
+func (x *TrainerSpec) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[0]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use TrainerSpec.ProtoReflect.Descriptor instead.
+func (*TrainerSpec) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *TrainerSpec) GetInput() []string {
+	if x != nil {
+		return x.Input
+	}
+	return nil
+}
+
+func (x *TrainerSpec) GetInputFormat() string {
+	if x != nil && x.InputFormat != nil {
+		return *x.InputFormat
+	}
+	return ""
+}
+
+func (x *TrainerSpec) GetModelPrefix() string {
+	if x != nil && x.ModelPrefix != nil {
+		return *x.ModelPrefix
+	}
+	return ""
+}
+
+func (x *TrainerSpec) GetModelType() TrainerSpec_ModelType {
+	if x != nil && x.ModelType != nil {
+		return *x.ModelType
+	}
+	return Default_TrainerSpec_ModelType
+}
+
+func (x *TrainerSpec) GetVocabSize() int32 {
+	if x != nil && x.VocabSize != nil {
+		return *x.VocabSize
+	}
+	return Default_TrainerSpec_VocabSize
+}
+
+func (x *TrainerSpec) GetAcceptLanguage() []string {
+	if x != nil {
+		return x.AcceptLanguage
+	}
+	return nil
+}
+
+func (x *TrainerSpec) GetSelfTestSampleSize() int32 {
+	if x != nil && x.SelfTestSampleSize != nil {
+		return *x.SelfTestSampleSize
+	}
+	return Default_TrainerSpec_SelfTestSampleSize
+}
+
+func (x *TrainerSpec) GetEnableDifferentialPrivacy() bool {
+	if x != nil && x.EnableDifferentialPrivacy != nil {
+		return *x.EnableDifferentialPrivacy
+	}
+	return Default_TrainerSpec_EnableDifferentialPrivacy
+}
+
+func (x *TrainerSpec) GetDifferentialPrivacyNoiseLevel() float32 {
+	if x != nil && x.DifferentialPrivacyNoiseLevel != nil {
+		return *x.DifferentialPrivacyNoiseLevel
+	}
+	return Default_TrainerSpec_DifferentialPrivacyNoiseLevel
+}
+
+func (x *TrainerSpec) GetDifferentialPrivacyClippingThreshold() uint64 {
+	if x != nil && x.DifferentialPrivacyClippingThreshold != nil {
+		return *x.DifferentialPrivacyClippingThreshold
+	}
+	return Default_TrainerSpec_DifferentialPrivacyClippingThreshold
+}
+
+func (x *TrainerSpec) GetCharacterCoverage() float32 {
+	if x != nil && x.CharacterCoverage != nil {
+		return *x.CharacterCoverage
+	}
+	return Default_TrainerSpec_CharacterCoverage
+}
+
+func (x *TrainerSpec) GetInputSentenceSize() uint64 {
+	if x != nil && x.InputSentenceSize != nil {
+		return *x.InputSentenceSize
+	}
+	return Default_TrainerSpec_InputSentenceSize
+}
+
+func (x *TrainerSpec) GetShuffleInputSentence() bool {
+	if x != nil && x.ShuffleInputSentence != nil {
+		return *x.ShuffleInputSentence
+	}
+	return Default_TrainerSpec_ShuffleInputSentence
+}
+
+// Deprecated: Marked as deprecated in sentencepiece_model.proto.
+func (x *TrainerSpec) GetMiningSentenceSize() int32 {
+	if x != nil && x.MiningSentenceSize != nil {
+		return *x.MiningSentenceSize
+	}
+	return 0
+}
+
+// Deprecated: Marked as deprecated in sentencepiece_model.proto.
+func (x *TrainerSpec) GetTrainingSentenceSize() int32 {
+	if x != nil && x.TrainingSentenceSize != nil {
+		return *x.TrainingSentenceSize
+	}
+	return 0
+}
+
+func (x *TrainerSpec) GetSeedSentencepieceSize() int32 {
+	if x != nil && x.SeedSentencepieceSize != nil {
+		return *x.SeedSentencepieceSize
+	}
+	return Default_TrainerSpec_SeedSentencepieceSize
+}
+
+func (x *TrainerSpec) GetShrinkingFactor() float32 {
+	if x != nil && x.ShrinkingFactor != nil {
+		return *x.ShrinkingFactor
+	}
+	return Default_TrainerSpec_ShrinkingFactor
+}
+
+func (x *TrainerSpec) GetMaxSentenceLength() int32 {
+	if x != nil && x.MaxSentenceLength != nil {
+		return *x.MaxSentenceLength
+	}
+	return Default_TrainerSpec_MaxSentenceLength
+}
+
+func (x *TrainerSpec) GetNumThreads() int32 {
+	if x != nil && x.NumThreads != nil {
+		return *x.NumThreads
+	}
+	return Default_TrainerSpec_NumThreads
+}
+
+func (x *TrainerSpec) GetNumSubIterations() int32 {
+	if x != nil && x.NumSubIterations != nil {
+		return *x.NumSubIterations
+	}
+	return Default_TrainerSpec_NumSubIterations
+}
+
+func (x *TrainerSpec) GetMaxSentencepieceLength() int32 {
+	if x != nil && x.MaxSentencepieceLength != nil {
+		return *x.MaxSentencepieceLength
+	}
+	return Default_TrainerSpec_MaxSentencepieceLength
+}
+
+func (x *TrainerSpec) GetSplitByUnicodeScript() bool {
+	if x != nil && x.SplitByUnicodeScript != nil {
+		return *x.SplitByUnicodeScript
+	}
+	return Default_TrainerSpec_SplitByUnicodeScript
+}
+
+func (x *TrainerSpec) GetSplitByNumber() bool {
+	if x != nil && x.SplitByNumber != nil {
+		return *x.SplitByNumber
+	}
+	return Default_TrainerSpec_SplitByNumber
+}
+
+func (x *TrainerSpec) GetSplitByWhitespace() bool {
+	if x != nil && x.SplitByWhitespace != nil {
+		return *x.SplitByWhitespace
+	}
+	return Default_TrainerSpec_SplitByWhitespace
+}
+
+func (x *TrainerSpec) GetTreatWhitespaceAsSuffix() bool {
+	if x != nil && x.TreatWhitespaceAsSuffix != nil {
+		return *x.TreatWhitespaceAsSuffix
+	}
+	return Default_TrainerSpec_TreatWhitespaceAsSuffix
+}
+
+func (x *TrainerSpec) GetAllowWhitespaceOnlyPieces() bool {
+	if x != nil && x.AllowWhitespaceOnlyPieces != nil {
+		return *x.AllowWhitespaceOnlyPieces
+	}
+	return Default_TrainerSpec_AllowWhitespaceOnlyPieces
+}
+
+func (x *TrainerSpec) GetSplitDigits() bool {
+	if x != nil && x.SplitDigits != nil {
+		return *x.SplitDigits
+	}
+	return Default_TrainerSpec_SplitDigits
+}
+
+func (x *TrainerSpec) GetPretokenizationDelimiter() string {
+	if x != nil && x.PretokenizationDelimiter != nil {
+		return *x.PretokenizationDelimiter
+	}
+	return Default_TrainerSpec_PretokenizationDelimiter
+}
+
+func (x *TrainerSpec) GetControlSymbols() []string {
+	if x != nil {
+		return x.ControlSymbols
+	}
+	return nil
+}
+
+func (x *TrainerSpec) GetUserDefinedSymbols() []string {
+	if x != nil {
+		return x.UserDefinedSymbols
+	}
+	return nil
+}
+
+func (x *TrainerSpec) GetRequiredChars() string {
+	if x != nil && x.RequiredChars != nil {
+		return *x.RequiredChars
+	}
+	return ""
+}
+
+func (x *TrainerSpec) GetByteFallback() bool {
+	if x != nil && x.ByteFallback != nil {
+		return *x.ByteFallback
+	}
+	return Default_TrainerSpec_ByteFallback
+}
+
+func (x *TrainerSpec) GetVocabularyOutputPieceScore() bool {
+	if x != nil && x.VocabularyOutputPieceScore != nil {
+		return *x.VocabularyOutputPieceScore
+	}
+	return Default_TrainerSpec_VocabularyOutputPieceScore
+}
+
+func (x *TrainerSpec) GetHardVocabLimit() bool {
+	if x != nil && x.HardVocabLimit != nil {
+		return *x.HardVocabLimit
+	}
+	return Default_TrainerSpec_HardVocabLimit
+}
+
+func (x *TrainerSpec) GetUseAllVocab() bool {
+	if x != nil && x.UseAllVocab != nil {
+		return *x.UseAllVocab
+	}
+	return Default_TrainerSpec_UseAllVocab
+}
+
+func (x *TrainerSpec) GetUnkId() int32 {
+	if x != nil && x.UnkId != nil {
+		return *x.UnkId
+	}
+	return Default_TrainerSpec_UnkId
+}
+
+func (x *TrainerSpec) GetBosId() int32 {
+	if x != nil && x.BosId != nil {
+		return *x.BosId
+	}
+	return Default_TrainerSpec_BosId
+}
+
+func (x *TrainerSpec) GetEosId() int32 {
+	if x != nil && x.EosId != nil {
+		return *x.EosId
+	}
+	return Default_TrainerSpec_EosId
+}
+
+func (x *TrainerSpec) GetPadId() int32 {
+	if x != nil && x.PadId != nil {
+		return *x.PadId
+	}
+	return Default_TrainerSpec_PadId
+}
+
+func (x *TrainerSpec) GetUnkPiece() string {
+	if x != nil && x.UnkPiece != nil {
+		return *x.UnkPiece
+	}
+	return Default_TrainerSpec_UnkPiece
+}
+
+func (x *TrainerSpec) GetBosPiece() string {
+	if x != nil && x.BosPiece != nil {
+		return *x.BosPiece
+	}
+	return Default_TrainerSpec_BosPiece
+}
+
+func (x *TrainerSpec) GetEosPiece() string {
+	if x != nil && x.EosPiece != nil {
+		return *x.EosPiece
+	}
+	return Default_TrainerSpec_EosPiece
+}
+
+func (x *TrainerSpec) GetPadPiece() string {
+	if x != nil && x.PadPiece != nil {
+		return *x.PadPiece
+	}
+	return Default_TrainerSpec_PadPiece
+}
+
+func (x *TrainerSpec) GetUnkSurface() string {
+	if x != nil && x.UnkSurface != nil {
+		return *x.UnkSurface
+	}
+	return Default_TrainerSpec_UnkSurface
+}
+
+func (x *TrainerSpec) GetTrainExtremelyLargeCorpus() bool {
+	if x != nil && x.TrainExtremelyLargeCorpus != nil {
+		return *x.TrainExtremelyLargeCorpus
+	}
+	return Default_TrainerSpec_TrainExtremelyLargeCorpus
+}
+
+func (x *TrainerSpec) GetSeedSentencepiecesFile() string {
+	if x != nil && x.SeedSentencepiecesFile != nil {
+		return *x.SeedSentencepiecesFile
+	}
+	return Default_TrainerSpec_SeedSentencepiecesFile
+}
+
+// NormalizerSpec encodes a various parameters for string normalizaiton
+type NormalizerSpec struct {
+	state           protoimpl.MessageState
+	sizeCache       protoimpl.SizeCache
+	unknownFields   protoimpl.UnknownFields
+	extensionFields protoimpl.ExtensionFields
+
+	// name of normalization rule.
+	Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
+	// Pre-compiled normalization rule created by
+	// Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
+	// Usually this field is set by Builder::GetNormalizerSpec() method.
+	PrecompiledCharsmap []byte `protobuf:"bytes,2,opt,name=precompiled_charsmap,json=precompiledCharsmap" json:"precompiled_charsmap,omitempty"`
+	// Adds dummy whitespace at the beginning of text in order to
+	// treat "world" in "world" and "hello world" in the same way.
+	AddDummyPrefix *bool `protobuf:"varint,3,opt,name=add_dummy_prefix,json=addDummyPrefix,def=1" json:"add_dummy_prefix,omitempty"`
+	// Removes leading, trailing, and duplicate internal whitespace.
+	RemoveExtraWhitespaces *bool `protobuf:"varint,4,opt,name=remove_extra_whitespaces,json=removeExtraWhitespaces,def=1" json:"remove_extra_whitespaces,omitempty"`
+	// Replaces whitespace with meta symbol.
+	// This field must be true to train sentence piece model.
+	EscapeWhitespaces *bool `protobuf:"varint,5,opt,name=escape_whitespaces,json=escapeWhitespaces,def=1" json:"escape_whitespaces,omitempty"`
+	// Custom normalization rule file in TSV format.
+	// https://github.com/google/sentencepiece/blob/master/doc/normalization.md
+	// This field is only used in SentencePieceTrainer::Train() method, which
+	// compiles the rule into the binary rule stored in `precompiled_charsmap`.
+	NormalizationRuleTsv *string `protobuf:"bytes,6,opt,name=normalization_rule_tsv,json=normalizationRuleTsv" json:"normalization_rule_tsv,omitempty"`
+}
+
+// Default values for NormalizerSpec fields.
+const (
+	Default_NormalizerSpec_AddDummyPrefix         = bool(true)
+	Default_NormalizerSpec_RemoveExtraWhitespaces = bool(true)
+	Default_NormalizerSpec_EscapeWhitespaces      = bool(true)
+)
+
+func (x *NormalizerSpec) Reset() {
+	*x = NormalizerSpec{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[1]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *NormalizerSpec) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*NormalizerSpec) ProtoMessage() {}
+
+func (x *NormalizerSpec) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[1]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use NormalizerSpec.ProtoReflect.Descriptor instead.
+func (*NormalizerSpec) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *NormalizerSpec) GetName() string {
+	if x != nil && x.Name != nil {
+		return *x.Name
+	}
+	return ""
+}
+
+func (x *NormalizerSpec) GetPrecompiledCharsmap() []byte {
+	if x != nil {
+		return x.PrecompiledCharsmap
+	}
+	return nil
+}
+
+func (x *NormalizerSpec) GetAddDummyPrefix() bool {
+	if x != nil && x.AddDummyPrefix != nil {
+		return *x.AddDummyPrefix
+	}
+	return Default_NormalizerSpec_AddDummyPrefix
+}
+
+func (x *NormalizerSpec) GetRemoveExtraWhitespaces() bool {
+	if x != nil && x.RemoveExtraWhitespaces != nil {
+		return *x.RemoveExtraWhitespaces
+	}
+	return Default_NormalizerSpec_RemoveExtraWhitespaces
+}
+
+func (x *NormalizerSpec) GetEscapeWhitespaces() bool {
+	if x != nil && x.EscapeWhitespaces != nil {
+		return *x.EscapeWhitespaces
+	}
+	return Default_NormalizerSpec_EscapeWhitespaces
+}
+
+func (x *NormalizerSpec) GetNormalizationRuleTsv() string {
+	if x != nil && x.NormalizationRuleTsv != nil {
+		return *x.NormalizationRuleTsv
+	}
+	return ""
+}
+
+// Proto to store samples for self-testing.
+type SelfTestData struct {
+	state           protoimpl.MessageState
+	sizeCache       protoimpl.SizeCache
+	unknownFields   protoimpl.UnknownFields
+	extensionFields protoimpl.ExtensionFields
+
+	Samples []*SelfTestData_Sample `protobuf:"bytes,1,rep,name=samples" json:"samples,omitempty"`
+}
+
+func (x *SelfTestData) Reset() {
+	*x = SelfTestData{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[2]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *SelfTestData) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*SelfTestData) ProtoMessage() {}
+
+func (x *SelfTestData) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[2]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use SelfTestData.ProtoReflect.Descriptor instead.
+func (*SelfTestData) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{2}
+}
+
+func (x *SelfTestData) GetSamples() []*SelfTestData_Sample {
+	if x != nil {
+		return x.Samples
+	}
+	return nil
+}
+
+// ModelProto stores model parameters.
+// SentencePieceProcessor is supposed to be self-contained.
+// All settings/parameters which may change the behavior must be encoded
+// in ModelProto.
+type ModelProto struct {
+	state           protoimpl.MessageState
+	sizeCache       protoimpl.SizeCache
+	unknownFields   protoimpl.UnknownFields
+	extensionFields protoimpl.ExtensionFields
+
+	// Sentence pieces with scores.
+	Pieces []*ModelProto_SentencePiece `protobuf:"bytes,1,rep,name=pieces" json:"pieces,omitempty"`
+	// Spec used to generate this model file.
+	TrainerSpec *TrainerSpec `protobuf:"bytes,2,opt,name=trainer_spec,json=trainerSpec" json:"trainer_spec,omitempty"`
+	// Spec for text normalization.
+	NormalizerSpec *NormalizerSpec `protobuf:"bytes,3,opt,name=normalizer_spec,json=normalizerSpec" json:"normalizer_spec,omitempty"`
+	// Stores sample input and its expected segmentation to verify the model.
+	SelfTestData *SelfTestData `protobuf:"bytes,4,opt,name=self_test_data,json=selfTestData" json:"self_test_data,omitempty"`
+	// Spec for text de-normalization.
+	DenormalizerSpec *NormalizerSpec `protobuf:"bytes,5,opt,name=denormalizer_spec,json=denormalizerSpec" json:"denormalizer_spec,omitempty"`
+}
+
+func (x *ModelProto) Reset() {
+	*x = ModelProto{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[3]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *ModelProto) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*ModelProto) ProtoMessage() {}
+
+func (x *ModelProto) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[3]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use ModelProto.ProtoReflect.Descriptor instead.
+func (*ModelProto) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{3}
+}
+
+func (x *ModelProto) GetPieces() []*ModelProto_SentencePiece {
+	if x != nil {
+		return x.Pieces
+	}
+	return nil
+}
+
+func (x *ModelProto) GetTrainerSpec() *TrainerSpec {
+	if x != nil {
+		return x.TrainerSpec
+	}
+	return nil
+}
+
+func (x *ModelProto) GetNormalizerSpec() *NormalizerSpec {
+	if x != nil {
+		return x.NormalizerSpec
+	}
+	return nil
+}
+
+func (x *ModelProto) GetSelfTestData() *SelfTestData {
+	if x != nil {
+		return x.SelfTestData
+	}
+	return nil
+}
+
+func (x *ModelProto) GetDenormalizerSpec() *NormalizerSpec {
+	if x != nil {
+		return x.DenormalizerSpec
+	}
+	return nil
+}
+
+type SelfTestData_Sample struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	Input    *string `protobuf:"bytes,1,opt,name=input" json:"input,omitempty"`
+	Expected *string `protobuf:"bytes,2,opt,name=expected" json:"expected,omitempty"`
+}
+
+func (x *SelfTestData_Sample) Reset() {
+	*x = SelfTestData_Sample{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[4]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *SelfTestData_Sample) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*SelfTestData_Sample) ProtoMessage() {}
+
+func (x *SelfTestData_Sample) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[4]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use SelfTestData_Sample.ProtoReflect.Descriptor instead.
+func (*SelfTestData_Sample) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{2, 0}
+}
+
+func (x *SelfTestData_Sample) GetInput() string {
+	if x != nil && x.Input != nil {
+		return *x.Input
+	}
+	return ""
+}
+
+func (x *SelfTestData_Sample) GetExpected() string {
+	if x != nil && x.Expected != nil {
+		return *x.Expected
+	}
+	return ""
+}
+
+type ModelProto_SentencePiece struct {
+	state           protoimpl.MessageState
+	sizeCache       protoimpl.SizeCache
+	unknownFields   protoimpl.UnknownFields
+	extensionFields protoimpl.ExtensionFields
+
+	Piece *string                        `protobuf:"bytes,1,opt,name=piece" json:"piece,omitempty"` // piece must not be empty.
+	Score *float32                       `protobuf:"fixed32,2,opt,name=score" json:"score,omitempty"`
+	Type  *ModelProto_SentencePiece_Type `protobuf:"varint,3,opt,name=type,enum=sentencepiece.ModelProto_SentencePiece_Type,def=1" json:"type,omitempty"`
+}
+
+// Default values for ModelProto_SentencePiece fields.
+const (
+	Default_ModelProto_SentencePiece_Type = ModelProto_SentencePiece_NORMAL
+)
+
+func (x *ModelProto_SentencePiece) Reset() {
+	*x = ModelProto_SentencePiece{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_sentencepiece_model_proto_msgTypes[5]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *ModelProto_SentencePiece) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*ModelProto_SentencePiece) ProtoMessage() {}
+
+func (x *ModelProto_SentencePiece) ProtoReflect() protoreflect.Message {
+	mi := &file_sentencepiece_model_proto_msgTypes[5]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use ModelProto_SentencePiece.ProtoReflect.Descriptor instead.
+func (*ModelProto_SentencePiece) Descriptor() ([]byte, []int) {
+	return file_sentencepiece_model_proto_rawDescGZIP(), []int{3, 0}
+}
+
+func (x *ModelProto_SentencePiece) GetPiece() string {
+	if x != nil && x.Piece != nil {
+		return *x.Piece
+	}
+	return ""
+}
+
+func (x *ModelProto_SentencePiece) GetScore() float32 {
+	if x != nil && x.Score != nil {
+		return *x.Score
+	}
+	return 0
+}
+
+func (x *ModelProto_SentencePiece) GetType() ModelProto_SentencePiece_Type {
+	if x != nil && x.Type != nil {
+		return *x.Type
+	}
+	return Default_ModelProto_SentencePiece_Type
+}
+
+var File_sentencepiece_model_proto protoreflect.FileDescriptor
+
+var file_sentencepiece_model_proto_rawDesc = []byte{
+	0x0a, 0x19, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f,
+	0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0d, 0x73, 0x65, 0x6e,
+	0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x22, 0xc6, 0x12, 0x0a, 0x0b, 0x54,
+	0x72, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e,
+	0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6e, 0x70, 0x75, 0x74,
+	0x12, 0x21, 0x0a, 0x0c, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74,
+	0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x46, 0x6f, 0x72,
+	0x6d, 0x61, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f, 0x70, 0x72, 0x65,
+	0x66, 0x69, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
+	0x50, 0x72, 0x65, 0x66, 0x69, 0x78, 0x12, 0x4c, 0x0a, 0x0a, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x5f,
+	0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x73, 0x65, 0x6e,
+	0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x54, 0x72, 0x61, 0x69, 0x6e,
+	0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x54, 0x79, 0x70, 0x65,
+	0x3a, 0x07, 0x55, 0x4e, 0x49, 0x47, 0x52, 0x41, 0x4d, 0x52, 0x09, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
+	0x54, 0x79, 0x70, 0x65, 0x12, 0x23, 0x0a, 0x0a, 0x76, 0x6f, 0x63, 0x61, 0x62, 0x5f, 0x73, 0x69,
+	0x7a, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x04, 0x38, 0x30, 0x30, 0x30, 0x52, 0x09,
+	0x76, 0x6f, 0x63, 0x61, 0x62, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x61, 0x63, 0x63,
+	0x65, 0x70, 0x74, 0x5f, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x05, 0x20, 0x03,
+	0x28, 0x09, 0x52, 0x0e, 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x4c, 0x61, 0x6e, 0x67, 0x75, 0x61,
+	0x67, 0x65, 0x12, 0x34, 0x0a, 0x15, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f,
+	0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28,
+	0x05, 0x3a, 0x01, 0x30, 0x52, 0x12, 0x73, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x53, 0x61,
+	0x6d, 0x70, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x45, 0x0a, 0x1b, 0x65, 0x6e, 0x61, 0x62,
+	0x6c, 0x65, 0x5f, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f,
+	0x70, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x18, 0x32, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66,
+	0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x44, 0x69, 0x66, 0x66,
+	0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x12,
+	0x4a, 0x0a, 0x20, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f,
+	0x70, 0x72, 0x69, 0x76, 0x61, 0x63, 0x79, 0x5f, 0x6e, 0x6f, 0x69, 0x73, 0x65, 0x5f, 0x6c, 0x65,
+	0x76, 0x65, 0x6c, 0x18, 0x33, 0x20, 0x01, 0x28, 0x02, 0x3a, 0x01, 0x30, 0x52, 0x1d, 0x64, 0x69,
+	0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69, 0x76, 0x61, 0x63,
+	0x79, 0x4e, 0x6f, 0x69, 0x73, 0x65, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x58, 0x0a, 0x27, 0x64,
+	0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x5f, 0x70, 0x72, 0x69, 0x76,
+	0x61, 0x63, 0x79, 0x5f, 0x63, 0x6c, 0x69, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x5f, 0x74, 0x68, 0x72,
+	0x65, 0x73, 0x68, 0x6f, 0x6c, 0x64, 0x18, 0x34, 0x20, 0x01, 0x28, 0x04, 0x3a, 0x01, 0x30, 0x52,
+	0x24, 0x64, 0x69, 0x66, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x50, 0x72, 0x69,
+	0x76, 0x61, 0x63, 0x79, 0x43, 0x6c, 0x69, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x54, 0x68, 0x72, 0x65,
+	0x73, 0x68, 0x6f, 0x6c, 0x64, 0x12, 0x35, 0x0a, 0x12, 0x63, 0x68, 0x61, 0x72, 0x61, 0x63, 0x74,
+	0x65, 0x72, 0x5f, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x67, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28,
+	0x02, 0x3a, 0x06, 0x30, 0x2e, 0x39, 0x39, 0x39, 0x35, 0x52, 0x11, 0x63, 0x68, 0x61, 0x72, 0x61,
+	0x63, 0x74, 0x65, 0x72, 0x43, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x67, 0x65, 0x12, 0x31, 0x0a, 0x13,
+	0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73,
+	0x69, 0x7a, 0x65, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x04, 0x3a, 0x01, 0x30, 0x52, 0x11, 0x69, 0x6e,
+	0x70, 0x75, 0x74, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12,
+	0x3a, 0x0a, 0x16, 0x73, 0x68, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74,
+	0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x18, 0x13, 0x20, 0x01, 0x28, 0x08, 0x3a,
+	0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x14, 0x73, 0x68, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x49, 0x6e,
+	0x70, 0x75, 0x74, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x34, 0x0a, 0x14, 0x6d,
+	0x69, 0x6e, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73,
+	0x69, 0x7a, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x05, 0x42, 0x02, 0x18, 0x01, 0x52, 0x12, 0x6d,
+	0x69, 0x6e, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a,
+	0x65, 0x12, 0x38, 0x0a, 0x16, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x65,
+	0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28,
+	0x05, 0x42, 0x02, 0x18, 0x01, 0x52, 0x14, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x53,
+	0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x3f, 0x0a, 0x17, 0x73,
+	0x65, 0x65, 0x64, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
+	0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x07, 0x31, 0x30,
+	0x30, 0x30, 0x30, 0x30, 0x30, 0x52, 0x15, 0x73, 0x65, 0x65, 0x64, 0x53, 0x65, 0x6e, 0x74, 0x65,
+	0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x2f, 0x0a, 0x10,
+	0x73, 0x68, 0x72, 0x69, 0x6e, 0x6b, 0x69, 0x6e, 0x67, 0x5f, 0x66, 0x61, 0x63, 0x74, 0x6f, 0x72,
+	0x18, 0x0f, 0x20, 0x01, 0x28, 0x02, 0x3a, 0x04, 0x30, 0x2e, 0x37, 0x35, 0x52, 0x0f, 0x73, 0x68,
+	0x72, 0x69, 0x6e, 0x6b, 0x69, 0x6e, 0x67, 0x46, 0x61, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x34, 0x0a,
+	0x13, 0x6d, 0x61, 0x78, 0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x6c, 0x65,
+	0x6e, 0x67, 0x74, 0x68, 0x18, 0x12, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x04, 0x34, 0x31, 0x39, 0x32,
+	0x52, 0x11, 0x6d, 0x61, 0x78, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x4c, 0x65, 0x6e,
+	0x67, 0x74, 0x68, 0x12, 0x23, 0x0a, 0x0b, 0x6e, 0x75, 0x6d, 0x5f, 0x74, 0x68, 0x72, 0x65, 0x61,
+	0x64, 0x73, 0x18, 0x10, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x31, 0x36, 0x52, 0x0a, 0x6e, 0x75,
+	0x6d, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2f, 0x0a, 0x12, 0x6e, 0x75, 0x6d, 0x5f,
+	0x73, 0x75, 0x62, 0x5f, 0x69, 0x74, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x11,
+	0x20, 0x01, 0x28, 0x05, 0x3a, 0x01, 0x32, 0x52, 0x10, 0x6e, 0x75, 0x6d, 0x53, 0x75, 0x62, 0x49,
+	0x74, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x18, 0x6d, 0x61, 0x78,
+	0x5f, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f, 0x6c,
+	0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x31, 0x36, 0x52,
+	0x16, 0x6d, 0x61, 0x78, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
+	0x65, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x3b, 0x0a, 0x17, 0x73, 0x70, 0x6c, 0x69, 0x74,
+	0x5f, 0x62, 0x79, 0x5f, 0x75, 0x6e, 0x69, 0x63, 0x6f, 0x64, 0x65, 0x5f, 0x73, 0x63, 0x72, 0x69,
+	0x70, 0x74, 0x18, 0x15, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x14,
+	0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x55, 0x6e, 0x69, 0x63, 0x6f, 0x64, 0x65, 0x53, 0x63,
+	0x72, 0x69, 0x70, 0x74, 0x12, 0x2c, 0x0a, 0x0f, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x62, 0x79,
+	0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74,
+	0x72, 0x75, 0x65, 0x52, 0x0d, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x4e, 0x75, 0x6d, 0x62,
+	0x65, 0x72, 0x12, 0x34, 0x0a, 0x13, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x62, 0x79, 0x5f, 0x77,
+	0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x08, 0x3a,
+	0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x11, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x42, 0x79, 0x57, 0x68,
+	0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x42, 0x0a, 0x1a, 0x74, 0x72, 0x65, 0x61,
+	0x74, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x5f, 0x61, 0x73, 0x5f,
+	0x73, 0x75, 0x66, 0x66, 0x69, 0x78, 0x18, 0x18, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61,
+	0x6c, 0x73, 0x65, 0x52, 0x17, 0x74, 0x72, 0x65, 0x61, 0x74, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73,
+	0x70, 0x61, 0x63, 0x65, 0x41, 0x73, 0x53, 0x75, 0x66, 0x66, 0x69, 0x78, 0x12, 0x46, 0x0a, 0x1c,
+	0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65,
+	0x5f, 0x6f, 0x6e, 0x6c, 0x79, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x18, 0x1a, 0x20, 0x01,
+	0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x61, 0x6c, 0x6c, 0x6f, 0x77,
+	0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x4f, 0x6e, 0x6c, 0x79, 0x50, 0x69,
+	0x65, 0x63, 0x65, 0x73, 0x12, 0x28, 0x0a, 0x0c, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x5f, 0x64, 0x69,
+	0x67, 0x69, 0x74, 0x73, 0x18, 0x19, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73,
+	0x65, 0x52, 0x0b, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x44, 0x69, 0x67, 0x69, 0x74, 0x73, 0x12, 0x3d,
+	0x0a, 0x19, 0x70, 0x72, 0x65, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
+	0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x65, 0x72, 0x18, 0x35, 0x20, 0x01, 0x28,
+	0x09, 0x3a, 0x00, 0x52, 0x18, 0x70, 0x72, 0x65, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x61,
+	0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x65, 0x72, 0x12, 0x27, 0x0a,
+	0x0f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73,
+	0x18, 0x1e, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0e, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x53,
+	0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x30, 0x0a, 0x14, 0x75, 0x73, 0x65, 0x72, 0x5f, 0x64,
+	0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x5f, 0x73, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x18, 0x1f,
+	0x20, 0x03, 0x28, 0x09, 0x52, 0x12, 0x75, 0x73, 0x65, 0x72, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65,
+	0x64, 0x53, 0x79, 0x6d, 0x62, 0x6f, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x72, 0x65, 0x71, 0x75,
+	0x69, 0x72, 0x65, 0x64, 0x5f, 0x63, 0x68, 0x61, 0x72, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09,
+	0x52, 0x0d, 0x72, 0x65, 0x71, 0x75, 0x69, 0x72, 0x65, 0x64, 0x43, 0x68, 0x61, 0x72, 0x73, 0x12,
+	0x2a, 0x0a, 0x0d, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x66, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b,
+	0x18, 0x23, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0c, 0x62,
+	0x79, 0x74, 0x65, 0x46, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x12, 0x47, 0x0a, 0x1d, 0x76,
+	0x6f, 0x63, 0x61, 0x62, 0x75, 0x6c, 0x61, 0x72, 0x79, 0x5f, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74,
+	0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x5f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x18, 0x20, 0x20, 0x01,
+	0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x1a, 0x76, 0x6f, 0x63, 0x61, 0x62, 0x75,
+	0x6c, 0x61, 0x72, 0x79, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x50, 0x69, 0x65, 0x63, 0x65, 0x53,
+	0x63, 0x6f, 0x72, 0x65, 0x12, 0x2e, 0x0a, 0x10, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x76, 0x6f, 0x63,
+	0x61, 0x62, 0x5f, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04,
+	0x74, 0x72, 0x75, 0x65, 0x52, 0x0e, 0x68, 0x61, 0x72, 0x64, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x4c,
+	0x69, 0x6d, 0x69, 0x74, 0x12, 0x29, 0x0a, 0x0d, 0x75, 0x73, 0x65, 0x5f, 0x61, 0x6c, 0x6c, 0x5f,
+	0x76, 0x6f, 0x63, 0x61, 0x62, 0x18, 0x22, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c,
+	0x73, 0x65, 0x52, 0x0b, 0x75, 0x73, 0x65, 0x41, 0x6c, 0x6c, 0x56, 0x6f, 0x63, 0x61, 0x62, 0x12,
+	0x18, 0x0a, 0x06, 0x75, 0x6e, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x28, 0x20, 0x01, 0x28, 0x05, 0x3a,
+	0x01, 0x30, 0x52, 0x05, 0x75, 0x6e, 0x6b, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x06, 0x62, 0x6f, 0x73,
+	0x5f, 0x69, 0x64, 0x18, 0x29, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x01, 0x31, 0x52, 0x05, 0x62, 0x6f,
+	0x73, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x06, 0x65, 0x6f, 0x73, 0x5f, 0x69, 0x64, 0x18, 0x2a, 0x20,
+	0x01, 0x28, 0x05, 0x3a, 0x01, 0x32, 0x52, 0x05, 0x65, 0x6f, 0x73, 0x49, 0x64, 0x12, 0x19, 0x0a,
+	0x06, 0x70, 0x61, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x2b, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, 0x2d,
+	0x31, 0x52, 0x05, 0x70, 0x61, 0x64, 0x49, 0x64, 0x12, 0x22, 0x0a, 0x09, 0x75, 0x6e, 0x6b, 0x5f,
+	0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x3c, 0x75, 0x6e,
+	0x6b, 0x3e, 0x52, 0x08, 0x75, 0x6e, 0x6b, 0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x20, 0x0a, 0x09,
+	0x62, 0x6f, 0x73, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x09, 0x3a,
+	0x03, 0x3c, 0x73, 0x3e, 0x52, 0x08, 0x62, 0x6f, 0x73, 0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x21,
+	0x0a, 0x09, 0x65, 0x6f, 0x73, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x2f, 0x20, 0x01, 0x28,
+	0x09, 0x3a, 0x04, 0x3c, 0x2f, 0x73, 0x3e, 0x52, 0x08, 0x65, 0x6f, 0x73, 0x50, 0x69, 0x65, 0x63,
+	0x65, 0x12, 0x22, 0x0a, 0x09, 0x70, 0x61, 0x64, 0x5f, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x30,
+	0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x3c, 0x70, 0x61, 0x64, 0x3e, 0x52, 0x08, 0x70, 0x61, 0x64,
+	0x50, 0x69, 0x65, 0x63, 0x65, 0x12, 0x26, 0x0a, 0x0b, 0x75, 0x6e, 0x6b, 0x5f, 0x73, 0x75, 0x72,
+	0x66, 0x61, 0x63, 0x65, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x05, 0x20, 0xe2, 0x81, 0x87,
+	0x20, 0x52, 0x0a, 0x75, 0x6e, 0x6b, 0x53, 0x75, 0x72, 0x66, 0x61, 0x63, 0x65, 0x12, 0x46, 0x0a,
+	0x1c, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x5f, 0x65, 0x78, 0x74, 0x72, 0x65, 0x6d, 0x65, 0x6c, 0x79,
+	0x5f, 0x6c, 0x61, 0x72, 0x67, 0x65, 0x5f, 0x63, 0x6f, 0x72, 0x70, 0x75, 0x73, 0x18, 0x31, 0x20,
+	0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x19, 0x74, 0x72, 0x61, 0x69,
+	0x6e, 0x45, 0x78, 0x74, 0x72, 0x65, 0x6d, 0x65, 0x6c, 0x79, 0x4c, 0x61, 0x72, 0x67, 0x65, 0x43,
+	0x6f, 0x72, 0x70, 0x75, 0x73, 0x12, 0x3a, 0x0a, 0x18, 0x73, 0x65, 0x65, 0x64, 0x5f, 0x73, 0x65,
+	0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x5f, 0x66, 0x69, 0x6c,
+	0x65, 0x18, 0x36, 0x20, 0x01, 0x28, 0x09, 0x3a, 0x00, 0x52, 0x16, 0x73, 0x65, 0x65, 0x64, 0x53,
+	0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x46, 0x69, 0x6c,
+	0x65, 0x22, 0x35, 0x0a, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b,
+	0x0a, 0x07, 0x55, 0x4e, 0x49, 0x47, 0x52, 0x41, 0x4d, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x42,
+	0x50, 0x45, 0x10, 0x02, 0x12, 0x08, 0x0a, 0x04, 0x57, 0x4f, 0x52, 0x44, 0x10, 0x03, 0x12, 0x08,
+	0x0a, 0x04, 0x43, 0x48, 0x41, 0x52, 0x10, 0x04, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
+	0x80, 0x80, 0x02, 0x22, 0xbd, 0x02, 0x0a, 0x0e, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a,
+	0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01,
+	0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x31, 0x0a, 0x14, 0x70, 0x72,
+	0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x63, 0x68, 0x61, 0x72, 0x73, 0x6d,
+	0x61, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x13, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d,
+	0x70, 0x69, 0x6c, 0x65, 0x64, 0x43, 0x68, 0x61, 0x72, 0x73, 0x6d, 0x61, 0x70, 0x12, 0x2e, 0x0a,
+	0x10, 0x61, 0x64, 0x64, 0x5f, 0x64, 0x75, 0x6d, 0x6d, 0x79, 0x5f, 0x70, 0x72, 0x65, 0x66, 0x69,
+	0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x0e, 0x61,
+	0x64, 0x64, 0x44, 0x75, 0x6d, 0x6d, 0x79, 0x50, 0x72, 0x65, 0x66, 0x69, 0x78, 0x12, 0x3e, 0x0a,
+	0x18, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x5f, 0x65, 0x78, 0x74, 0x72, 0x61, 0x5f, 0x77, 0x68,
+	0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a,
+	0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x16, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x45, 0x78, 0x74,
+	0x72, 0x61, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73, 0x12, 0x33, 0x0a,
+	0x12, 0x65, 0x73, 0x63, 0x61, 0x70, 0x65, 0x5f, 0x77, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61,
+	0x63, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52,
+	0x11, 0x65, 0x73, 0x63, 0x61, 0x70, 0x65, 0x57, 0x68, 0x69, 0x74, 0x65, 0x73, 0x70, 0x61, 0x63,
+	0x65, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x61, 0x74,
+	0x69, 0x6f, 0x6e, 0x5f, 0x72, 0x75, 0x6c, 0x65, 0x5f, 0x74, 0x73, 0x76, 0x18, 0x06, 0x20, 0x01,
+	0x28, 0x09, 0x52, 0x14, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
+	0x6e, 0x52, 0x75, 0x6c, 0x65, 0x54, 0x73, 0x76, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
+	0x80, 0x80, 0x02, 0x22, 0x93, 0x01, 0x0a, 0x0c, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74,
+	0x44, 0x61, 0x74, 0x61, 0x12, 0x3c, 0x0a, 0x07, 0x73, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x73, 0x18,
+	0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65,
+	0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x44, 0x61,
+	0x74, 0x61, 0x2e, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x52, 0x07, 0x73, 0x61, 0x6d, 0x70, 0x6c,
+	0x65, 0x73, 0x1a, 0x3a, 0x0a, 0x06, 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05,
+	0x69, 0x6e, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x69, 0x6e, 0x70,
+	0x75, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x65, 0x64, 0x18, 0x02,
+	0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x78, 0x70, 0x65, 0x63, 0x74, 0x65, 0x64, 0x2a, 0x09,
+	0x08, 0xc8, 0x01, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0xd7, 0x04, 0x0a, 0x0a, 0x4d, 0x6f,
+	0x64, 0x65, 0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x3f, 0x0a, 0x06, 0x70, 0x69, 0x65, 0x63,
+	0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65,
+	0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x50, 0x72,
+	0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x69, 0x65, 0x63,
+	0x65, 0x52, 0x06, 0x70, 0x69, 0x65, 0x63, 0x65, 0x73, 0x12, 0x3d, 0x0a, 0x0c, 0x74, 0x72, 0x61,
+	0x69, 0x6e, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32,
+	0x1a, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e,
+	0x54, 0x72, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x52, 0x0b, 0x74, 0x72, 0x61,
+	0x69, 0x6e, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x12, 0x46, 0x0a, 0x0f, 0x6e, 0x6f, 0x72, 0x6d,
+	0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x03, 0x20, 0x01, 0x28,
+	0x0b, 0x32, 0x1d, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63,
+	0x65, 0x2e, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63,
+	0x52, 0x0e, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63,
+	0x12, 0x41, 0x0a, 0x0e, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x64, 0x61,
+	0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65,
+	0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x53, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73,
+	0x74, 0x44, 0x61, 0x74, 0x61, 0x52, 0x0c, 0x73, 0x65, 0x6c, 0x66, 0x54, 0x65, 0x73, 0x74, 0x44,
+	0x61, 0x74, 0x61, 0x12, 0x4a, 0x0a, 0x11, 0x64, 0x65, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69,
+	0x7a, 0x65, 0x72, 0x5f, 0x73, 0x70, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d,
+	0x2e, 0x73, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4e,
+	0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x52, 0x10, 0x64,
+	0x65, 0x6e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x53, 0x70, 0x65, 0x63, 0x1a,
+	0xe6, 0x01, 0x0a, 0x0d, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x69, 0x65, 0x63,
+	0x65, 0x12, 0x14, 0x0a, 0x05, 0x70, 0x69, 0x65, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
+	0x52, 0x05, 0x70, 0x69, 0x65, 0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65,
+	0x18, 0x02, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x12, 0x48, 0x0a,
+	0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2c, 0x2e, 0x73, 0x65,
+	0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65, 0x2e, 0x4d, 0x6f, 0x64, 0x65,
+	0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x65, 0x6e, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x50,
+	0x69, 0x65, 0x63, 0x65, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x3a, 0x06, 0x4e, 0x4f, 0x52, 0x4d, 0x41,
+	0x4c, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x54, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12,
+	0x0a, 0x0a, 0x06, 0x4e, 0x4f, 0x52, 0x4d, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x55,
+	0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4f, 0x4e, 0x54,
+	0x52, 0x4f, 0x4c, 0x10, 0x03, 0x12, 0x10, 0x0a, 0x0c, 0x55, 0x53, 0x45, 0x52, 0x5f, 0x44, 0x45,
+	0x46, 0x49, 0x4e, 0x45, 0x44, 0x10, 0x04, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x59, 0x54, 0x45, 0x10,
+	0x06, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x4e, 0x55, 0x53, 0x45, 0x44, 0x10, 0x05, 0x2a, 0x09, 0x08,
+	0xc8, 0x01, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x2a, 0x09, 0x08, 0xc8, 0x01, 0x10, 0x80, 0x80,
+	0x80, 0x80, 0x02, 0x42, 0x13, 0x48, 0x03, 0x5a, 0x0f, 0x2e, 0x2f, 0x73, 0x65, 0x6e, 0x74, 0x65,
+	0x6e, 0x63, 0x65, 0x70, 0x69, 0x65, 0x63, 0x65,
+}
+
+var (
+	file_sentencepiece_model_proto_rawDescOnce sync.Once
+	file_sentencepiece_model_proto_rawDescData = file_sentencepiece_model_proto_rawDesc
+)
+
+func file_sentencepiece_model_proto_rawDescGZIP() []byte {
+	file_sentencepiece_model_proto_rawDescOnce.Do(func() {
+		file_sentencepiece_model_proto_rawDescData = protoimpl.X.CompressGZIP(file_sentencepiece_model_proto_rawDescData)
+	})
+	return file_sentencepiece_model_proto_rawDescData
+}
+
+var file_sentencepiece_model_proto_enumTypes = make([]protoimpl.EnumInfo, 2)
+var file_sentencepiece_model_proto_msgTypes = make([]protoimpl.MessageInfo, 6)
+var file_sentencepiece_model_proto_goTypes = []interface{}{
+	(TrainerSpec_ModelType)(0),         // 0: sentencepiece.TrainerSpec.ModelType
+	(ModelProto_SentencePiece_Type)(0), // 1: sentencepiece.ModelProto.SentencePiece.Type
+	(*TrainerSpec)(nil),                // 2: sentencepiece.TrainerSpec
+	(*NormalizerSpec)(nil),             // 3: sentencepiece.NormalizerSpec
+	(*SelfTestData)(nil),               // 4: sentencepiece.SelfTestData
+	(*ModelProto)(nil),                 // 5: sentencepiece.ModelProto
+	(*SelfTestData_Sample)(nil),        // 6: sentencepiece.SelfTestData.Sample
+	(*ModelProto_SentencePiece)(nil),   // 7: sentencepiece.ModelProto.SentencePiece
+}
+var file_sentencepiece_model_proto_depIdxs = []int32{
+	0, // 0: sentencepiece.TrainerSpec.model_type:type_name -> sentencepiece.TrainerSpec.ModelType
+	6, // 1: sentencepiece.SelfTestData.samples:type_name -> sentencepiece.SelfTestData.Sample
+	7, // 2: sentencepiece.ModelProto.pieces:type_name -> sentencepiece.ModelProto.SentencePiece
+	2, // 3: sentencepiece.ModelProto.trainer_spec:type_name -> sentencepiece.TrainerSpec
+	3, // 4: sentencepiece.ModelProto.normalizer_spec:type_name -> sentencepiece.NormalizerSpec
+	4, // 5: sentencepiece.ModelProto.self_test_data:type_name -> sentencepiece.SelfTestData
+	3, // 6: sentencepiece.ModelProto.denormalizer_spec:type_name -> sentencepiece.NormalizerSpec
+	1, // 7: sentencepiece.ModelProto.SentencePiece.type:type_name -> sentencepiece.ModelProto.SentencePiece.Type
+	8, // [8:8] is the sub-list for method output_type
+	8, // [8:8] is the sub-list for method input_type
+	8, // [8:8] is the sub-list for extension type_name
+	8, // [8:8] is the sub-list for extension extendee
+	0, // [0:8] is the sub-list for field type_name
+}
+
+func init() { file_sentencepiece_model_proto_init() }
+func file_sentencepiece_model_proto_init() {
+	if File_sentencepiece_model_proto != nil {
+		return
+	}
+	if !protoimpl.UnsafeEnabled {
+		file_sentencepiece_model_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*TrainerSpec); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			case 3:
+				return &v.extensionFields
+			default:
+				return nil
+			}
+		}
+		file_sentencepiece_model_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*NormalizerSpec); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			case 3:
+				return &v.extensionFields
+			default:
+				return nil
+			}
+		}
+		file_sentencepiece_model_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*SelfTestData); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			case 3:
+				return &v.extensionFields
+			default:
+				return nil
+			}
+		}
+		file_sentencepiece_model_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*ModelProto); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			case 3:
+				return &v.extensionFields
+			default:
+				return nil
+			}
+		}
+		file_sentencepiece_model_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*SelfTestData_Sample); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
+		file_sentencepiece_model_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*ModelProto_SentencePiece); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			case 3:
+				return &v.extensionFields
+			default:
+				return nil
+			}
+		}
+	}
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			RawDescriptor: file_sentencepiece_model_proto_rawDesc,
+			NumEnums:      2,
+			NumMessages:   6,
+			NumExtensions: 0,
+			NumServices:   0,
+		},
+		GoTypes:           file_sentencepiece_model_proto_goTypes,
+		DependencyIndexes: file_sentencepiece_model_proto_depIdxs,
+		EnumInfos:         file_sentencepiece_model_proto_enumTypes,
+		MessageInfos:      file_sentencepiece_model_proto_msgTypes,
+	}.Build()
+	File_sentencepiece_model_proto = out.File
+	file_sentencepiece_model_proto_rawDesc = nil
+	file_sentencepiece_model_proto_goTypes = nil
+	file_sentencepiece_model_proto_depIdxs = nil
+}
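The generated bindings above give the converter typed access to SentencePiece's serialized model format. As a rough sketch (not code from this commit), a tokenizer.model file is just a serialized sentencepiece.ModelProto, so it can be decoded with these bindings along the following lines; the import path of the generated package is assumed from the module path in go.mod, and the converter's real vocabulary handling in convert/convert.go may differ.

package main

import (
	"fmt"
	"os"

	"google.golang.org/protobuf/proto"

	"github.com/jmorganca/ollama/convert/sentencepiece" // assumed import path
)

func main() {
	// tokenizer.model is a serialized sentencepiece.ModelProto message.
	data, err := os.ReadFile("tokenizer.model")
	if err != nil {
		panic(err)
	}

	var model sentencepiece.ModelProto
	if err := proto.Unmarshal(data, &model); err != nil {
		panic(err)
	}

	// Each piece carries the token text, its score, and its type
	// (NORMAL, CONTROL, USER_DEFINED, BYTE, ...), which a converter can
	// map into a GGUF vocabulary. Print the first few as a smoke test.
	for i, piece := range model.GetPieces() {
		if i >= 5 {
			break
		}
		fmt.Println(piece.GetPiece(), piece.GetScore(), piece.GetType())
	}
}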

+ 333 - 0
convert/sentencepiece_model.proto

@@ -0,0 +1,333 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.!
+
+syntax = "proto2";
+
+// TODO(taku): Needs to use LITE RUNTIME in OSS release.
+option optimize_for = LITE_RUNTIME;
+option go_package = "./sentencepiece";
+
+package sentencepiece;
+
+// TrainerSpec encodes the various parameters for SentencePiece training.
+// Next id: 55
+message TrainerSpec {
+  ///////////////////////////////////////////////////////////////////
+  // General parameters
+  //
+  // Input corpus files.
+  //  Trainer accepts the following two formats:
+  //  A) Monolingual: plain text, one sentence per line.
+  //  B) Bilingual:   TSV, source sentence <tab> target sentence
+  //  When bilingual data is passed, a shared vocabulary model is built.
+  //  Note that the input file must be a raw corpus, not a preprocessed one.
+  //  Trainer only loads the first `input_sentence_size` sentences specified
+  //  with this parameter.
+  repeated string input = 1;
+
+  // Input corpus format:
+  // "text": one-sentence-per-line text format (default)
+  // "tsv":  sentence <tab> freq
+  optional string input_format = 7;
+
+  // Output model file prefix.
+  // <model_prefix>.model and <model_prefix>.vocab are generated.
+  optional string model_prefix = 2;
+
+  // Model type. UNIGRAM is the default; BPE, WORD, and CHAR are also defined.
+  enum ModelType {
+    UNIGRAM = 1;  // Unigram language model with dynamic algorithm
+    BPE = 2;      // Byte Pair Encoding
+    WORD = 3;     // Delimited by whitespace.
+    CHAR = 4;     // tokenizes into character sequence
+  }
+  optional ModelType model_type = 3 [default = UNIGRAM];
+
+  // Vocabulary size. 8k is the default size.
+  optional int32 vocab_size = 4 [default = 8000];
+
+  // List of the languages this model can accept.
+  // Since the model is language-agnostic, this field is used as a reference.
+  repeated string accept_language = 5;
+
+  // Size of self-test samples, which are encoded in the model file.
+  optional int32 self_test_sample_size = 6 [default = 0];
+
+  // Whether to use DP version of sentencepiece. Use it with TSV input format
+  // (requires precomputed word tab counts to work).
+  optional bool enable_differential_privacy = 50 [default = false];
+  // Set these parameters if you need DP version of sentencepiece.
+  // std of noise to add.
+  optional float differential_privacy_noise_level = 51 [default = 0.0];
+  // Clipping threshold to apply after adding noise. All the words with
+  // frequency less than this value are dropped.
+  optional uint64 differential_privacy_clipping_threshold = 52 [default = 0];
+
+  ///////////////////////////////////////////////////////////////////
+  // Training parameters.
+  //
+  // Uses the characters that cover the corpus with the ratio given by
+  // `character_coverage`. This parameter determines the basic alphabet of
+  // the sentence pieces; the remaining 1.0 - `character_coverage` fraction
+  // of characters is treated as UNK.
+  // See also required_chars field.
+  optional float character_coverage = 10 [default = 0.9995];
+
+  // Maximum size of sentences the trainer loads from `input` parameter.
+  // Trainer simply loads the `input` files in sequence.
+  // It is better to shuffle the input corpus randomly.
+  optional uint64 input_sentence_size = 11 [default = 0];
+  optional bool shuffle_input_sentence = 19 [default = true];
+
+  // Maximum size of sentences to make seed sentence pieces.
+  // Extended suffix array is constructed to extract frequent
+  // sub-strings from the corpus. This uses 20N working space,
+  // where N is the size of corpus.
+  optional int32 mining_sentence_size = 12 [deprecated = true];
+
+  // Maximum size of sentences to train sentence pieces.
+  optional int32 training_sentence_size = 13 [deprecated = true];
+
+  // The size of seed sentencepieces.
+  // `seed_sentencepiece_size` must be larger than `vocab_size`.
+  optional int32 seed_sentencepiece_size = 14 [default = 1000000];
+
+  // In every EM sub-iteration, keeps the top
+  // `shrinking_factor` * `current sentencepiece size` pieces with respect to
+  // the loss of the sentence piece. This value should be smaller than 1.0.
+  optional float shrinking_factor = 15 [default = 0.75];
+
+  // The maximum sentence length in bytes. Sentences longer than
+  // `max_sentence_length` are simply ignored.
+  // Longer input tends to bring the following risks:
+  //  * Overflow during EM training (unigram language model only)
+  //  * Performance drop because of O(n log n) cost in BPE.
+  optional int32 max_sentence_length = 18 [default = 4192];
+
+  // Number of threads in the training.
+  optional int32 num_threads = 16 [default = 16];
+
+  // Number of EM sub iterations.
+  optional int32 num_sub_iterations = 17 [default = 2];
+
+  ///////////////////////////////////////////////////////////////////
+  // SentencePiece parameters which control the shapes of sentence piece.
+  //
+  // Maximum length of sentencepiece.
+  optional int32 max_sentencepiece_length = 20 [default = 16];
+
+  // Uses Unicode script to split sentence pieces.
+  // When `split_by_unicode_script` is true, a sentence piece is not allowed to
+  // include multiple Unicode scripts, e.g. "F1" is not a valid piece.
+  // Exception: CJ characters (Hiragana/Katakana/Han) are all handled
+  // as one script type, since a Japanese word can consist of multiple scripts.
+  // This exception is always applied regardless of the accept-language
+  // parameter.
+  optional bool split_by_unicode_script = 21 [default = true];
+
+  // When `split_by_number` is true, put a boundary at every transition between
+  // number and non-number characters. To treat "F1" as one token, set this flag
+  // to false.
+  optional bool split_by_number = 23 [default = true];
+
+  // Use a white space to split sentence pieces.
+  // When `split_by_whitespace` is false, we may have the piece containing
+  // a white space in the middle. e.g., "in_the".
+  optional bool split_by_whitespace = 22 [default = true];
+
+  // Adds whitespace symbol (_) as a suffix instead of prefix. e.g., _hello =>
+  // hello_. When `treat_whitespace_as_suffix` is true,
+  // NormalizerSpec::add_dummy_prefix will add the dummy whitespace to the end
+  // of sentence.
+  optional bool treat_whitespace_as_suffix = 24 [default = false];
+
+  // Allows pieces that only contain whitespaces instead of appearing only as
+  // prefix or suffix of other pieces.
+  optional bool allow_whitespace_only_pieces = 26 [default = false];
+
+  // Split all digits (0-9) into separate pieces.
+  optional bool split_digits = 25 [default = false];
+
+  // Defines the pre-tokenization delimiter.
+  // When specified, no piece that crosses this delimiter is included
+  // in the vocab, and the delimiter string itself is virtually ignored
+  // during training. This field allows constraints to be placed on the
+  // vocabulary selection. Note that this field is only available in
+  // unigram mode.
+  optional string pretokenization_delimiter = 53 [ default = ""];
+
+  ///////////////////////////////////////////////////////////////////
+  // Vocabulary management
+  //
+  // Defines control symbols used as an indicator to
+  // change the behavior of the decoder. <s> and </s> are pre-defined.
+  // We can use this field to encode various meta information,
+  // including language indicator in multilingual model.
+  // These symbols are not visible to users, but visible to
+  // the decoder. Note that when the input sentence contains control symbols,
+  // they are not treated as one token, but segmented into normal pieces.
+  // Control symbols must be inserted independently from the segmentation.
+  repeated string control_symbols = 30;
+
+  // Defines user defined symbols.
+  // These symbols are added with extremely high score
+  // so they are always treated as one unique symbol in any context.
+  // Typical usage of user_defined_symbols is placeholder for named entities.
+  repeated string user_defined_symbols = 31;
+
+  // Defines required characters. Each UTF8 character in this string is included
+  // in the character set regardless of character_coverage value. Unlike
+  // user_defined_symbols, these characters have scores based on the frequency
+  // on input sentences, and the model can form subwords using characters
+  // in this field.
+  optional string required_chars = 36;
+
+  // Decomposes unknown pieces into UTF-8 bytes.
+  optional bool byte_fallback = 35 [default = false];
+
+  // When creating the vocabulary file, defines whether or not to additionally
+  // output the score for each piece.
+  optional bool vocabulary_output_piece_score = 32 [default = true];
+
+  // `vocab_size` is treated as a hard limit: training crashes if
+  // the model cannot produce a vocab of size `vocab_size`.
+  // When `hard_vocab_limit` is false, vocab_size is treated
+  // as a soft limit. Note that when model_type=char,
+  // hard_vocab_limit is always assumed to be false.
+  optional bool hard_vocab_limit = 33 [default = true];
+
+  // Use all symbols for vocab extraction. This flag is only valid
+  // when the model type is either CHAR or WORD.
+  optional bool use_all_vocab = 34 [default = false];
+
+  ///////////////////////////////////////////////////////////////////
+  // Reserved special meta tokens.
+  // * An id of -1 means the token is not used.
+  // * unk_id must not be -1.
+  // Ids must start at 0 and be contiguous.
+  optional int32 unk_id = 40 [default = 0];   // <unk>
+  optional int32 bos_id = 41 [default = 1];   // <s>
+  optional int32 eos_id = 42 [default = 2];   // </s>
+  optional int32 pad_id = 43 [default = -1];  // <pad> (padding)
+  optional string unk_piece = 45 [default = "<unk>"];
+  optional string bos_piece = 46 [default = "<s>"];
+  optional string eos_piece = 47 [default = "</s>"];
+  optional string pad_piece = 48 [default = "<pad>"];
+
+  // Encodes <unk> into U+2047 (DOUBLE QUESTION MARK),
+  // since this character can be useful both for user and
+  // developer. We can easily figure out that <unk> is emitted.
+  optional string unk_surface = 44 [default = " \xE2\x81\x87 "];
+
+  // Increase bit depth to allow unigram model training on large
+  // (>10M sentences) corpora. A side-effect of enabling this flag
+  // is increased memory usage.
+  optional bool train_extremely_large_corpus = 49 [default = false];
+
+  // Path to a seed sentencepieces file, with one tab-separated
+  // seed sentencepiece <tab> frequency per line.
+  optional string seed_sentencepieces_file = 54 [default = ""];
+
+  // Customized extensions: the range of field numbers
+  // are open to third-party extensions.
+  extensions 200 to max;
+}
+
+// NormalizerSpec encodes the various parameters for string normalization.
+message NormalizerSpec {
+  // name of normalization rule.
+  optional string name = 1;
+
+  // Pre-compiled normalization rule created by
+  // Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method.
+  // Usually this field is set by Builder::GetNormalizerSpec() method.
+  optional bytes precompiled_charsmap = 2;
+
+  // Adds dummy whitespace at the beginning of text in order to
+  // treat "world" in "world" and "hello world" in the same way.
+  optional bool add_dummy_prefix = 3 [default = true];
+
+  // Removes leading, trailing, and duplicate internal whitespace.
+  optional bool remove_extra_whitespaces = 4 [default = true];
+
+  // Replaces whitespace with meta symbol.
+  // This field must be true to train sentence piece model.
+  optional bool escape_whitespaces = 5 [default = true];
+
+  // Custom normalization rule file in TSV format.
+  // https://github.com/google/sentencepiece/blob/master/doc/normalization.md
+  // This field is only used in SentencePieceTrainer::Train() method, which
+  // compiles the rule into the binary rule stored in `precompiled_charsmap`.
+  optional string normalization_rule_tsv = 6;
+
+  // Customized extensions: the range of field numbers
+  // are open to third-party extensions.
+  extensions 200 to max;
+}
+
+// Proto to store samples for self-testing.
+message SelfTestData {
+  message Sample {
+    optional string input = 1;
+    optional string expected = 2;
+  }
+  repeated Sample samples = 1;
+
+  // Customized extensions: the range of field numbers
+  // are open to third-party extensions.
+  extensions 200 to max;
+}
+
+// ModelProto stores model parameters.
+// SentencePieceProcessor is supposed to be self-contained.
+// All settings/parameters which may change the behavior must be encoded
+// in ModelProto.
+message ModelProto {
+  message SentencePiece {
+    enum Type {
+      NORMAL = 1;        // normal symbol
+      UNKNOWN = 2;       // unknown symbol. only <unk> for now.
+      CONTROL = 3;       // control symbols. </s>, <s>, <2ja> etc.
+      USER_DEFINED = 4;  // user defined symbols.
+                         // Typical usage of USER_DEFINED symbol
+                         // is placeholder.
+      BYTE = 6;          // byte symbols. Used when `byte_fallback` is true.
+      UNUSED = 5;        // this piece is not used.
+    }
+    optional string piece = 1;  // piece must not be empty.
+    optional float score = 2;
+    optional Type type = 3 [default = NORMAL];
+
+    // Customized extensions: the range of field numbers
+    // are open to third-party extensions.
+    extensions 200 to max;
+  }
+
+  // Sentence pieces with scores.
+  repeated SentencePiece pieces = 1;
+
+  // Spec used to generate this model file.
+  optional TrainerSpec trainer_spec = 2;
+
+  // Spec for text normalization.
+  optional NormalizerSpec normalizer_spec = 3;
+
+  // Stores sample input and its expected segmentation to verify the model.
+  optional SelfTestData self_test_data = 4;
+
+  // Spec for text de-normalization.
+  optional NormalizerSpec denormalizer_spec = 5;
+
+  // Customized extensions: the range of field numbers
+  // are open to third-party extensions.
+  extensions 200 to max;
+}
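The sentencepiece_model.pb.go file earlier in this diff is generated from this proto definition. As a hedged sketch only (the exact command used for this commit is not recorded here), regeneration could be wired up with a go:generate directive that relies on the go_package option "./sentencepiece" to place the output under convert/sentencepiece:

// Sketch only: requires protoc and protoc-gen-go on PATH; the actual
// invocation used for this commit may differ.
//go:generate protoc -I=. --go_out=. sentencepiece_model.proto
package convert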

+ 22 - 3
go.mod

@@ -1,23 +1,43 @@
 module github.com/jmorganca/ollama
 
-go 1.21
+go 1.22
+
+toolchain go1.22.0
 
 require (
 	github.com/containerd/console v1.0.3
+	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/emirpasic/gods v1.18.1
 	github.com/gin-gonic/gin v1.9.1
+	github.com/golang/protobuf v1.5.0
 	github.com/google/uuid v1.0.0
+	github.com/mitchellh/mapstructure v1.5.0
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.7.0
 	github.com/stretchr/testify v1.8.4
+	github.com/x448/float16 v0.8.4
 	golang.org/x/sync v0.3.0
 )
 
+require github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
+
 require (
+	github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc // indirect
+	github.com/chewxy/hm v1.0.0 // indirect
+	github.com/chewxy/math32 v1.0.8 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/google/flatbuffers v1.12.0 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
+	github.com/xtgo/set v1.0.0 // indirect
+	go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+	gonum.org/v1/gonum v0.8.2 // indirect
+	gorgonia.org/vecf32 v0.9.0 // indirect
+	gorgonia.org/vecf64 v0.9.0 // indirect
 )
 
 require (
@@ -38,7 +58,6 @@ require (
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
 	github.com/pelletier/go-toml/v2 v2.0.8 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
@@ -50,6 +69,6 @@ require (
 	golang.org/x/sys v0.13.0
 	golang.org/x/term v0.13.0
 	golang.org/x/text v0.13.0 // indirect
-	google.golang.org/protobuf v1.30.0 // indirect
+	google.golang.org/protobuf v1.30.0
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
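The new requirements line up with what the convert package needs: google.golang.org/protobuf and golang/protobuf back the generated SentencePiece bindings, x448/float16 and d4l3k/go-bfloat16 are presumably there to decode half-precision safetensors data, and pdevine/tensor provides tensor manipulation. A minimal sketch of the kind of half-precision widening involved, with illustrative values rather than the converter's actual code:

package main

import (
	"encoding/binary"
	"fmt"
	"math"

	"github.com/x448/float16"
)

func main() {
	// Raw little-endian 16-bit values as they would appear in a
	// safetensors data section (both encode 1.0).
	f16bits := binary.LittleEndian.Uint16([]byte{0x00, 0x3c})  // IEEE 754 half
	bf16bits := binary.LittleEndian.Uint16([]byte{0x80, 0x3f}) // bfloat16

	// F16 -> F32 via github.com/x448/float16.
	fmt.Println(float16.Frombits(f16bits).Float32()) // 1

	// BF16 is the top 16 bits of a float32, so widening is a bit shift;
	// the repo pulls in github.com/d4l3k/go-bfloat16 for the same job.
	fmt.Println(math.Float32frombits(uint32(bf16bits) << 16)) // 1
}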

+ 148 - 2
go.sum

@@ -1,18 +1,38 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
+github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc h1:zvQ6w7KwtQWgMQiewOF9tFtundRMVZFSAksNV6ogzuY=
+github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ=
 github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
 github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
 github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
 github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
 github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
+github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
+github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
+github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
+github.com/chewxy/math32 v1.0.8 h1:fU5E4Ec4Z+5RtRAi3TovSxUjQPkgRh+HbP7tKB2OFbM=
+github.com/chewxy/math32 v1.0.8/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
 github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
 github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 h1:cBzrdJPAFBsgCrDPnZxlp1dF2+k4r1kVpD7+1S1PVjY=
+github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLcxEuYUlAd/EXyjc/v55nd3+47YAgWbSXVxPrNI=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
 github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
 github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
 github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
 github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
@@ -37,7 +57,31 @@ github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QX
 github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/google/flatbuffers v1.12.0 h1:/PtAHvnBY4Kqnx/xCQ3OIV9uYcSFGScBsWI3Oogeh6w=
+github.com/google/flatbuffers v1.12.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -48,6 +92,9 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
 github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
@@ -68,6 +115,8 @@ github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
+github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -75,14 +124,17 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
 github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
-github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
-github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
+github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9 h1:DV4iXjNn6fGeDl1AkZ1I0QB/0DBjrc7kPpxHrmuDzW4=
+github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9/go.mod h1:nR7l3gM6ubiOm+mCkmmUyIBUcBAyiUmW6dQrDZhugFE=
 github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
 github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
 github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
 github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
@@ -96,6 +148,8 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -112,19 +166,61 @@ github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6
 github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
 github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/xtgo/set v1.0.0 h1:6BCNBRv3ORNDQ7fyoJXRv+tstJz3m1JVFQErfeZz2pY=
+github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
+go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
 golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
 golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
 golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
 golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -137,12 +233,56 @@ golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
 golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2 h1:CCXrcPKiGGotvnN6jfUsKk4rRqm7q09/YbKb5xCEvtM=
+gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f h1:Yv4xsIx7HZOoyUGSJ2ksDyWE2qIBXROsZKt2ny3hCGM=
+google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.32.0 h1:zWTV+LMdc3kaiJMSTOFz2UgSBgx8RNQoTGiZu3fR9S0=
+google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc/cmd/protoc-gen-go-grpc v0.0.0-20200910201057-6591123024b3/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
@@ -157,4 +297,10 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
 gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorgonia.org/vecf32 v0.9.0 h1:PClazic1r+JVJ1dEzRXgeiVl4g1/Hf/w+wUSqnco1Xg=
+gorgonia.org/vecf32 v0.9.0/go.mod h1:NCc+5D2oxddRL11hd+pCB1PEyXWOyiQxfZ/1wwhOXCA=
+gorgonia.org/vecf64 v0.9.0 h1:bgZDP5x0OzBF64PjMGC3EvTdOoMEcmfAh1VCUnZFm1A=
+gorgonia.org/vecf64 v0.9.0/go.mod h1:hp7IOWCnRiVQKON73kkC/AUMtEXyf9kGlVrtPQ9ccVA=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

+ 2 - 2
llm/ggml.go

@@ -163,9 +163,9 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 	case FILE_MAGIC_GGLA:
 		c = &containerLORA{}
 	case FILE_MAGIC_GGUF_LE:
-		c = &containerGGUF{bo: binary.LittleEndian}
+		c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
 	case FILE_MAGIC_GGUF_BE:
-		c = &containerGGUF{bo: binary.BigEndian}
+		c = &ContainerGGUF{ByteOrder: binary.BigEndian}
 	default:
 		return nil, errors.New("invalid file magic")
 	}

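The two GGUF magic values above differ only in byte order, which is why the exported ContainerGGUF now carries its own binary.ByteOrder. A minimal stand-alone sketch of that dispatch, with illustrative stand-ins for the package's FILE_MAGIC_GGUF_LE / FILE_MAGIC_GGUF_BE constants (the ASCII bytes "GGUF" read as a uint32 in each byte order), not the real values from llm/ggml.go:

package main

import (
	"encoding/binary"
	"fmt"
)

// Illustrative stand-ins, not the package's constants: "GGUF" as a uint32
// read little-endian and big-endian respectively.
const (
	magicGGUFLE = 0x46554747
	magicGGUFBE = 0x47475546
)

// byteOrderFor mirrors the switch in DecodeGGML: choose a byte order from the magic.
func byteOrderFor(magic uint32) (binary.ByteOrder, error) {
	switch magic {
	case magicGGUFLE:
		return binary.LittleEndian, nil
	case magicGGUFBE:
		return binary.BigEndian, nil
	default:
		return nil, fmt.Errorf("invalid file magic: %x", magic)
	}
}

func main() {
	bo, err := byteOrderFor(magicGGUFLE)
	if err != nil {
		panic(err)
	}
	fmt.Println(bo) // LittleEndian
}
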
+ 574 - 137
llm/gguf.go

@@ -5,12 +5,20 @@ import (
 	"encoding/binary"
 	"fmt"
 	"io"
+	"log/slog"
+	"os"
+	"regexp"
+
+	"github.com/d4l3k/go-bfloat16"
+	"github.com/pdevine/tensor"
+	"github.com/pdevine/tensor/native"
+	"github.com/x448/float16"
 
 	"github.com/jmorganca/ollama/format"
 )
 
-type containerGGUF struct {
-	bo binary.ByteOrder
+type ContainerGGUF struct {
+	ByteOrder binary.ByteOrder
 
 	Version uint32
 
@@ -23,23 +31,28 @@ type containerGGUF struct {
 		NumTensor uint64
 		NumKV     uint64
 	}
+
+	V3 struct {
+		NumTensor uint64
+		NumKV     uint64
+	}
 }
 
-func (c *containerGGUF) Name() string {
+func (c *ContainerGGUF) Name() string {
 	return "gguf"
 }
 
-func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
-	binary.Read(rso, c.bo, &c.Version)
+func (c *ContainerGGUF) Decode(rso *readSeekOffset) (model, error) {
+	binary.Read(rso, c.ByteOrder, &c.Version)
 
 	switch c.Version {
 	case 1:
-		binary.Read(rso, c.bo, &c.V1)
+		binary.Read(rso, c.ByteOrder, &c.V1)
 	default:
-		binary.Read(rso, c.bo, &c.V2)
+		binary.Read(rso, c.ByteOrder, &c.V2)
 	}
 
-	model := newGGUFModel(c)
+	model := NewGGUFModel(c)
 	if err := model.Decode(rso); err != nil {
 		return nil, err
 	}
@@ -48,47 +61,61 @@ func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
 }
 
 const (
-	ggufTypeUint8 uint32 = iota
-	ggufTypeInt8
-	ggufTypeUint16
-	ggufTypeInt16
-	ggufTypeUint32
-	ggufTypeInt32
-	ggufTypeFloat32
-	ggufTypeBool
-	ggufTypeString
-	ggufTypeArray
-	ggufTypeUint64
-	ggufTypeInt64
-	ggufTypeFloat64
+	_ uint32 = iota
+	GGUFTokenNormal
+	GGUFTokenUnknown
+	GGUFTokenControl
+	GGUFTokenUserDefined
+	GGUFTokenUnused
+	GGUFTokenByte
 )
 
-type kv map[string]any
+const (
+	GGUFTypeUint8 uint32 = iota
+	GGUFTypeInt8
+	GGUFTypeUint16
+	GGUFTypeInt16
+	GGUFTypeUint32
+	GGUFTypeInt32
+	GGUFTypeFloat32
+	GGUFTypeBool
+	GGUFTypeString
+	GGUFTypeArray
+	GGUFTypeUint64
+	GGUFTypeInt64
+	GGUFTypeFloat64
+)
 
-type tensor struct {
-	name   string
-	kind   uint32
-	offset uint64
+type KV map[string]any
+
+type Tensor struct {
+	Name   string
+	Kind   uint32
+	Offset uint64
 
 	// shape is the number of elements in each dimension
-	shape [4]uint64
+	Shape [4]uint64
+
+	FileName      string
+	OffsetPadding uint64
+	FileOffsets   []uint64
 }
 
-func (t tensor) blockSize() uint64 {
+func (t Tensor) BlockSize() uint64 {
 	switch {
-	case t.kind < 2:
+	case t.Kind < 2:
 		return 1
-	case t.kind < 10:
+	case t.Kind < 10:
 		return 32
 	default:
 		return 256
 	}
 }
 
-func (t tensor) typeSize() uint64 {
-	blockSize := t.blockSize()
+func (t Tensor) TypeSize() uint64 {
+	blockSize := t.BlockSize()
 
-	switch t.kind {
+	switch t.Kind {
 	case 0: // FP32
 		return 4
 	case 1: // FP16
@@ -128,31 +155,63 @@ func (t tensor) typeSize() uint64 {
 	}
 }
 
-func (t tensor) parameters() uint64 {
-	return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
+func (t Tensor) Parameters() uint64 {
+	return t.Shape[0] * t.Shape[1] * t.Shape[2] * t.Shape[3]
 }
 
-func (t tensor) size() uint64 {
-	return t.parameters() * t.typeSize() / t.blockSize()
+func (t Tensor) Size() uint64 {
+	return t.Parameters() * t.TypeSize() / t.BlockSize()
 }
 
-type ggufModel struct {
-	*containerGGUF
+func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
+	n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
+	origShape := n.Shape().Clone()
+
+	// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
+	if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
+		return []uint16{}, err
+	}
 
-	kv
-	tensors []tensor
+	if err := n.T(0, 2, 1, 3); err != nil {
+		return []uint16{}, err
+	}
+
+	if err := n.Reshape(origShape...); err != nil {
+		return []uint16{}, err
+	}
+
+	if err := n.Transpose(); err != nil {
+		return []uint16{}, err
+	}
+	newN, err := native.SelectU16(n, 1)
+	if err != nil {
+		return []uint16{}, err
+	}
+
+	var fullTensor []uint16
+	for _, v := range newN {
+		fullTensor = append(fullTensor, v...)
+	}
+	return fullTensor, nil
+}
+
+type GGUFModel struct {
+	*ContainerGGUF
+
+	KV
+	Tensors []Tensor
 
 	parameters uint64
 }
 
-func newGGUFModel(container *containerGGUF) *ggufModel {
-	return &ggufModel{
-		containerGGUF: container,
-		kv:            make(kv),
+func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
+	return &GGUFModel{
+		ContainerGGUF: container,
+		KV:            make(KV),
 	}
 }
 
-func (llm *ggufModel) NumTensor() uint64 {
+func (llm *GGUFModel) NumTensor() uint64 {
 	if llm.Version == 1 {
 		return uint64(llm.V1.NumTensor)
 	}
@@ -160,7 +219,7 @@ func (llm *ggufModel) NumTensor() uint64 {
 	return llm.V2.NumTensor
 }
 
-func (llm *ggufModel) NumKV() uint64 {
+func (llm *GGUFModel) NumKV() uint64 {
 	if llm.Version == 1 {
 		return uint64(llm.V1.NumKV)
 	}
@@ -168,15 +227,15 @@ func (llm *ggufModel) NumKV() uint64 {
 	return llm.V2.NumKV
 }
 
-func (llm *ggufModel) ModelFamily() string {
-	if t, ok := llm.kv["general.architecture"].(string); ok {
+func (llm *GGUFModel) ModelFamily() string {
+	if t, ok := llm.KV["general.architecture"].(string); ok {
 		return t
 	}
 
 	return "unknown"
 }
 
-func (llm *ggufModel) ModelType() string {
+func (llm *GGUFModel) ModelType() string {
 	if llm.parameters > 0 {
 		return format.HumanNumber(llm.parameters)
 	}
@@ -184,15 +243,393 @@ func (llm *ggufModel) ModelType() string {
 	return "unknown"
 }
 
-func (llm *ggufModel) FileType() string {
-	if t, ok := llm.kv["general.file_type"].(uint32); ok {
+func (llm *GGUFModel) FileType() string {
+	if t, ok := llm.KV["general.file_type"].(uint32); ok {
 		return fileType(t)
 	}
 
 	return "unknown"
 }
 
-func (llm *ggufModel) Decode(rso *readSeekOffset) error {
+func (llm *GGUFModel) Encode(f *os.File) error {
+	// this mimics the order of the llama.cpp convert script
+	kOrder := []string{
+		"general.architecture",
+		"general.name",
+		"llama.context_length",
+		"llama.embedding_length",
+		"llama.block_count",
+		"llama.feed_forward_length",
+		"llama.rope.dimension_count",
+		"llama.attention.head_count",
+		"llama.attention.head_count_kv",
+		"llama.attention.layer_norm_rms_epsilon",
+		"llama.rope.freq_base",
+		"general.file_type",
+		"tokenizer.ggml.model",
+		"tokenizer.ggml.tokens",
+		"tokenizer.ggml.scores",
+		"tokenizer.ggml.token_type",
+		"tokenizer.ggml.bos_token_id",
+		"tokenizer.ggml.eos_token_id",
+		"tokenizer.ggml.unknown_token_id",
+		"tokenizer.ggml.add_bos_token",
+		"tokenizer.ggml.add_eos_token",
+		"tokenizer.chat_template",
+	}
+
+	if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
+		return err
+	}
+
+	if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
+		return err
+	}
+
+	if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
+		return err
+	}
+
+	if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
+		return err
+	}
+
+	for _, k := range kOrder {
+		val, ok := llm.KV[k]
+		if !ok {
+			continue
+		}
+
+		if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
+			return err
+		}
+		if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
+			return err
+		}
+
+		switch v := val.(type) {
+		case uint32:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
+				return err
+			}
+
+			if err := llm.writeUint32(f, v); err != nil {
+				return err
+			}
+		case float32:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
+				return err
+			}
+
+			if err := llm.writeF32(f, v); err != nil {
+				return err
+			}
+		case bool:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
+				return err
+			}
+
+			if err := llm.writeBool(f, v); err != nil {
+				return err
+			}
+		case string:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
+				return err
+			}
+
+			if err := llm.writeString(f, v); err != nil {
+				return err
+			}
+		case []int32:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
+				return err
+			}
+			for _, i := range v {
+				if err := llm.writeInt32(f, i); err != nil {
+					return err
+				}
+			}
+		case []uint32:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
+				return err
+			}
+			for _, i := range v {
+				if err := llm.writeUint32(f, i); err != nil {
+					return err
+				}
+			}
+		case []float32:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
+				return err
+			}
+			for _, fl := range v {
+				if err := llm.writeF32(f, fl); err != nil {
+					return err
+				}
+			}
+		case []string:
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
+				return err
+			}
+
+			if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
+				return err
+			}
+
+			for _, s := range v {
+				if err := llm.writeString(f, s); err != nil {
+					return err
+				}
+			}
+		}
+	}
+
+	// write layer metadata
+	for _, t := range llm.Tensors {
+		if err := llm.writeString(f, t.Name); err != nil {
+			return err
+		}
+
+		// the number of dimensions of the tensor
+		dims := 1
+		if t.Shape[1] > 0 {
+			dims = 2
+		}
+
+		if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
+			return err
+		}
+
+		for i := 0; i < dims; i++ {
+			if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
+				return err
+			}
+		}
+
+		if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
+			return err
+		}
+
+		if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
+			return err
+		}
+	}
+
+	offset, terr := f.Seek(0, io.SeekCurrent)
+	if terr != nil {
+		return terr
+	}
+	slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
+
+	if err := llm.writePadding(f, 32); err != nil {
+		return err
+	}
+
+	var dataFile *os.File
+	var currentFile string
+	var err error
+	for _, t := range llm.Tensors {
+		if currentFile != t.FileName {
+			if dataFile != nil {
+				dataFile.Close()
+			}
+			currentFile = t.FileName
+			dataFile, err = os.Open(t.FileName)
+			if err != nil {
+				fmt.Println(err)
+				return err
+			}
+		}
+
+		dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
+
+		pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
+		re, err := regexp.Compile(pattern)
+		if err != nil {
+			return err
+		}
+
+		matches := re.FindAllStringSubmatch(t.Name, -1)
+		if len(matches) > 0 {
+			layerSize := t.FileOffsets[1] - t.FileOffsets[0]
+
+			var err error
+			tData := make([]uint16, layerSize/2)
+			if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
+				return err
+			}
+
+			layerType := matches[0][re.SubexpIndex("layer")]
+			var heads uint32
+			switch layerType {
+			case "q":
+				heads = llm.KV["llama.attention.head_count"].(uint32)
+			case "k":
+				heads = llm.KV["llama.attention.head_count_kv"].(uint32)
+				if heads == 0 {
+					heads = llm.KV["llama.attention.head_count"].(uint32)
+				}
+			}
+
+			tData, err = t.Repack(tData, int(heads))
+			if err != nil {
+				return err
+			}
+
+			var buf []byte
+			for _, n := range tData {
+				buf = binary.LittleEndian.AppendUint16(buf, n)
+			}
+
+			tempBuf := make([]uint16, len(tData))
+			tDataF32 := bfloat16.DecodeFloat32(buf)
+			for cnt, v := range tDataF32 {
+				tDataF16 := float16.Fromfloat32(v)
+				tempBuf[cnt] = uint16(tDataF16)
+			}
+
+			if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
+				return err
+			}
+
+			if err := llm.writePadding(f, 32); err != nil {
+				return err
+			}
+			continue
+		}
+
+		remaining := t.FileOffsets[1] - t.FileOffsets[0]
+
+		bufSize := uint64(10240)
+		var finished bool
+		for {
+			data := make([]byte, min(bufSize, remaining))
+
+			b, err := io.ReadFull(dataFile, data)
+			remaining -= uint64(b)
+
+			if err == io.EOF || remaining <= 0 {
+				finished = true
+			} else if err != nil {
+				return err
+			}
+
+			// convert bfloat16 -> ieee float32
+			tDataF32 := bfloat16.DecodeFloat32(data)
+
+			switch t.Kind {
+			case 0:
+				if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
+					return err
+				}
+			case 1:
+				// convert float32 -> float16
+				tempBuf := make([]uint16, len(data)/2)
+				for cnt, v := range tDataF32 {
+					tDataF16 := float16.Fromfloat32(v)
+					tempBuf[cnt] = uint16(tDataF16)
+				}
+				if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
+					return err
+				}
+			}
+			if finished {
+				break
+			}
+		}
+
+		if err := llm.writePadding(f, 32); err != nil {
+			return err
+		}
+	}
+	f.Close()
+
+	return nil
+}
+
+func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
+	// gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
+	offset, err := f.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+	padding := ((offset + align - 1) / align) * align
+	buf := make([]byte, padding-offset)
+	if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
+	if err := binary.Write(f, llm.ByteOrder, v); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
+	if err := binary.Write(f, llm.ByteOrder, v); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
+	if err := binary.Write(f, llm.ByteOrder, v); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
+	if err := binary.Write(f, llm.ByteOrder, b); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (llm *GGUFModel) writeString(f *os.File, s string) error {
+	if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
+		return err
+	}
+
+	if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
 	// decode key-values
 	for i := 0; uint64(i) < llm.NumKV(); i++ {
 		k, err := llm.readString(rso)
@@ -204,36 +641,36 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 
 		var v any
 		switch vtype {
-		case ggufTypeUint8:
+		case GGUFTypeUint8:
 			v = llm.readU8(rso)
-		case ggufTypeInt8:
+		case GGUFTypeInt8:
 			v = llm.readI8(rso)
-		case ggufTypeUint16:
+		case GGUFTypeUint16:
 			v = llm.readU16(rso)
-		case ggufTypeInt16:
+		case GGUFTypeInt16:
 			v = llm.readI16(rso)
-		case ggufTypeUint32:
+		case GGUFTypeUint32:
 			v = llm.readU32(rso)
-		case ggufTypeInt32:
+		case GGUFTypeInt32:
 			v = llm.readI32(rso)
-		case ggufTypeUint64:
+		case GGUFTypeUint64:
 			v = llm.readU64(rso)
-		case ggufTypeInt64:
+		case GGUFTypeInt64:
 			v = llm.readI64(rso)
-		case ggufTypeFloat32:
+		case GGUFTypeFloat32:
 			v = llm.readF32(rso)
-		case ggufTypeFloat64:
+		case GGUFTypeFloat64:
 			v = llm.readF64(rso)
-		case ggufTypeBool:
+		case GGUFTypeBool:
 			v = llm.readBool(rso)
-		case ggufTypeString:
+		case GGUFTypeString:
 			s, err := llm.readString(rso)
 			if err != nil {
 				return err
 			}
 
 			v = s
-		case ggufTypeArray:
+		case GGUFTypeArray:
 			a, err := llm.readArray(rso)
 			if err != nil {
 				return err
@@ -244,7 +681,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 			return fmt.Errorf("invalid type: %d", vtype)
 		}
 
-		llm.kv[k] = v
+		llm.KV[k] = v
 	}
 
 	// decode tensors
@@ -262,33 +699,33 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 			shape[i] = llm.readU64(rso)
 		}
 
-		tensor := tensor{
-			name:   name,
-			kind:   llm.readU32(rso),
-			offset: llm.readU64(rso),
-			shape:  shape,
+		tensor := Tensor{
+			Name:   name,
+			Kind:   llm.readU32(rso),
+			Offset: llm.readU64(rso),
+			Shape:  shape,
 		}
 
-		llm.tensors = append(llm.tensors, tensor)
-		llm.parameters += tensor.parameters()
+		llm.Tensors = append(llm.Tensors, tensor)
+		llm.parameters += tensor.Parameters()
 	}
 
-	alignment, ok := llm.kv["general.alignment"].(uint32)
+	alignment, ok := llm.KV["general.alignment"].(uint32)
 	if !ok {
 		alignment = 32
 	}
 
 	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
-	for _, tensor := range llm.tensors {
-		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
+	for _, tensor := range llm.Tensors {
+		padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
 		rso.Seek(padded, io.SeekCurrent)
 	}
 
 	return nil
 }
 
-func (llm *ggufModel) NumLayers() uint32 {
-	value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
+func (llm *GGUFModel) NumLayers() uint32 {
+	value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
 	if !exists {
 		return 0
 	}
@@ -296,8 +733,8 @@ func (llm *ggufModel) NumLayers() uint32 {
 	return value.(uint32)
 }
 
-func (llm *ggufModel) NumHead() uint32 {
-	value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
+func (llm *GGUFModel) NumHead() uint32 {
+	value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
 	if !exists {
 		return 0
 	}
@@ -305,8 +742,8 @@ func (llm *ggufModel) NumHead() uint32 {
 	return value.(uint32)
 }
 
-func (llm *ggufModel) NumEmbed() uint32 {
-	value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
+func (llm *GGUFModel) NumEmbed() uint32 {
+	value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
 	if !exists {
 		return 0
 	}
@@ -314,8 +751,8 @@ func (llm *ggufModel) NumEmbed() uint32 {
 	return value.(uint32)
 }
 
-func (llm *ggufModel) NumHeadKv() uint32 {
-	value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
+func (llm *GGUFModel) NumHeadKv() uint32 {
+	value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
 	if !exists {
 		return 0
 	}
@@ -323,8 +760,8 @@ func (llm *ggufModel) NumHeadKv() uint32 {
 	return value.(uint32)
 }
 
-func (llm *ggufModel) NumCtx() uint32 {
-	value, exists := llm.kv[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
+func (llm *GGUFModel) NumCtx() uint32 {
+	value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
 	if !exists {
 		return 0
 	}
@@ -332,7 +769,7 @@ func (llm *ggufModel) NumCtx() uint32 {
 	return value.(uint32)
 }
 
-func (llm *ggufModel) NumGQA() uint32 {
+func (llm *GGUFModel) NumGQA() uint32 {
 	numHeadKv := llm.NumHeadKv()
 	if numHeadKv == 0 {
 		return 0
@@ -341,75 +778,75 @@ func (llm *ggufModel) NumGQA() uint32 {
 	return llm.NumHead() / numHeadKv
 }
 
-func (llm ggufModel) readU8(r io.Reader) uint8 {
+func (llm GGUFModel) readU8(r io.Reader) uint8 {
 	var u8 uint8
-	binary.Read(r, llm.bo, &u8)
+	binary.Read(r, llm.ByteOrder, &u8)
 	return u8
 }
 
-func (llm ggufModel) readI8(r io.Reader) int8 {
+func (llm GGUFModel) readI8(r io.Reader) int8 {
 	var i8 int8
-	binary.Read(r, llm.bo, &i8)
+	binary.Read(r, llm.ByteOrder, &i8)
 	return i8
 }
 
-func (llm ggufModel) readU16(r io.Reader) uint16 {
+func (llm GGUFModel) readU16(r io.Reader) uint16 {
 	var u16 uint16
-	binary.Read(r, llm.bo, &u16)
+	binary.Read(r, llm.ByteOrder, &u16)
 	return u16
 }
 
-func (llm ggufModel) readI16(r io.Reader) int16 {
+func (llm GGUFModel) readI16(r io.Reader) int16 {
 	var i16 int16
-	binary.Read(r, llm.bo, &i16)
+	binary.Read(r, llm.ByteOrder, &i16)
 	return i16
 }
 
-func (llm ggufModel) readU32(r io.Reader) uint32 {
+func (llm GGUFModel) readU32(r io.Reader) uint32 {
 	var u32 uint32
-	binary.Read(r, llm.bo, &u32)
+	binary.Read(r, llm.ByteOrder, &u32)
 	return u32
 }
 
-func (llm ggufModel) readI32(r io.Reader) int32 {
+func (llm GGUFModel) readI32(r io.Reader) int32 {
 	var i32 int32
-	binary.Read(r, llm.bo, &i32)
+	binary.Read(r, llm.ByteOrder, &i32)
 	return i32
 }
 
-func (llm ggufModel) readU64(r io.Reader) uint64 {
+func (llm GGUFModel) readU64(r io.Reader) uint64 {
 	var u64 uint64
-	binary.Read(r, llm.bo, &u64)
+	binary.Read(r, llm.ByteOrder, &u64)
 	return u64
 }
 
-func (llm ggufModel) readI64(r io.Reader) int64 {
+func (llm GGUFModel) readI64(r io.Reader) int64 {
 	var i64 int64
-	binary.Read(r, llm.bo, &i64)
+	binary.Read(r, llm.ByteOrder, &i64)
 	return i64
 }
 
-func (llm ggufModel) readF32(r io.Reader) float32 {
+func (llm GGUFModel) readF32(r io.Reader) float32 {
 	var f32 float32
-	binary.Read(r, llm.bo, &f32)
+	binary.Read(r, llm.ByteOrder, &f32)
 	return f32
 }
 
-func (llm ggufModel) readF64(r io.Reader) float64 {
+func (llm GGUFModel) readF64(r io.Reader) float64 {
 	var f64 float64
-	binary.Read(r, llm.bo, &f64)
+	binary.Read(r, llm.ByteOrder, &f64)
 	return f64
 }
 
-func (llm ggufModel) readBool(r io.Reader) bool {
+func (llm GGUFModel) readBool(r io.Reader) bool {
 	var b bool
-	binary.Read(r, llm.bo, &b)
+	binary.Read(r, llm.ByteOrder, &b)
 	return b
 }
 
-func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
+func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
 	var nameLength uint32
-	binary.Read(r, llm.bo, &nameLength)
+	binary.Read(r, llm.ByteOrder, &nameLength)
 
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -422,13 +859,13 @@ func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
 	return b.String(), nil
 }
 
-func (llm ggufModel) readString(r io.Reader) (string, error) {
+func (llm GGUFModel) readString(r io.Reader) (string, error) {
 	if llm.Version == 1 {
 		return llm.readStringV1(r)
 	}
 
 	var nameLength uint64
-	binary.Read(r, llm.bo, &nameLength)
+	binary.Read(r, llm.ByteOrder, &nameLength)
 
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -438,29 +875,29 @@ func (llm ggufModel) readString(r io.Reader) (string, error) {
 	return b.String(), nil
 }
 
-func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
+func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
 	atype := llm.readU32(r)
 	n := llm.readU32(r)
 
 	for i := 0; uint32(i) < n; i++ {
 		switch atype {
-		case ggufTypeUint8:
+		case GGUFTypeUint8:
 			arr = append(arr, llm.readU8(r))
-		case ggufTypeInt8:
+		case GGUFTypeInt8:
 			arr = append(arr, llm.readI8(r))
-		case ggufTypeUint16:
+		case GGUFTypeUint16:
 			arr = append(arr, llm.readU16(r))
-		case ggufTypeInt16:
+		case GGUFTypeInt16:
 			arr = append(arr, llm.readI16(r))
-		case ggufTypeUint32:
+		case GGUFTypeUint32:
 			arr = append(arr, llm.readU32(r))
-		case ggufTypeInt32:
+		case GGUFTypeInt32:
 			arr = append(arr, llm.readI32(r))
-		case ggufTypeFloat32:
+		case GGUFTypeFloat32:
 			arr = append(arr, llm.readF32(r))
-		case ggufTypeBool:
+		case GGUFTypeBool:
 			arr = append(arr, llm.readBool(r))
-		case ggufTypeString:
+		case GGUFTypeString:
 			s, err := llm.readStringV1(r)
 			if err != nil {
 				return nil, err
@@ -475,7 +912,7 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
 	return
 }
 
-func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
+func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
 	if llm.Version == 1 {
 		return llm.readArrayV1(r)
 	}
@@ -485,29 +922,29 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 
 	for i := 0; uint64(i) < n; i++ {
 		switch atype {
-		case ggufTypeUint8:
+		case GGUFTypeUint8:
 			arr = append(arr, llm.readU8(r))
-		case ggufTypeInt8:
+		case GGUFTypeInt8:
 			arr = append(arr, llm.readI8(r))
-		case ggufTypeUint16:
+		case GGUFTypeUint16:
 			arr = append(arr, llm.readU16(r))
-		case ggufTypeInt16:
+		case GGUFTypeInt16:
 			arr = append(arr, llm.readI16(r))
-		case ggufTypeUint32:
+		case GGUFTypeUint32:
 			arr = append(arr, llm.readU32(r))
-		case ggufTypeInt32:
+		case GGUFTypeInt32:
 			arr = append(arr, llm.readI32(r))
-		case ggufTypeUint64:
+		case GGUFTypeUint64:
 			arr = append(arr, llm.readU64(r))
-		case ggufTypeInt64:
+		case GGUFTypeInt64:
 			arr = append(arr, llm.readI64(r))
-		case ggufTypeFloat32:
+		case GGUFTypeFloat32:
 			arr = append(arr, llm.readF32(r))
-		case ggufTypeFloat64:
+		case GGUFTypeFloat64:
 			arr = append(arr, llm.readF64(r))
-		case ggufTypeBool:
+		case GGUFTypeBool:
 			arr = append(arr, llm.readBool(r))
-		case ggufTypeString:
+		case GGUFTypeString:
 			s, err := llm.readString(r)
 			if err != nil {
 				return nil, err

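Two pieces of arithmetic above carry most of the file layout: a tensor occupies Parameters() * TypeSize() / BlockSize() bytes, and both Decode and Encode round offsets up to a 32-byte boundary (the default when general.alignment is absent). A small self-contained sketch of both calculations, using local copies of the formulas rather than the package's types; the FP16 example shape is illustrative:

package main

import "fmt"

// Local copies of the Tensor helpers above, trimmed to the FP32/FP16/Q4_0 cases.
func blockSize(kind uint32) uint64 {
	switch {
	case kind < 2:
		return 1
	case kind < 10:
		return 32
	default:
		return 256
	}
}

func typeSize(kind uint32) uint64 {
	switch kind {
	case 0: // FP32
		return 4
	case 1: // FP16
		return 2
	case 2: // Q4_0: a 2-byte scale plus 32 4-bit weights per block
		return 2 + blockSize(kind)/2
	default:
		return 0 // other quantizations omitted in this sketch
	}
}

// alignUp mirrors writePadding (and, for power-of-two alignments, the bitmask
// form used when Decode skips past tensor data): round offset up to the next
// multiple of align.
func alignUp(offset, align uint64) uint64 {
	return ((offset + align - 1) / align) * align
}

func main() {
	// an FP16 tensor with shape [4096, 4096, 1, 1]
	shape := [4]uint64{4096, 4096, 1, 1}
	kind := uint32(1)

	params := shape[0] * shape[1] * shape[2] * shape[3]
	size := params * typeSize(kind) / blockSize(kind)
	fmt.Println(params, size) // 16777216 33554432

	// tensor data is then padded out to the 32-byte alignment
	fmt.Println(alignUp(size, 32))   // 33554432 (already aligned)
	fmt.Println(alignUp(size+7, 32)) // 33554464
}
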
+ 87 - 1
server/images.go

@@ -1,6 +1,7 @@
 package server
 
 import (
+	"archive/zip"
 	"bytes"
 	"context"
 	"crypto/sha256"
@@ -23,6 +24,7 @@ import (
 	"golang.org/x/exp/slices"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/convert"
 	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
 	"github.com/jmorganca/ollama/version"
@@ -316,7 +318,24 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 				c.Args = blobPath
 			}
 
-			bin, err := os.Open(realpath(modelFileDir, c.Args))
+			pathName := realpath(modelFileDir, c.Args)
+
+			ggufName, err := convertSafetensors(name, pathName)
+			if err != nil {
+				switch {
+				case errors.Is(err, zip.ErrFormat):
+					// it's not a safetensor archive
+				default:
+					return err
+				}
+			}
+
+			if ggufName != "" {
+				pathName = ggufName
+				defer os.RemoveAll(ggufName)
+			}
+
+			bin, err := os.Open(pathName)
 			if err != nil {
 				// not a file on disk so must be a model reference
 				modelpath := ParseModelPath(c.Args)
@@ -592,6 +611,73 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
 	return nil
 }
 
+func convertSafetensors(name, fn string) (string, error) {
+	r, err := zip.OpenReader(fn)
+	if err != nil {
+		return "", err
+	}
+	defer r.Close()
+
+	tempDir, err := os.MkdirTemp("", "ollama-convert")
+	if err != nil {
+		return "", err
+	}
+	defer os.RemoveAll(tempDir)
+
+	for _, f := range r.File {
+		fpath := filepath.Join(tempDir, f.Name)
+		outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+		if err != nil {
+			return "", err
+		}
+
+		rc, err := f.Open()
+		if err != nil {
+			return "", err
+		}
+
+		_, err = io.Copy(outFile, rc)
+		if err != nil {
+			return "", err
+		}
+
+		outFile.Close()
+		rc.Close()
+	}
+
+	params, err := convert.GetParams(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	SupportedArchs := []string{
+		"MistralForCausalLM",
+	}
+
+	for _, arch := range params.Architectures {
+		if !slices.Contains(SupportedArchs, arch) {
+			return "", fmt.Errorf("safetensors architecture %q is not yet supported", arch)
+		}
+	}
+
+	t, err := convert.GetSafeTensors(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	vocab, err := convert.LoadTokens(tempDir)
+	if err != nil {
+		return "", err
+	}
+
+	fn, err = convert.WriteGGUF(name, t, params, vocab)
+	if err != nil {
+		return "", err
+	}
+
+	return fn, nil
+}
+
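Put together, the conversion path reduces to four convert-package calls once the archive is unpacked. A minimal sketch of driving them directly on an already-extracted safetensors directory, assuming only the API surface used by convertSafetensors above (GetParams, GetSafeTensors, LoadTokens, WriteGGUF); the directory path and model name are placeholders:

package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/convert"
)

func main() {
	// placeholder: a directory containing model-*.safetensors, config.json,
	// and tokenizer.model (plus added_tokens.json if present)
	dir := "/path/to/mistral-7b"

	params, err := convert.GetParams(dir) // model params, e.g. Architectures
	if err != nil {
		log.Fatal(err)
	}

	tensors, err := convert.GetSafeTensors(dir) // enumerate the safetensors layers
	if err != nil {
		log.Fatal(err)
	}

	vocab, err := convert.LoadTokens(dir) // tokenizer vocabulary
	if err != nil {
		log.Fatal(err)
	}

	ggufPath, err := convert.WriteGGUF("mymodel", tensors, params, vocab)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("wrote", ggufPath)
}
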
 func CopyModel(src, dest string) error {
 	srcModelPath := ParseModelPath(src)
 	srcPath, err := srcModelPath.GetManifestPath()