package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/fs/ggml"
)

// ModelParameters holds the subset of a model's config.json needed to
// select a converter and size the vocabulary.
type ModelParameters struct {
	Architectures []string       `json:"architectures"`
	VocabSize     uint32         `json:"vocab_size"`
	TextModel     TextParameters `json:"text_config"`
}

// TextParameters holds the nested text_config block used by models that
// declare their vocabulary size there.
type TextParameters struct {
	VocabSize uint32 `json:"vocab_size"`
}

// AdapterParameters holds the subset of an adapter_config.json needed to
// convert a LoRA adapter.
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}
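
// For illustration, AdapterParameters decodes an adapter_config.json shaped
// like the following (a hypothetical example; values vary per adapter):
//
//	{
//	    "lora_alpha": 16,
//	    "lora_layers": 32,
//	    "lora_parameters": {"rank": 8, "alpha": 16.0, "scale": 2.0}
//	}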

// KV returns the common GGUF key-values for a model, derived from the
// tokenizer. Model converters embed ModelParameters and extend this map
// with architecture-specific keys.
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
	kv := ggml.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

// KV returns the GGUF key-values for a LoRA adapter. The nested
// lora_parameters.alpha takes precedence over the top-level lora_alpha
// when both are present.
func (p AdapterParameters) KV() ggml.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := ggml.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

// ModelConverter maps a source model to its GGUF equivalent.
type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) ggml.KV
	// Tensors maps input tensors to LLM tensors. Model-specific modifications can be done here.
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

// moreParser is implemented by converters that need to parse additional
// files from the input beyond config.json.
type moreParser interface {
	parseMore(fs.FS) error
}

// AdapterConverter maps a source adapter to its GGUF equivalent.
type AdapterConverter interface {
	// KV maps parameters to LLM key-values, given the base model's key-values
	KV(ggml.KV) ggml.KV
	// Tensors maps input tensors to LLM tensors. Adapter-specific modifications can be done here.
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}
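
// A new architecture is supported by implementing ModelConverter (or
// AdapterConverter) and adding a case to the corresponding switch in
// ConvertModel or ConvertAdapter below. A minimal sketch, assuming a
// hypothetical "example" architecture (Tensors and Replacements elided;
// real converters also embed ModelParameters to inherit the default
// specialTokenTypes and writeFile):
//
//	type exampleModel struct {
//		ModelParameters
//		MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
//	}
//
//	func (m *exampleModel) KV(t *Tokenizer) ggml.KV {
//		kv := m.ModelParameters.KV(t)
//		kv["general.architecture"] = "example"
//		kv["example.context_length"] = m.MaxPositionEmbeddings
//		return kv
//	}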

// ConvertAdapter writes an Ollama compatible LoRA adapter to the provided
// io.WriteSeeker. baseKV holds the key-values of the base model and must
// include its architecture.
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return fmt.Errorf("unsupported architecture %q", arch)
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}
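
// A minimal usage sketch for ConvertAdapter, assuming hypothetical paths
// and eliding how baseKV is decoded from the base model's GGUF:
//
//	var baseKV ggml.KV // must contain "general.architecture"
//	f, err := os.Create("adapter.gguf")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer f.Close()
//	if err := ConvertAdapter(os.DirFS("/path/to/adapter"), f, baseKV); err != nil {
//		log.Fatal(err)
//	}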

// ConvertModel writes an Ollama compatible model to the provided io.WriteSeeker
// based on configurations and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llamaModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
	case "BertModel":
		conv = &bertModel{}
	case "CohereForCausalLM":
		conv = &commandrModel{}
	default:
		return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

	// Prefer the top-level vocab_size; fall back to text_config.vocab_size
	// when the model declares it there.
	vocabSize := int(p.VocabSize)
	if vocabSize == 0 {
		vocabSize = int(p.TextModel.VocabSize)
	}

	switch {
	case vocabSize == 0:
		slog.Warn("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	case vocabSize < len(t.Vocabulary.Tokens):
		return fmt.Errorf("vocabulary is larger than expected: %d tokens instead of %d", len(t.Vocabulary.Tokens), vocabSize)
	default:
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
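
// A minimal usage sketch for ConvertModel, assuming hypothetical paths:
//
//	f, err := os.Create("model.gguf")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer f.Close()
//	if err := ConvertModel(os.DirFS("/path/to/model"), f); err != nil {
//		log.Fatal(err)
//	}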