model_loader.go

package server

import (
	"fmt"
	"sync"

	"github.com/ollama/ollama/llama"
	"github.com/ollama/ollama/types/model"
)

// loadedModel pairs a loaded llama model with the path it was loaded from.
type loadedModel struct {
	model     *llama.Model
	modelPath string
}

// modelCache stores loaded models keyed by model path and formatted params.
var modelCache sync.Map // map[string]*loadedModel

// LoadModel resolves the named model and loads it, returning a cached instance
// when one already exists for the same model path and params.
func LoadModel(name string, params llama.ModelParams) (*loadedModel, error) {
	modelName := model.ParseName(name)
	if !modelName.IsValid() {
		return nil, fmt.Errorf("invalid model name: %s", modelName)
	}

	modelPath, err := GetModel(modelName.String())
	if err != nil {
		return nil, fmt.Errorf("model not found: %s", modelName)
	}

	// Build the cache key from the model path and the formatted params so that
	// the same model loaded with different params gets its own cache entry.
	cacheKey := fmt.Sprintf("%s-%+v", modelPath.ModelPath, params)
	if cached, ok := modelCache.Load(cacheKey); ok {
		return cached.(*loadedModel), nil
	}

	// Evict any existing model before loading a new one.
	evictExistingModel()

	model, err := llama.LoadModelFromFile(modelPath.ModelPath, params)
	if err != nil {
		return nil, fmt.Errorf("failed to load model: %v", err)
	}

	loaded := &loadedModel{
		model:     model,
		modelPath: modelPath.ModelPath,
	}
	modelCache.Store(cacheKey, loaded)

	return loaded, nil
}
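
// The function below is an illustrative usage sketch, not part of the server
// flow: it shows how a caller is expected to use LoadModel. The model name
// "llama3" and the zero-value llama.ModelParams are assumptions for
// illustration only.
func exampleLoadModel() error {
	loaded, err := LoadModel("llama3", llama.ModelParams{})
	if err != nil {
		return err
	}

	// A second call with the same resolved path and params would return the
	// cached *loadedModel instead of reloading the weights from disk.
	_ = loaded.model
	return nil
}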

// evictExistingModel removes any currently loaded models from the cache and
// frees their memory. The cache currently holds at most one model at a time.
// TODO: Add proper cache eviction policy (LRU/size/TTL based)
func evictExistingModel() {
	modelCache.Range(func(key, value any) bool {
		if cached, ok := modelCache.LoadAndDelete(key); ok {
			llama.FreeModel(cached.(*loadedModel).model)
		}
		return true
	})
}
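
// The sketch below is one possible direction for the TODO above and is not
// part of the current implementation: keep at most a (hypothetical)
// maxCachedModels entries and evict arbitrary entries, in sync.Map iteration
// order rather than true LRU order, until the cache is back under the limit.
const maxCachedModels = 1

func evictUntilUnderLimit() {
	for {
		// sync.Map has no length method, so count entries by iterating.
		n := 0
		modelCache.Range(func(key, value any) bool {
			n++
			return true
		})
		if n < maxCachedModels {
			return
		}

		// Evict a single entry per pass, freeing its underlying llama model.
		modelCache.Range(func(key, value any) bool {
			if cached, ok := modelCache.LoadAndDelete(key); ok {
				llama.FreeModel(cached.(*loadedModel).model)
			}
			return false
		})
	}
}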