model_loader.go 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package server
  2. import (
  3. "fmt"
  4. "sync"
  5. "github.com/ollama/ollama/llama"
  6. "github.com/ollama/ollama/types/model"
  7. )
  8. type loadedModel struct {
  9. model llama.Model
  10. modelPath string
  11. }
  // modelLoader is the receiver for LoadModel and evictExistingModel.
  type modelLoader struct {
  	// cache is a per-loader sync.Map.
  	// NOTE(review): LoadModel reads and writes the package-level modelCache,
  	// not this field — confirm which of the two is meant to be authoritative.
  	cache sync.Map
  }
  // modelCache is the package-level cache of loaded models. LoadModel keys each
  // entry by the model path joined with the %+v rendering of the load params,
  // and stores *loadedModel values (conceptually a map[string]*loadedModel).
  var modelCache sync.Map
  17. func (ml *modelLoader) LoadModel(name string, params llama.ModelParams) (*loadedModel, error) {
  18. modelName := model.ParseName(name)
  19. if !modelName.IsValid() {
  20. return nil, fmt.Errorf("invalid model name: %s", modelName)
  21. }
  22. modelPath, err := GetModel(modelName.String())
  23. if err != nil {
  24. return nil, fmt.Errorf("model not found: %s", modelName)
  25. }
  26. // Create cache key from model path and params hash
  27. cacheKey := fmt.Sprintf("%s-%+v", modelPath.ModelPath, params)
  28. if cached, ok := modelCache.Load(cacheKey); ok {
  29. return cached.(*loadedModel), nil
  30. }
  31. // Evict existing model if any
  32. ml.evictExistingModel()
  33. model, err := llama.LoadModelFromFile(modelPath.ModelPath, params)
  34. if err != nil {
  35. return nil, fmt.Errorf("failed to load model: %v", err)
  36. }
  37. loaded := &loadedModel{
  38. model: *model,
  39. modelPath: modelPath.ModelPath,
  40. }
  41. modelCache.Store(cacheKey, loaded)
  42. return loaded, nil
  43. }
  44. // evictExistingModel removes any currently loaded model from the cache
  45. // Currently only supports a single model in cache at a time
  46. // TODO: Add proper cache eviction policy (LRU/size/TTL based)
  47. func (ml *modelLoader) evictExistingModel() {
  48. ml.cache.Range(func(key, value any) bool {
  49. if cached, ok := ml.cache.LoadAndDelete(key); ok {
  50. llama.FreeModel(&cached.(*loadedModel).model)
  51. }
  52. return true
  53. })
  54. }