1 month ago · 543240fb5f
--- a/model/model.go
+++ b/model/model.go
@@ -22,6 +22,8 @@ import (
 
				 	"github.com/ollama/ollama/model/input"
			
 
				 )
			
 
				 
			
 
				+var ErrNoVisionModel = errors.New("this model is missing data required for image input") // sentinel returned by the gemma3 and mllama EncodeMultimodal methods when their vision-side Layers slices are empty
			
 
				+
			
 
				 // Model implements a specific model architecture, defining the forward pass and any model-specific configuration
			
 
				 type Model interface {
			
 
				 	Forward(ml.Context, input.Options) (ml.Tensor, error)
			
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -84,6 +84,10 @@ func New(c ml.Config) (model.Model, error) {
 
				 }
			
 
				 
			
 
				 func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
			
 
				+	if len(m.VisionModel.Layers) == 0 {
			
 
				+		return nil, model.ErrNoVisionModel
			
 
				+	}
			
 
				+
			
 
				 	image, _, err := image.Decode(bytes.NewReader(multimodalData))
			
 
				 	if err != nil {
			
 
				 		return nil, err
			
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@@ -63,6 +63,10 @@ func New(c ml.Config) (model.Model, error) {
 
				 }
			
 
				 
			
 
				 func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
			
 
				+	if len(m.VisionModel.Transformer.Layers) == 0 || len(m.GlobalTransformer.Layers) == 0 {
			
 
				+		return nil, model.ErrNoVisionModel
			
 
				+	}
			
 
				+
			
 
				 	image, _, err := image.Decode(bytes.NewReader(multimodalData))
			
 
				 	if err != nil {
			
 
				 		return nil, err