소스 검색

remove image tags

Michael Yang 1 년 전
부모
커밋
d125510b4b
2개의 변경된 파일, 15개의 추가 및 10개의 삭제
  1. +1 -2
      server/images.go
  2. +14 -8
      server/routes.go

+ 1 - 2
server/images.go

@@ -184,10 +184,9 @@ func (m *Model) ChatPrompts(msgs []api.Message) (*ChatHistory, error) {
 			for i := range msg.Images {
 			for i := range msg.Images {
 				currentVars.Prompt += fmt.Sprintf(" [img-%d]", len(images)+i)
 				currentVars.Prompt += fmt.Sprintf(" [img-%d]", len(images)+i)
 				currentVars.Images = append(currentVars.Images, llm.ImageData{
 				currentVars.Images = append(currentVars.Images, llm.ImageData{
-					ID:   i,
+					ID:   len(images) + i,
 					Data: msg.Images[i],
 					Data: msg.Images[i],
 				})
 				})
-
 			}
 			}
 
 
 			images = append(images, currentVars.Images...)
 			images = append(images, currentVars.Images...)

+ 14 - 8
server/routes.go

@@ -1254,7 +1254,8 @@ func trimmedPrompt(ctx context.Context, chat *ChatHistory, model *Model) (string
 	var images []llm.ImageData
 	var images []llm.ImageData
 	// reverse iterate through the prompts to build the prompt string in a way that fits the max context length
 	// reverse iterate through the prompts to build the prompt string in a way that fits the max context length
 	for i := len(chat.Prompts) - 1; i >= 0; i-- {
 	for i := len(chat.Prompts) - 1; i >= 0; i-- {
-		promptText, err := promptString(model, chat.Prompts[i], i == len(chat.Prompts)-1)
+		prompt := chat.Prompts[i]
+		promptText, err := promptString(model, prompt, i == len(chat.Prompts)-1)
 		if err != nil {
 		if err != nil {
 			return "", nil, err
 			return "", nil, err
 		}
 		}
@@ -1268,15 +1269,20 @@ func trimmedPrompt(ctx context.Context, chat *ChatHistory, model *Model) (string
 			break // reached max context length, stop adding more prompts
 			break // reached max context length, stop adding more prompts
 		}
 		}
 
 
-		totalTokenLength += len(encodedTokens)
-		systemPromptIncluded = systemPromptIncluded || chat.Prompts[i].System != ""
-		promptsToAdd = append(promptsToAdd, promptInfo{vars: chat.Prompts[i], tokenLen: len(encodedTokens)})
+		for j := range prompt.Images {
+			if totalTokenLength+768 > loaded.NumCtx {
+				// this decreases the token length but overestimating is fine
+				prompt.Prompt = strings.ReplaceAll(prompt.Prompt, fmt.Sprintf(" [img-%d]", prompt.Images[j].ID), "")
+				continue
+			}
 
 
-		images = append(images, chat.Prompts[i].Images...)
+			totalTokenLength += 768
+			images = append(images, prompt.Images[j])
+		}
 
 
-		// clip has a projection dimension of 768
-		// TODO: use kv['clip.vision.projection_dim'] from projection instead
-		totalTokenLength += 768 * len(chat.Prompts[i].Images)
+		totalTokenLength += len(encodedTokens)
+		systemPromptIncluded = systemPromptIncluded || prompt.System != ""
+		promptsToAdd = append(promptsToAdd, promptInfo{vars: prompt, tokenLen: len(encodedTokens)})
 	}
 	}
 
 
 	// ensure the system prompt is included, if not already
 	// ensure the system prompt is included, if not already