//go:build integration

package server

import (
	"context"
	"os"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

// TODO - this would ideally be in the llm package, but that would require some
// refactoring of interfaces in the server package to avoid circular dependencies.
//
// WARNING - these tests will fail on a Mac if you don't manually copy
// ggml-metal.metal to this dir (./server).
//
// TODO - fix this ^^
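//
// These tests are gated behind the integration build tag, so run them with
// something like:
//
//	go test -tags=integration ./server/...
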
var (
	req = [2]api.GenerateRequest{
		{
			Model:   "orca-mini",
			Prompt:  "tell me a short story about agi?",
			Options: map[string]interface{}{},
		}, {
			Model:   "orca-mini",
			Prompt:  "what is the origin of the us thanksgiving holiday?",
			Options: map[string]interface{}{},
		},
	}
	resp = [2]string{
		"once upon a time",
		"united states thanksgiving",
	}
)
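
// TestIntegrationSimpleOrcaMini runs a single deterministic (fixed seed, zero
// temperature) prompt against orca-mini and checks that the response contains
// the expected phrase.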
func TestIntegrationSimpleOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	defer os.RemoveAll(workDir)
	require.NoError(t, llm.Init(workDir))
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
	defer llmRunner.Close()
	response := OneShotPromptResponse(t, ctx, req[0], model, llmRunner)
	assert.Contains(t, strings.ToLower(response), resp[0])
}

// TODO
// The server always loads a new runner and closes the old one, which forces
// serial execution. At present this test case fails with concurrency problems,
// so it is skipped. Eventually we should try to get true concurrency working
// with n_parallel support in the backend.
func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	t.Skip("concurrent prediction on a single runner is not currently supported")
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	defer os.RemoveAll(workDir)
	require.NoError(t, llm.Init(workDir))
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	var wg sync.WaitGroup
	wg.Add(len(req))
	model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
	defer llmRunner.Close()
	for i := 0; i < len(req); i++ {
		go func(i int) {
			defer wg.Done()
			response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
			t.Logf("Prompt: %s\nResponse: %s", req[i].Prompt, response)
			assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
		}(i)
	}
	wg.Wait()
}
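
// TestIntegrationConcurrentRunnersOrcaMini loads a separate runner for each
// request from concurrent goroutines. Since the server closes the previous
// runner whenever it loads a new one, the requests end up executing serially.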
func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	defer os.RemoveAll(workDir)
	require.NoError(t, llm.Init(workDir))
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	var wg sync.WaitGroup
	wg.Add(len(req))
	t.Logf("Running %d concurrently", len(req))
	for i := 0; i < len(req); i++ {
		go func(i int) {
			defer wg.Done()
			model, llmRunner := PrepareModelForPrompts(t, req[i].Model, opts)
			defer llmRunner.Close()
			response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
			t.Logf("Prompt: %s\nResponse: %s", req[i].Prompt, response)
			assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
		}(i)
	}
	wg.Wait()
}

// TODO - create a parallel test with 2 different models once we support concurrency
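
// TestIntegrationParallelRunnersTwoModels is a minimal sketch of that
// two-model test, skipped until concurrency is supported. The second model
// tag ("llama2") is an illustrative assumption, not something this suite
// requires, and would need to be pulled locally before running.
func TestIntegrationParallelRunnersTwoModels(t *testing.T) {
	SkipIFNoTestData(t)
	t.Skip("parallel runners with different models not currently supported")
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	defer os.RemoveAll(workDir)
	require.NoError(t, llm.Init(workDir))
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	reqs := []api.GenerateRequest{
		req[0],
		{
			Model:   "llama2", // hypothetical second model tag
			Prompt:  req[1].Prompt,
			Options: map[string]interface{}{},
		},
	}
	var wg sync.WaitGroup
	wg.Add(len(reqs))
	for i := range reqs {
		go func(i int) {
			defer wg.Done()
			model, llmRunner := PrepareModelForPrompts(t, reqs[i].Model, opts)
			defer llmRunner.Close()
			response := OneShotPromptResponse(t, ctx, reqs[i], model, llmRunner)
			// Only assert on the orca-mini response; the second model's
			// expected output is not pinned down here.
			if i == 0 {
				assert.Contains(t, strings.ToLower(response), resp[0])
			}
			t.Logf("Model: %s\nResponse: %s", reqs[i].Model, response)
		}(i)
	}
	wg.Wait()
}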