//go:build integration

package integration

import (
	"context"
	"net/http"
	"sync"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
)
  11. // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
  12. // package to avoid circular dependencies
  13. // WARNING - these tests will fail on mac if you don't manually copy ggml-metal.metal to this dir (./server)
  14. //
  15. // TODO - Fix this ^^
  16. var (
  17. stream = false
  18. req = [2]api.GenerateRequest{
  19. {
  20. Model: "orca-mini",
  21. Prompt: "why is the ocean blue?",
  22. Stream: &stream,
  23. Options: map[string]interface{}{
  24. "seed": 42,
  25. "temperature": 0.0,
  26. },
  27. }, {
  28. Model: "orca-mini",
  29. Prompt: "what is the origin of the us thanksgiving holiday?",
  30. Stream: &stream,
  31. Options: map[string]interface{}{
  32. "seed": 42,
  33. "temperature": 0.0,
  34. },
  35. },
  36. }
  37. resp = [2][]string{
  38. []string{"sunlight"},
  39. []string{"england", "english", "massachusetts", "pilgrims"},
  40. }
  41. )
  42. func TestIntegrationSimpleOrcaMini(t *testing.T) {
  43. ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
  44. defer cancel()
  45. GenerateTestHelper(ctx, t, &http.Client{}, req[0], resp[0])
  46. }
  47. // TODO
  48. // The server always loads a new runner and closes the old one, which forces serial execution
  49. // At present this test case fails with concurrency problems. Eventually we should try to
  50. // get true concurrency working with n_parallel support in the backend
  51. func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
  52. var wg sync.WaitGroup
  53. wg.Add(len(req))
  54. ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
  55. defer cancel()
  56. for i := 0; i < len(req); i++ {
  57. go func(i int) {
  58. defer wg.Done()
  59. GenerateTestHelper(ctx, t, &http.Client{}, req[i], resp[i])
  60. }(i)
  61. }
  62. wg.Wait()
  63. }
// TODO - create a parallel test with 2 different models once we support concurrency