llm_test.go 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. //go:build integration
  2. package integration
  3. import (
  4. "context"
  5. "net/http"
  6. "sync"
  7. "testing"
  8. "time"
  9. "ollama.com/api"
  10. )
  11. // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
  12. // package to avoid circular dependencies
  13. var (
  14. stream = false
  15. req = [2]api.GenerateRequest{
  16. {
  17. Model: "orca-mini",
  18. Prompt: "why is the ocean blue?",
  19. Stream: &stream,
  20. Options: map[string]interface{}{
  21. "seed": 42,
  22. "temperature": 0.0,
  23. },
  24. }, {
  25. Model: "orca-mini",
  26. Prompt: "what is the origin of the us thanksgiving holiday?",
  27. Stream: &stream,
  28. Options: map[string]interface{}{
  29. "seed": 42,
  30. "temperature": 0.0,
  31. },
  32. },
  33. }
  34. resp = [2][]string{
  35. []string{"sunlight"},
  36. []string{"england", "english", "massachusetts", "pilgrims"},
  37. }
  38. )
  39. func TestIntegrationSimpleOrcaMini(t *testing.T) {
  40. ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
  41. defer cancel()
  42. GenerateTestHelper(ctx, t, &http.Client{}, req[0], resp[0])
  43. }
  44. // TODO
  45. // The server always loads a new runner and closes the old one, which forces serial execution
  46. // At present this test case fails with concurrency problems. Eventually we should try to
  47. // get true concurrency working with n_parallel support in the backend
  48. func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
  49. var wg sync.WaitGroup
  50. wg.Add(len(req))
  51. ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
  52. defer cancel()
  53. for i := 0; i < len(req); i++ {
  54. go func(i int) {
  55. defer wg.Done()
  56. GenerateTestHelper(ctx, t, &http.Client{}, req[i], resp[i])
  57. }(i)
  58. }
  59. wg.Wait()
  60. }
  61. // TODO - create a parallel test with 2 different models once we support concurrency