Explorar el Código

Give unicode test more time to run (#7437)

* Give unicode test more time to run

Some slower GPUs (or partial CPU/GPU loads) can take more than the default 30s to complete this test

* Give more time for concurrency test

CPU inference can be very slow under stress
Daniel Hiltgen hace 6 meses
padre
commit
921779bb10
Se han modificado 2 ficheros con 14 adiciones y 4 borrados
  1. 12 3
      integration/basic_test.go
  2. 2 1
      integration/concurrency_test.go

+ 12 - 3
integration/basic_test.go

@@ -31,7 +31,7 @@ func TestOrcaMiniBlueSky(t *testing.T) {
 }
 
 func TestUnicode(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -42,9 +42,15 @@ func TestUnicode(t *testing.T) {
 		Options: map[string]interface{}{
 			"temperature": 0,
 			"seed":        123,
+			// Workaround deepseek context shifting bug
+			"num_ctx":     8192,
+			"num_predict": 2048,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"散射", "频率"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	require.NoError(t, PullIfMissing(ctx, client, req.Model))
+	DoGenerate(ctx, t, client, req, []string{"散射", "频率"}, 120*time.Second, 120*time.Second)
 }
 
 func TestExtendedUnicodeOutput(t *testing.T) {
@@ -60,7 +66,10 @@ func TestExtendedUnicodeOutput(t *testing.T) {
 			"seed":        123,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	require.NoError(t, PullIfMissing(ctx, client, req.Model))
+	DoGenerate(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
 }
 
 func TestUnicodeModelDir(t *testing.T) {

+ 2 - 1
integration/concurrency_test.go

@@ -60,7 +60,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 	for i := 0; i < len(req); i++ {
 		go func(i int) {
 			defer wg.Done()
-			DoGenerate(ctx, t, client, req[i], resp[i], 60*time.Second, 10*time.Second)
+			// Note: CPU based inference can crawl so don't give up too quickly
+			DoGenerate(ctx, t, client, req[i], resp[i], 90*time.Second, 30*time.Second)
 		}(i)
 	}
 	wg.Wait()