فهرست منبع

Fix context exhaustion integration test for small gpus

On the smaller GPUs, the initial model load of llama2 took over 30s (the
default timeout for the DoGenerate helper)
Daniel Hiltgen 10 ماه پیش
والد
کامیت
73e2c8f68f
1فایلهای تغییر یافته به همراه7 افزوده شده و 2 حذف شده
  1. 7 2
      integration/context_test.go

+ 7 - 2
integration/context_test.go

@@ -12,7 +12,7 @@ import (
 
 func TestContextExhaustion(t *testing.T) {
 	// Longer needed for small footprint GPUs
-	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx":     128,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
 }