@@ -62,27 +62,28 @@ A stream of JSON objects:

The final response in the stream also includes additional data about the generation:
+- `total_duration`: total time in nanoseconds spent generating the response
+- `load_duration`: time spent in nanoseconds loading the model
+- `sample_count`: number of samples generated
+- `sample_duration`: time spent generating samples
+- `prompt_eval_count`: number of tokens in the prompt
+- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
+- `eval_count`: number of tokens in the response
+- `eval_duration`: time in nanoseconds spent generating the response
+
+To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by 10^9, since `eval_duration` is reported in nanoseconds (a runnable sketch follows the example below).
+
```json
{
  "model": "llama2:7b",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "done": true,
-
-  // total time in nanoseconds spent generating the response
  "total_duration": 5589157167,
-
-  // time spent in nanoseconds loading the model
  "load_duration": 3013701500,
-
-  // Sample: how fast tokens were sampled
  "sample_count": 114,
  "sample_duration": 81442000,
-
-  // Prompt stats: how fast the prompt was evaluated
  "prompt_eval_count": 46,
  "prompt_eval_duration": 1160282000,
-
-  // Eval stats: how fast tokens were generated by the model
  "eval_count": 113,
  "eval_duration": 1325948000
}
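For illustration, here is a minimal sketch of that calculation against a live server. It assumes Ollama's default endpoint at `http://localhost:11434/api/generate`, the Python `requests` library, and a locally pulled `llama2:7b` model; the prompt text is arbitrary. It reads the streamed JSON objects and computes token/s from the final one. With the values in the example above, that works out to 113 / 1325948000 × 10^9 ≈ 85.2 token/s.

```python
# Minimal sketch: stream a generation from a local Ollama server and
# report tokens per second from the final response object. Assumes the
# server is running on the default port and `llama2:7b` has been pulled.
import json

import requests

response = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama2:7b", "prompt": "Why is the sky blue?"},
    stream=True,
)

for line in response.iter_lines():
    if not line:
        continue
    chunk = json.loads(line)
    if chunk.get("done"):
        # eval_duration is in nanoseconds, so scale by 1e9 to get token/s.
        tokens_per_second = chunk["eval_count"] / chunk["eval_duration"] * 1e9
        print(f"{tokens_per_second:.1f} token/s")
```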