@@ -62,27 +62,28 @@ A stream of JSON objects:

The final response in the stream also includes additional data about the generation:
+- `total_duration`: total time in nanoseconds spent generating the response
+- `load_duration`: time spent in nanoseconds loading the model
+- `sample_count`: number of samples generated
+- `sample_duration`: time spent generating samples
+- `prompt_eval_count`: number of tokens in the prompt
+- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
+- `eval_count`: number of tokens in the response
+- `eval_duration`: time in nanoseconds spent generating the response
+
+To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by 10^9, since `eval_duration` is reported in nanoseconds (a runnable sketch follows the example below).
+
```json
{
  "model": "llama2:7b",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "done": true,
-
-  // total time in nanoseconds spent generating the response
  "total_duration": 5589157167,
-
-  // time spent in nanoseconds loading the model
  "load_duration": 3013701500,
-
-  // Sample: how fast tokens were sampled
  "sample_count": 114,
  "sample_duration": 81442000,
-
-  // Prompt stats: how fast the prompt was evaluated
  "prompt_eval_count": 46,
  "prompt_eval_duration": 1160282000,
-
-  // Eval stats: how fast tokens were generated by the model
  "eval_count": 113,
  "eval_duration": 1325948000
}
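For illustration, here is a minimal sketch of that calculation against a live server. It assumes Ollama's default endpoint at `http://localhost:11434/api/generate`, the Python `requests` library, and a locally pulled `llama2:7b` model; the prompt text is arbitrary. It reads the streamed JSON objects and computes token/s from the final one. With the values in the example above, that works out to 113 / 1325948000 × 10^9 ≈ 85.2 token/s.

```python
# Minimal sketch: stream a generation from a local Ollama server and
# report tokens per second from the final response object. Assumes the
# server is running on the default port and `llama2:7b` has been pulled.
import json

import requests

response = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama2:7b", "prompt": "Why is the sky blue?"},
    stream=True,
)

for line in response.iter_lines():
    if not line:
        continue
    chunk = json.loads(line)
    if chunk.get("done"):
        # eval_duration is in nanoseconds, so scale by 1e9 to get token/s.
        tokens_per_second = chunk["eval_count"] / chunk["eval_duration"] * 1e9
        print(f"{tokens_per_second:.1f} token/s")
```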