
Merge pull request #1409 from jmorganca/mattw/python-simplechat

Simple chat example
Matt Williams, 1 year ago
commit a2405ec831
2 files changed, 70 insertions, 0 deletions
  1. examples/python-simplechat/client.py (+46 −0)
  2. examples/python-simplechat/readme.md (+24 −0)

examples/python-simplechat/client.py (+46 −0)

@@ -0,0 +1,46 @@
+import json
+import requests
+
+# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
+model = "llama2"  # TODO: update this for whatever model you wish to use
+
+
+def chat(messages):
+    r = requests.post(
+        "http://0.0.0.0:11434/api/chat",
+        json={"model": model, "messages": messages, "stream": True},
+    )
+    r.raise_for_status()
+    output = ""
+
+    for line in r.iter_lines():
+        body = json.loads(line)
+        if "error" in body:
+            raise Exception(body["error"])
+        if body.get("done") is False:
+            # each streamed object carries one chunk of the reply in message.content
+            content = body.get("message", {}).get("content", "")
+            output += content
+            # the response streams one token at a time, print that as we receive it
+            print(content, end="", flush=True)
+
+        if body.get("done", False):
+            # the final object doesn't include the full reply, so return the
+            # accumulated text as a complete assistant message
+            return {"role": "assistant", "content": output}
+
+
+def main():
+    messages = []
+
+    while True:
+        user_input = input("Enter a prompt: ")
+        print()
+        messages.append({"role": "user", "content": user_input})
+        message = chat(messages)
+        messages.append(message)
+        print("\n\n")
+
+
+if __name__ == "__main__":
+    main()

examples/python-simplechat/readme.md (+24 −0)

@@ -0,0 +1,24 @@
+# Simple Chat Example
+
+The **chat** endpoint is one of two ways to generate text from an LLM with Ollama. At a high level, you provide the endpoint with an array of message objects, each specifying a role and content. With each prompt and response you append more of these role/content objects, building up the conversation history.
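+
+For example, a short history might look like this (the content values here are purely illustrative):
+
+```python
+messages = [
+    {"role": "user", "content": "Why is the sky blue?"},
+    {"role": "assistant", "content": "The sky appears blue because of Rayleigh scattering..."},
+    {"role": "user", "content": "Now explain it to a five year old."},
+]
+```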
+
+## Review the Code
+
+You can see in the **chat** function that calling the endpoint is as simple as:
+
+```python
+r = requests.post(
+  "http://0.0.0.0:11434/api/chat",
+  json={"model": model, "messages": messages, "stream": True},
+)
+```
+
+With the **generate** endpoint you provide a `prompt`, but with **chat** you provide `messages`. The resulting stream of responses includes a `message` object with a `content` field.
+
+The final JSON object (the one with `done` set to `true`) doesn't include the full content, so you need to accumulate the streamed chunks yourself.
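+
+For illustration, a streamed exchange looks roughly like the lines below (fields abbreviated, and the exact set of fields may vary between versions):
+
+```json
+{"model": "llama2", "message": {"role": "assistant", "content": "The"}, "done": false}
+{"model": "llama2", "message": {"role": "assistant", "content": " sky"}, "done": false}
+{"model": "llama2", "done": true, "total_duration": 5141200000}
+```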
+
+In the **main** function, we collect `user_input`, append it to `messages` as a user message, and pass the list to the **chat** function. When the LLM finishes responding, its reply is appended as another message, so the full history goes out with the next prompt.
+
+## Next Steps
+
+In this example, the entire conversation is kept and sent back to the model with every prompt. To allow a longer history while using less of the context window, you might experiment with summarizing everything older than the last 10 exchanges, as sketched below.
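+
+A minimal sketch of that idea, reusing the `chat` function above (the `summarize_history` helper and the `keep_last` threshold are hypothetical, not part of this example):
+
+```python
+def summarize_history(messages, keep_last=10):
+    # hypothetical helper: collapse everything older than the last
+    # `keep_last` messages into a single summary message
+    if len(messages) <= keep_last:
+        return messages
+    old, recent = messages[:-keep_last], messages[-keep_last:]
+    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in old)
+    # note: chat() also prints the summary as it streams
+    summary = chat(
+        [{"role": "user", "content": "Briefly summarize this conversation:\n" + transcript}]
+    )
+    return [{"role": "assistant", "content": summary["content"]}] + recent
+```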