client.py

import json

import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
model = 'stablelm-zephyr'  # TODO: update this for whatever model you wish to use


def generate(prompt, context):
    r = requests.post('http://localhost:11434/api/generate',
                      json={
                          'model': model,
                          'prompt': prompt,
                          'context': context,
                      },
                      stream=True)
    r.raise_for_status()

    for line in r.iter_lines():
        body = json.loads(line)
        response_part = body.get('response', '')
        # the response streams one token at a time, print that as we receive it
        print(response_part, end='', flush=True)

        if 'error' in body:
            raise Exception(body['error'])

        if body.get('done', False):
            return body['context']
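
# For reference: each line streamed back by /api/generate is a standalone JSON
# object. An illustrative (abbreviated) intermediate chunk looks like:
#   {"model": "stablelm-zephyr", "response": " Hello", "done": false}
# and the final chunk has "done": true plus the "context" token array that
# generate() returns, so the next call can continue the conversation.
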
def main():
    context = []  # the context stores a conversation history, you can use this to make the model more context aware
    while True:
        user_input = input("Enter a prompt: ")
        if not user_input:
            exit()
        print()
        context = generate(user_input, context)
        print()


if __name__ == "__main__":
    main()
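
To try the client (assuming Ollama is installed locally), a typical session looks like:

    ollama pull stablelm-zephyr   # one-time download of the model weights
    ollama serve                  # only needed if the Ollama app is not already running
    python client.py

Type a prompt at the "Enter a prompt: " input; pressing Enter on an empty line exits.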