import json
import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
model = 'llama2'  # TODO: update this for whatever model you wish to use
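# (if the model is not available locally yet, `ollama pull llama2` downloads it first)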


def generate(prompt, context):
    r = requests.post('http://localhost:11434/api/generate',
                      json={
                          'model': model,
                          'prompt': prompt,
                          'context': context,
                      },
                      stream=True)
    r.raise_for_status()

    for line in r.iter_lines():
        body = json.loads(line)
        response_part = body.get('response', '')
        # the response streams one token at a time, print that as we receive it
        print(response_part, end='', flush=True)

        if 'error' in body:
            raise Exception(body['error'])

        if body.get('done', False):
            return body['context']
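
# For reference, each streamed line is a standalone JSON object. Judging from
# the fields read above, a line looks roughly like
#   {"response": " Hello", "done": false}
# and the final line carries "done": true plus the accumulated "context" array.


# A minimal non-streaming sketch: this assumes the API accepts 'stream': False
# and then returns a single JSON object with the complete response in one go.
# generate_once is a hypothetical helper, not part of the original example.
def generate_once(prompt):
    r = requests.post('http://localhost:11434/api/generate',
                      json={'model': model, 'prompt': prompt, 'stream': False})
    r.raise_for_status()
    return r.json()['response']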


def main():
    context = []  # the context stores a conversation history, you can use this to make the model more context aware
    while True:
        user_input = input("Enter a prompt: ")
        print()
        context = generate(user_input, context)
        print()
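
# Chaining calls outside the interactive loop (an illustrative sketch; the
# prompts are made up): feeding the returned context back into the next call
# lets a follow-up prompt refer to the previous answer.
#
#   ctx = generate('Why is the sky blue?', [])
#   ctx = generate('Summarize that in one sentence.', ctx)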

if __name__ == "__main__":
    main()