# (extraction artifact removed: fused run of line numbers 1-225 from the original rendering)
- import os
- import json
- import requests
# Base URL of the Ollama server; override with the OLLAMA_HOST env var.
BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None):
    """Generate a response for *prompt* with *model_name*.

    This hits the streaming /api/generate endpoint, so the server returns a
    series of JSON chunks; the final chunk (``done`` true) includes statistics
    and the conversation ``context``. If *callback* is provided it is invoked
    with every chunk and nothing is printed or accumulated; otherwise each
    partial response is printed as it arrives and collected.

    Returns:
        (full_response, final_context): the concatenated response text
        (empty string when a callback is used) and the context from the
        final chunk — or (None, None) on a request error.
    """
    try:
        url = f"{BASE_URL}/api/generate"
        payload = {
            "model": model_name,
            "prompt": prompt,
            "system": system,
            "template": template,
            "context": context,
            "options": options
        }

        # Drop unset optional fields so the server applies its own defaults.
        payload = {k: v for k, v in payload.items() if v is not None}

        with requests.post(url, json=payload, stream=True) as response:
            response.raise_for_status()

            # Context history carried by the final (done=True) chunk.
            final_context = None

            # Response pieces collected when no callback is provided; joined
            # once at the end to avoid quadratic string concatenation.
            pieces = []

            # Each non-empty line is one JSON chunk of the stream.
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)

                if callback:
                    # Caller handles every chunk itself.
                    callback(chunk)
                elif not chunk.get("done"):
                    # Intermediate chunk: accumulate and echo the text piece.
                    response_piece = chunk.get("response", "")
                    pieces.append(response_piece)
                    print(response_piece, end="", flush=True)

                # The last chunk (done=True) carries the context history.
                if chunk.get("done"):
                    final_context = chunk.get("context")

        return "".join(pieces), final_context
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None, None
def create(model_name, model_path, callback=None):
    """Create a model named *model_name* from the Modelfile at *model_path*.

    Streams status chunks from /api/create; each chunk is handed to
    *callback* when one is provided, otherwise its status is printed.
    """
    try:
        # Stream the response so status updates arrive as they happen.
        with requests.post(
            f"{BASE_URL}/api/create",
            json={"name": model_name, "path": model_path},
            stream=True,
        ) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                # Each non-empty line is one JSON status chunk.
                status_chunk = json.loads(raw_line)
                if callback:
                    callback(status_chunk)
                else:
                    print(f"Status: {status_chunk.get('status')}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
def pull(model_name, insecure=False, callback=None):
    """Pull *model_name* from the model registry.

    Cancelled pulls are resumed from where they left off, and multiple calls
    share the same download progress. Streams JSON chunks from /api/pull;
    each chunk is handed to *callback* when provided, otherwise progress is
    printed to the console.
    """
    try:
        url = f"{BASE_URL}/api/pull"
        payload = {
            "name": model_name,
            "insecure": insecure
        }
        # Making a POST request with the stream parameter set to True to handle streaming responses
        with requests.post(url, json=payload, stream=True) as response:
            response.raise_for_status()
            # Iterating over the response line by line and displaying the details
            for line in response.iter_lines():
                if not line:
                    continue
                # Parsing each line (JSON chunk) and extracting the details
                chunk = json.loads(line)
                # If a callback function is provided, call it with the chunk
                if callback:
                    callback(chunk)
                    continue
                # Print the status message directly to the console
                print(chunk.get('status', ''), end='', flush=True)

                # Progress chunks carry layer data; 'total'/'completed' may be
                # absent on the first chunk for a layer, so use .get() to
                # avoid a KeyError mid-download.
                if 'digest' in chunk:
                    print(f" - Digest: {chunk['digest']}", end='', flush=True)
                    print(f" - Total: {chunk.get('total')}", end='', flush=True)
                    print(f" - Completed: {chunk.get('completed')}", end='\n', flush=True)
                else:
                    print()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
def push(model_name, insecure=False, callback=None):
    """Push *model_name* to the model registry.

    Streams JSON chunks from /api/push; each chunk is handed to *callback*
    when provided, otherwise progress is printed to the console.
    """
    try:
        url = f"{BASE_URL}/api/push"
        payload = {
            "name": model_name,
            "insecure": insecure
        }
        # Making a POST request with the stream parameter set to True to handle streaming responses
        with requests.post(url, json=payload, stream=True) as response:
            response.raise_for_status()
            # Iterating over the response line by line and displaying the details
            for line in response.iter_lines():
                if not line:
                    continue
                # Parsing each line (JSON chunk) and extracting the details
                chunk = json.loads(line)
                # If a callback function is provided, call it with the chunk
                if callback:
                    callback(chunk)
                    continue
                # Print the status message directly to the console
                print(chunk.get('status', ''), end='', flush=True)

                # Progress chunks carry layer data; 'total'/'completed' may be
                # absent on the first chunk for a layer, so use .get() to
                # avoid a KeyError mid-upload.
                if 'digest' in chunk:
                    print(f" - Digest: {chunk['digest']}", end='', flush=True)
                    print(f" - Total: {chunk.get('total')}", end='', flush=True)
                    print(f" - Completed: {chunk.get('completed')}", end='\n', flush=True)
                else:
                    print()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
def list():
    """Return the models available locally, or None on a request error.

    NOTE(review): this shadows the builtin ``list``; the name is kept
    because it is part of this module's public API.
    """
    try:
        tags_response = requests.get(f"{BASE_URL}/api/tags")
        tags_response.raise_for_status()
        # The /api/tags payload wraps the model list in a 'models' key.
        return tags_response.json().get('models', [])
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
def copy(source, destination):
    """Copy a model: create *destination* as a new model from *source*.

    Returns "Copy successful" on success, None on a request error.
    """
    try:
        body = {"source": source, "destination": destination}
        resp = requests.post(f"{BASE_URL}/api/copy", json=body)
        resp.raise_for_status()
        # Success path: report it to the caller with a simple message.
        return "Copy successful"
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
def delete(model_name):
    """Delete *model_name* and its data.

    Returns "Delete successful" on success, None on a request error.
    """
    try:
        resp = requests.delete(f"{BASE_URL}/api/delete", json={"name": model_name})
        resp.raise_for_status()
        return "Delete successful"
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
def show(model_name):
    """Return the /api/show info for *model_name*, or None on a request error."""
    try:
        resp = requests.post(f"{BASE_URL}/api/show", json={"name": model_name})
        resp.raise_for_status()
        # The endpoint responds with a JSON document describing the model.
        return resp.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
def heartbeat():
    """Check whether the Ollama server answers a HEAD request at its root.

    Returns "Ollama is running" when the server responds successfully,
    otherwise prints the error and returns "Ollama is not running".
    """
    try:
        probe = requests.head(f"{BASE_URL}/")
        probe.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return "Ollama is not running"
    return "Ollama is running"
|