1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- import curses
- import json
- from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search
- import requests
- from sentence_transformers import SentenceTransformer
- from mattsollamatools import chunker
- if __name__ == "__main__":
- chosen_topic = curses.wrapper(menu)
- print("Here is your news summary:\n")
- urls = getUrls(chosen_topic, n=5)
- model = SentenceTransformer('all-MiniLM-L6-v2')
- allEmbeddings = []
- for url in urls:
- article={}
- article['embeddings'] = []
- article['url'] = url
- text = getArticleText(url)
- summary = get_summary(text)
- chunks = chunker(text) # Use the chunk_text function from web_utils
- embeddings = model.encode(chunks)
- for (chunk, embedding) in zip(chunks, embeddings):
- item = {}
- item['source'] = chunk
- item['embedding'] = embedding.tolist() # Convert NumPy array to list
- item['sourcelength'] = len(chunk)
- article['embeddings'].append(item)
-
- allEmbeddings.append(article)
- print(f"{summary}\n")
-
- while True:
- context = []
- # Input a question from the user
- question = input("Enter your question about the news, or type quit: ")
- if question.lower() == 'quit':
- break
- # Embed the user's question
- question_embedding = model.encode([question])
- # Perform KNN search to find the best matches (indices and source text)
- best_matches = knn_search(question_embedding, allEmbeddings, k=10)
- sourcetext=""
- for i, (index, source_text) in enumerate(best_matches, start=1):
- sourcetext += f"{i}. Index: {index}, Source Text: {source_text}"
- systemPrompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
- url = "http://localhost:11434/api/generate"
- payload = {
- "model": "mistral-openorca",
- "prompt": question,
- "system": systemPrompt,
- "stream": False,
- "context": context
- }
- # Convert the payload to a JSON string
- payload_json = json.dumps(payload)
- # Set the headers to specify JSON content
- headers = {
- "Content-Type": "application/json"
- }
- # Send the POST request
- response = requests.post(url, data=payload_json, headers=headers)
- # Check the response
- if response.status_code == 200:
- output = json.loads(response.text)
- context = output['context']
- print(output['response']+ "\n")
-
- else:
- print(f"Request failed with status code {response.status_code}")
|