- import ollama
- import warnings
- from mattsollamatools import chunker
- from newspaper import Article
- import numpy as np
- from sklearn.neighbors import NearestNeighbors
- import nltk
- warnings.filterwarnings(
-     "ignore", category=FutureWarning, module="transformers.tokenization_utils_base"
- )
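- # punkt_tab provides the sentence tokenizer data used by nltk.sent_tokenize below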
- nltk.download("punkt_tab", quiet=True)
- def getArticleText(url):
-     """Gets the text of an article from a URL.
-
-     Often there are a bunch of ads and menus on pages for a news article.
-     This uses newspaper3k to get just the text of the article.
-     """
-     article = Article(url)
-     article.download()
-     article.parse()
-     return article.text
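- # Illustrative usage (hypothetical URL):
- #   getArticleText("https://example.com/some-story") -> the article body as plain text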
- def knn_search(question_embedding, embeddings, k=5):
-     """Performs K-nearest neighbors (KNN) search.
-
-     Returns the k chunks whose embeddings are closest to the question
-     embedding by cosine distance, as (index, source text) pairs.
-     """
-     X = np.array(
-         [item["embedding"] for article in embeddings for item in article["embeddings"]]
-     )
-     source_texts = [
-         item["source"] for article in embeddings for item in article["embeddings"]
-     ]
-     # Fit a KNN model on the embeddings
-     knn = NearestNeighbors(n_neighbors=k, metric="cosine")
-     knn.fit(X)
-     # Find the indices and distances of the k-nearest neighbors
-     _, indices = knn.kneighbors(question_embedding, n_neighbors=k)
-     # Get the indices and source texts of the best matches
-     best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]
-     return best_matches
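- # `embeddings` mirrors the structure built in __main__ below (illustrative values):
- #   [{"url": "...", "embeddings": [
- #       {"source": "chunk text", "embedding": [0.12, ...], "sourcelength": 10}]}]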
- def check(document, claim):
-     """Checks if the claim is supported by the document by calling bespoke-minicheck.
-
-     Returns Yes/yes if the claim is supported by the document, No/no otherwise.
-     Support for logits will be added in the future.
-
-     bespoke-minicheck's system prompt is defined as:
-       'Determine whether the provided claim is consistent with the corresponding
-       document. Consistency in this context implies that all information presented in the claim
-       is substantiated by the document. If not, it should be considered inconsistent. Please
-       assess the claim's consistency with the document by responding with either "Yes" or "No".'
-
-     bespoke-minicheck's user prompt is defined as:
-       "Document: {document}\nClaim: {claim}"
-     """
-     prompt = f"Document: {document}\nClaim: {claim}"
-     response = ollama.generate(
-         model="bespoke-minicheck",
-         prompt=prompt,
-         options={"num_predict": 2, "temperature": 0.0},
-     )
-     return response["response"].strip()
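- # Illustrative behavior (hypothetical strings):
- #   check("Paris is the capital of France.", "Paris is in France.")   # -> "Yes"
- #   check("Paris is the capital of France.", "Paris is in Germany.")  # -> "No"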
- if __name__ == "__main__":
-     allEmbeddings = []
-     default_url = "https://www.theverge.com/2024/9/12/24242439/openai-o1-model-reasoning-strawberry-chatgpt"
-     user_input = input(
-         "Enter the URL of an article you want to chat with, or press Enter for default example: "
-     )
-     article_url = user_input.strip() if user_input.strip() else default_url
-     article = {}
-     article["embeddings"] = []
-     article["url"] = article_url
-     text = getArticleText(article_url)
-     chunks = chunker(text)
-     # Embed (batch) chunks using ollama
-     embeddings = ollama.embed(model="all-minilm", input=chunks)["embeddings"]
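-     # ollama.embed accepts a list of strings and returns one embedding per input,
-     # so chunks and embeddings line up and can be zipped together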
-     for chunk, embedding in zip(chunks, embeddings):
-         item = {}
-         item["source"] = chunk
-         item["embedding"] = embedding
-         item["sourcelength"] = len(chunk)
-         article["embeddings"].append(item)
-     allEmbeddings.append(article)
-     print(f"\nLoaded, chunked, and embedded text from {article_url}.\n")
-     while True:
-         # Input a question from the user
-         # For example, "Who is the chief research officer?"
-         question = input("Enter your question or type quit: ")
-         if question.lower() == "quit":
-             break
-         # Embed the user's question using ollama.embed
-         question_embedding = ollama.embed(model="all-minilm", input=question)[
-             "embeddings"
-         ]
-         # Perform KNN search to find the best matches (indices and source text)
-         best_matches = knn_search(question_embedding, allEmbeddings, k=4)
-         sourcetext = "\n\n".join([source_text for (_, source_text) in best_matches])
-         print(f"\nRetrieved chunks: \n{sourcetext}\n")
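-         # Constraining the chat model to the retrieved chunks keeps its answer
-         # checkable: each sentence can then be verified against the same sourcetext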
-         # Give the retrieved chunks and question to the chat model
-         system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
-         ollama_response = ollama.generate(
-             model="llama3.2",
-             prompt=question,
-             system=system_prompt,
-             stream=False,
-         )
-         answer = ollama_response["response"]
-         print(f"LLM Answer:\n{answer}\n")
-         # Check each sentence in the response for grounded factuality
-         if answer:
-             for claim in nltk.sent_tokenize(answer):
-                 print(f"LLM Claim: {claim}")
-                 print(
-                     f"Is this claim supported by the context according to bespoke-minicheck? {check(sourcetext, claim)}\n"
-                 )