# extractwp.py
import json
from itertools import islice

import requests
  3. model = "orca2"
  4. systemprompt = "You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints."
  5. schema = {
  6. "people": [
  7. {
  8. "name": {"type": "string", "description": "Name of the person"},
  9. "title": {"type": "string", "description": "Title of the person"},
  10. }
  11. ],
  12. }
  13. # Read the content from the file
  14. words = []
  15. with open("wp.txt") as f:
  16. maxwords = 2000
  17. count = 0
  18. lines = f.readlines()
  19. for line in lines:
  20. for word in line.split(" "):
  21. count += 1
  22. if count > maxwords:
  23. break
  24. words.append(word)
  25. content = ' '.join(words)
  26. # Use the text and schema to set the prompt
  27. prompt = f"Review the source text and determine 10 the most important people to focus on. Then extract the name and title for those people. Output should be in JSON.\n\nSchema: {schema}\n\nSource Text:\n{content}"
  28. # Make the actual request to the model
  29. r = requests.post(
  30. "http://localhost:11434/api/generate",
  31. json={
  32. "model": model,
  33. "system": systemprompt,
  34. "prompt": prompt,
  35. "format": "json",
  36. "stream": False
  37. },
  38. )
  39. # Get the response as JSON.
  40. j = json.loads(r.text)
  41. # Return the result.
  42. print(j["response"])