misc.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. from pathlib import Path
  2. import hashlib
  3. import json
  4. import re
  5. from datetime import timedelta
  6. from typing import Optional, List
  7. def get_last_user_message(messages: List[dict]) -> str:
  8. for message in reversed(messages):
  9. if message["role"] == "user":
  10. if isinstance(message["content"], list):
  11. for item in message["content"]:
  12. if item["type"] == "text":
  13. return item["text"]
  14. return message["content"]
  15. return None
  16. def get_last_assistant_message(messages: List[dict]) -> str:
  17. for message in reversed(messages):
  18. if message["role"] == "assistant":
  19. if isinstance(message["content"], list):
  20. for item in message["content"]:
  21. if item["type"] == "text":
  22. return item["text"]
  23. return message["content"]
  24. return None
  25. def add_or_update_system_message(content: str, messages: List[dict]):
  26. """
  27. Adds a new system message at the beginning of the messages list
  28. or updates the existing system message at the beginning.
  29. :param msg: The message to be added or appended.
  30. :param messages: The list of message dictionaries.
  31. :return: The updated list of message dictionaries.
  32. """
  33. if messages and messages[0].get("role") == "system":
  34. messages[0]["content"] += f"{content}\n{messages[0]['content']}"
  35. else:
  36. # Insert at the beginning
  37. messages.insert(0, {"role": "system", "content": content})
  38. return messages
  39. def get_gravatar_url(email):
  40. # Trim leading and trailing whitespace from
  41. # an email address and force all characters
  42. # to lower case
  43. address = str(email).strip().lower()
  44. # Create a SHA256 hash of the final string
  45. hash_object = hashlib.sha256(address.encode())
  46. hash_hex = hash_object.hexdigest()
  47. # Grab the actual image URL
  48. return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"
  49. def calculate_sha256(file):
  50. sha256 = hashlib.sha256()
  51. # Read the file in chunks to efficiently handle large files
  52. for chunk in iter(lambda: file.read(8192), b""):
  53. sha256.update(chunk)
  54. return sha256.hexdigest()
  55. def calculate_sha256_string(string):
  56. # Create a new SHA-256 hash object
  57. sha256_hash = hashlib.sha256()
  58. # Update the hash object with the bytes of the input string
  59. sha256_hash.update(string.encode("utf-8"))
  60. # Get the hexadecimal representation of the hash
  61. hashed_string = sha256_hash.hexdigest()
  62. return hashed_string
  63. def validate_email_format(email: str) -> bool:
  64. if email.endswith("@localhost"):
  65. return True
  66. return bool(re.match(r"[^@]+@[^@]+\.[^@]+", email))
  67. def sanitize_filename(file_name):
  68. # Convert to lowercase
  69. lower_case_file_name = file_name.lower()
  70. # Remove special characters using regular expression
  71. sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)
  72. # Replace spaces with dashes
  73. final_file_name = re.sub(r"\s+", "-", sanitized_file_name)
  74. return final_file_name
  75. def extract_folders_after_data_docs(path):
  76. # Convert the path to a Path object if it's not already
  77. path = Path(path)
  78. # Extract parts of the path
  79. parts = path.parts
  80. # Find the index of '/data/docs' in the path
  81. try:
  82. index_data_docs = parts.index("data") + 1
  83. index_docs = parts.index("docs", index_data_docs) + 1
  84. except ValueError:
  85. return []
  86. # Exclude the filename and accumulate folder names
  87. tags = []
  88. folders = parts[index_docs:-1]
  89. for idx, part in enumerate(folders):
  90. tags.append("/".join(folders[: idx + 1]))
  91. return tags
  92. def parse_duration(duration: str) -> Optional[timedelta]:
  93. if duration == "-1" or duration == "0":
  94. return None
  95. # Regular expression to find number and unit pairs
  96. pattern = r"(-?\d+(\.\d+)?)(ms|s|m|h|d|w)"
  97. matches = re.findall(pattern, duration)
  98. if not matches:
  99. raise ValueError("Invalid duration string")
  100. total_duration = timedelta()
  101. for number, _, unit in matches:
  102. number = float(number)
  103. if unit == "ms":
  104. total_duration += timedelta(milliseconds=number)
  105. elif unit == "s":
  106. total_duration += timedelta(seconds=number)
  107. elif unit == "m":
  108. total_duration += timedelta(minutes=number)
  109. elif unit == "h":
  110. total_duration += timedelta(hours=number)
  111. elif unit == "d":
  112. total_duration += timedelta(days=number)
  113. elif unit == "w":
  114. total_duration += timedelta(weeks=number)
  115. return total_duration
  116. def parse_ollama_modelfile(model_text):
  117. parameters_meta = {
  118. "mirostat": int,
  119. "mirostat_eta": float,
  120. "mirostat_tau": float,
  121. "num_ctx": int,
  122. "repeat_last_n": int,
  123. "repeat_penalty": float,
  124. "temperature": float,
  125. "seed": int,
  126. "tfs_z": float,
  127. "num_predict": int,
  128. "top_k": int,
  129. "top_p": float,
  130. "num_keep": int,
  131. "typical_p": float,
  132. "presence_penalty": float,
  133. "frequency_penalty": float,
  134. "penalize_newline": bool,
  135. "numa": bool,
  136. "num_batch": int,
  137. "num_gpu": int,
  138. "main_gpu": int,
  139. "low_vram": bool,
  140. "f16_kv": bool,
  141. "vocab_only": bool,
  142. "use_mmap": bool,
  143. "use_mlock": bool,
  144. "num_thread": int,
  145. }
  146. data = {"base_model_id": None, "params": {}}
  147. # Parse base model
  148. base_model_match = re.search(
  149. r"^FROM\s+(\w+)", model_text, re.MULTILINE | re.IGNORECASE
  150. )
  151. if base_model_match:
  152. data["base_model_id"] = base_model_match.group(1)
  153. # Parse template
  154. template_match = re.search(
  155. r'TEMPLATE\s+"""(.+?)"""', model_text, re.DOTALL | re.IGNORECASE
  156. )
  157. if template_match:
  158. data["params"] = {"template": template_match.group(1).strip()}
  159. # Parse stops
  160. stops = re.findall(r'PARAMETER stop "(.*?)"', model_text, re.IGNORECASE)
  161. if stops:
  162. data["params"]["stop"] = stops
  163. # Parse other parameters from the provided list
  164. for param, param_type in parameters_meta.items():
  165. param_match = re.search(rf"PARAMETER {param} (.+)", model_text, re.IGNORECASE)
  166. if param_match:
  167. value = param_match.group(1)
  168. try:
  169. if param_type == int:
  170. value = int(value)
  171. elif param_type == float:
  172. value = float(value)
  173. elif param_type == bool:
  174. value = value.lower() == "true"
  175. except Exception as e:
  176. print(e)
  177. continue
  178. data["params"][param] = value
  179. # Parse adapter
  180. adapter_match = re.search(r"ADAPTER (.+)", model_text, re.IGNORECASE)
  181. if adapter_match:
  182. data["params"]["adapter"] = adapter_match.group(1)
  183. # Parse system description
  184. system_desc_match = re.search(
  185. r'SYSTEM\s+"""(.+?)"""', model_text, re.DOTALL | re.IGNORECASE
  186. )
  187. system_desc_match_single = re.search(
  188. r"SYSTEM\s+([^\n]+)", model_text, re.IGNORECASE
  189. )
  190. if system_desc_match:
  191. data["params"]["system"] = system_desc_match.group(1).strip()
  192. elif system_desc_match_single:
  193. data["params"]["system"] = system_desc_match_single.group(1).strip()
  194. # Parse messages
  195. messages = []
  196. message_matches = re.findall(r"MESSAGE (\w+) (.+)", model_text, re.IGNORECASE)
  197. for role, content in message_matches:
  198. messages.append({"role": role, "content": content})
  199. if messages:
  200. data["params"]["messages"] = messages
  201. return data