misc.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. from pathlib import Path
  2. import hashlib
  3. import re
  4. from datetime import timedelta
  5. from typing import Optional
  6. def get_gravatar_url(email):
  7. # Trim leading and trailing whitespace from
  8. # an email address and force all characters
  9. # to lower case
  10. address = str(email).strip().lower()
  11. # Create a SHA256 hash of the final string
  12. hash_object = hashlib.sha256(address.encode())
  13. hash_hex = hash_object.hexdigest()
  14. # Grab the actual image URL
  15. return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"
  16. def calculate_sha256(file):
  17. sha256 = hashlib.sha256()
  18. # Read the file in chunks to efficiently handle large files
  19. for chunk in iter(lambda: file.read(8192), b""):
  20. sha256.update(chunk)
  21. return sha256.hexdigest()
  22. def calculate_sha256_string(string):
  23. # Create a new SHA-256 hash object
  24. sha256_hash = hashlib.sha256()
  25. # Update the hash object with the bytes of the input string
  26. sha256_hash.update(string.encode("utf-8"))
  27. # Get the hexadecimal representation of the hash
  28. hashed_string = sha256_hash.hexdigest()
  29. return hashed_string
  30. def validate_email_format(email: str) -> bool:
  31. if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
  32. return False
  33. return True
  34. def sanitize_filename(file_name):
  35. # Convert to lowercase
  36. lower_case_file_name = file_name.lower()
  37. # Remove special characters using regular expression
  38. sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)
  39. # Replace spaces with dashes
  40. final_file_name = re.sub(r"\s+", "-", sanitized_file_name)
  41. return final_file_name
  42. def extract_folders_after_data_docs(path):
  43. # Convert the path to a Path object if it's not already
  44. path = Path(path)
  45. # Extract parts of the path
  46. parts = path.parts
  47. # Find the index of '/data/docs' in the path
  48. try:
  49. index_data_docs = parts.index("data") + 1
  50. index_docs = parts.index("docs", index_data_docs) + 1
  51. except ValueError:
  52. return []
  53. # Exclude the filename and accumulate folder names
  54. tags = []
  55. folders = parts[index_docs:-1]
  56. for idx, part in enumerate(folders):
  57. tags.append("/".join(folders[: idx + 1]))
  58. return tags
  59. def parse_duration(duration: str) -> Optional[timedelta]:
  60. if duration == "-1" or duration == "0":
  61. return None
  62. # Regular expression to find number and unit pairs
  63. pattern = r"(-?\d+(\.\d+)?)(ms|s|m|h|d|w)"
  64. matches = re.findall(pattern, duration)
  65. if not matches:
  66. raise ValueError("Invalid duration string")
  67. total_duration = timedelta()
  68. for number, _, unit in matches:
  69. number = float(number)
  70. if unit == "ms":
  71. total_duration += timedelta(milliseconds=number)
  72. elif unit == "s":
  73. total_duration += timedelta(seconds=number)
  74. elif unit == "m":
  75. total_duration += timedelta(minutes=number)
  76. elif unit == "h":
  77. total_duration += timedelta(hours=number)
  78. elif unit == "d":
  79. total_duration += timedelta(days=number)
  80. elif unit == "w":
  81. total_duration += timedelta(weeks=number)
  82. return total_duration