# misc.py — hashing, validation, and path-manipulation helpers
  1. from pathlib import Path
  2. import hashlib
  3. import re
  4. def get_gravatar_url(email):
  5. # Trim leading and trailing whitespace from
  6. # an email address and force all characters
  7. # to lower case
  8. address = str(email).strip().lower()
  9. # Create a SHA256 hash of the final string
  10. hash_object = hashlib.sha256(address.encode())
  11. hash_hex = hash_object.hexdigest()
  12. # Grab the actual image URL
  13. return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"
  14. def calculate_sha256(file):
  15. sha256 = hashlib.sha256()
  16. # Read the file in chunks to efficiently handle large files
  17. for chunk in iter(lambda: file.read(8192), b""):
  18. sha256.update(chunk)
  19. return sha256.hexdigest()
  20. def calculate_sha256_string(string):
  21. # Create a new SHA-256 hash object
  22. sha256_hash = hashlib.sha256()
  23. # Update the hash object with the bytes of the input string
  24. sha256_hash.update(string.encode("utf-8"))
  25. # Get the hexadecimal representation of the hash
  26. hashed_string = sha256_hash.hexdigest()
  27. return hashed_string
  28. def validate_email_format(email: str) -> bool:
  29. if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
  30. return False
  31. return True
  32. def sanitize_filename(file_name):
  33. # Convert to lowercase
  34. lower_case_file_name = file_name.lower()
  35. # Remove special characters using regular expression
  36. sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)
  37. # Replace spaces with dashes
  38. final_file_name = re.sub(r"\s+", "-", sanitized_file_name)
  39. return final_file_name
  40. def extract_folders_after_data_docs(path):
  41. # Convert the path to a Path object if it's not already
  42. path = Path(path)
  43. # Extract parts of the path
  44. parts = path.parts
  45. # Find the index of '/data/docs' in the path
  46. try:
  47. index_data_docs = parts.index("data") + 1
  48. index_docs = parts.index("docs", index_data_docs) + 1
  49. except ValueError:
  50. return []
  51. # Exclude the filename and accumulate folder names
  52. tags = []
  53. folders = parts[index_docs:-1]
  54. for idx, part in enumerate(folders):
  55. tags.append("/".join(folders[: idx + 1]))
  56. return tags