Replacing slugify with a new function

This commit is contained in:
KnugiHK
2025-05-17 16:04:31 +08:00
parent 96e483a6b0
commit 3f88f7fe08
4 changed files with 67 additions and 21 deletions

View File

@@ -13,7 +13,7 @@ from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable
from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable
logger = logging.getLogger(__name__)
@@ -668,8 +668,8 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
# Copy media to separate folder if needed
if separate_media:
chat_display_name = slugify(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True)
chat_display_name = safe_name(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0])
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)

View File

@@ -9,7 +9,7 @@ from mimetypes import MimeTypes
from markupsafe import escape as htmle
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, slugify, Device
from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name, Device
logger = logging.getLogger(__name__)
@@ -402,8 +402,8 @@ def process_media_item(content, data, media_folder, mime, separate_media):
# Handle separate media option
if separate_media:
chat_display_name = slugify(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
chat_display_name = safe_name(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0])
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)

View File

@@ -5,6 +5,7 @@ import json
import os
import unicodedata
import re
import string
import math
import shutil
from bleach import clean as sanitize
@@ -12,7 +13,7 @@ from markupsafe import Markup
from datetime import datetime, timedelta
from enum import IntEnum
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Union
try:
from enum import StrEnum, IntEnum
except ImportError:
@@ -600,26 +601,28 @@ def setup_template(template: Optional[str], no_avatar: bool, experimental: bool
APPLE_TIME = 978307200
def slugify(value: str, allow_unicode: bool = False) -> str:
    """
    Convert text to ASCII-only slugs for URL-safe strings.
    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Args:
        value (str): The string to convert to a slug.
        allow_unicode (bool, optional): Whether to allow Unicode characters. Defaults to False.

    Returns:
        str: The slugified string with only alphanumerics, underscores, or hyphens.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # Decompose accented characters, then drop whatever is not ASCII.
        value = unicodedata.normalize('NFKD', value).encode(
            'ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')


# NOTE: the annotation is Union[str, bytes] rather than Union[str|bytes]; the
# ``|`` operator between types is evaluated when the function is defined and
# raises TypeError on Python versions older than 3.10.
def safe_name(text: Union[str, bytes]) -> str:
    """
    Sanitize the input text and generate a safe file name.

    This function serves a similar purpose to slugify() from Django,
    previously used in this project, but is a clean-room reimplementation
    tailored for performance and a narrower use case for this project.
    Licensed under the same terms as the project (MIT).

    Args:
        text (str|bytes): The text to be sanitized. Bytes are decoded as
            UTF-8, silently dropping undecodable bytes.

    Returns:
        str: The NFKC-normalized text keeping only alphanumerics (as judged
            by str.isalnum(), so non-ASCII letters and digits are kept),
            hyphens, underscores, and dots. Runs of spaces collapse into a
            single hyphen and leading/trailing spaces are removed. Case is
            preserved. The result may be empty, and dot-only names such as
            ".." are returned unchanged, so callers that build paths from
            the result should account for those.

    Raises:
        TypeError: If text is neither a str nor bytes.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8", "ignore")
    elif not isinstance(text, str):
        raise TypeError("text must be a string or bytes")
    normalized_text = unicodedata.normalize("NFKC", text)
    kept = "".join(
        char for char in normalized_text if char.isalnum() or char in "-_ ."
    )
    # split() with no argument discards leading/trailing whitespace and
    # treats each run of whitespace as one separator.
    return "-".join(kept.split())
class WhatsAppIdentifier(StrEnum):