This commit is contained in:
KnugiHK
2025-05-05 17:13:43 +08:00
parent 3220ed2d3f
commit a58dd78be8
4 changed files with 224 additions and 171 deletions

View File

@@ -272,13 +272,16 @@ def validate_args(parser: ArgumentParser, args) -> None:
if not args.android and not args.ios and not args.exported and not args.import_json:
parser.error("You must define the device type.")
if args.no_html and not args.json and not args.text_format:
parser.error("You must either specify a JSON output file, text file output directory or enable HTML output.")
parser.error(
"You must either specify a JSON output file, text file output directory or enable HTML output.")
if args.import_json and (args.android or args.ios or args.exported or args.no_html):
parser.error("You can only use --import with -j and without --no-html, -a, -i, -e.")
parser.error(
"You can only use --import with -j and without --no-html, -a, -i, -e.")
elif args.import_json and not os.path.isfile(args.json):
parser.error("JSON file not found.")
if args.incremental_merge and (args.source_dir is None or args.target_dir is None):
parser.error("You must specify both --source-dir and --target-dir for incremental merge.")
parser.error(
"You must specify both --source-dir and --target-dir for incremental merge.")
if args.android and args.business:
parser.error("WhatsApp Business is only available on iOS for now.")
if "??" not in args.headline:
@@ -289,18 +292,21 @@ def validate_args(parser: ArgumentParser, args) -> None:
(args.json.endswith(".json") and os.path.isfile(args.json)) or
(not args.json.endswith(".json") and os.path.isfile(args.json))
):
parser.error("When --per-chat is enabled, the destination of --json must be a directory.")
parser.error(
"When --per-chat is enabled, the destination of --json must be a directory.")
# vCards validation
if args.enrich_from_vcards is not None and args.default_country_code is None:
parser.error("When --enrich-from-vcards is provided, you must also set --default-country-code")
parser.error(
"When --enrich-from-vcards is provided, you must also set --default-country-code")
# Size validation
if args.size is not None and not isinstance(args.size, int) and not args.size.isnumeric():
try:
args.size = readable_to_bytes(args.size)
except ValueError:
parser.error("The value for --split must be ended in pure bytes or with a proper unit (e.g., 1048576 or 1MB)")
parser.error(
"The value for --split must be ended in pure bytes or with a proper unit (e.g., 1048576 or 1MB)")
# Date filter validation and processing
if args.filter_date is not None:
@@ -316,7 +322,8 @@ def validate_args(parser: ArgumentParser, args) -> None:
# Chat filter validation
if args.filter_chat_include is not None and args.filter_chat_exclude is not None:
parser.error("Chat inclusion and exclusion filters cannot be used together.")
parser.error(
"Chat inclusion and exclusion filters cannot be used together.")
validate_chat_filters(parser, args.filter_chat_include)
validate_chat_filters(parser, args.filter_chat_exclude)
@@ -327,20 +334,23 @@ def validate_chat_filters(parser: ArgumentParser, chat_filter: Optional[List[str
if chat_filter is not None:
for chat in chat_filter:
if not chat.isnumeric():
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
parser.error(
"Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
def process_date_filter(parser: ArgumentParser, args) -> None:
"""Process and validate date filter arguments."""
if " - " in args.filter_date:
start, end = args.filter_date.split(" - ")
start = int(datetime.strptime(start, args.filter_date_format).timestamp())
start = int(datetime.strptime(
start, args.filter_date_format).timestamp())
end = int(datetime.strptime(end, args.filter_date_format).timestamp())
if start < 1009843200 or end < 1009843200:
parser.error("WhatsApp was first released in 2009...")
if start > end:
parser.error("The start date cannot be a moment after the end date.")
parser.error(
"The start date cannot be a moment after the end date.")
if args.android:
args.filter_date = f"BETWEEN {start}000 AND {end}000"
@@ -353,9 +363,11 @@ def process_date_filter(parser: ArgumentParser, args) -> None:
def process_single_date_filter(parser: ArgumentParser, args) -> None:
"""Process single date comparison filters."""
if len(args.filter_date) < 3:
parser.error("Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
parser.error(
"Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
_timestamp = int(datetime.strptime(args.filter_date[2:], args.filter_date_format).timestamp())
_timestamp = int(datetime.strptime(
args.filter_date[2:], args.filter_date_format).timestamp())
if _timestamp < 1009843200:
parser.error("WhatsApp was first released in 2009...")
@@ -371,7 +383,8 @@ def process_single_date_filter(parser: ArgumentParser, args) -> None:
elif args.ios:
args.filter_date = f"<= {_timestamp - APPLE_TIME}"
else:
parser.error("Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
parser.error(
"Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
def setup_contact_store(args) -> Optional['ContactsFromVCards']:
@@ -385,7 +398,8 @@ def setup_contact_store(args) -> Optional['ContactsFromVCards']:
)
exit(1)
contact_store = ContactsFromVCards()
contact_store.load_vcf_file(args.enrich_from_vcards, args.default_country_code)
contact_store.load_vcf_file(
args.enrich_from_vcards, args.default_country_code)
return contact_store
return None
@@ -542,7 +556,8 @@ def handle_media_directory(args) -> None:
media_path = os.path.join(args.output, args.media)
if os.path.isdir(media_path):
print("\nWhatsApp directory already exists in output directory. Skipping...", end="\n")
print(
"\nWhatsApp directory already exists in output directory. Skipping...", end="\n")
else:
if args.move_media:
try:
@@ -737,9 +752,11 @@ def main():
# Extract media from backup if needed
if args.backup is not None:
if not os.path.isdir(args.media):
ios_media_handler.extract_media(args.backup, identifiers, args.decrypt_chunk_size)
ios_media_handler.extract_media(
args.backup, identifiers, args.decrypt_chunk_size)
else:
print("WhatsApp directory already exists, skipping WhatsApp file extraction.")
print(
"WhatsApp directory already exists, skipping WhatsApp file extraction.")
# Set default DB paths if not provided
if args.db is None:

View File

@@ -7,6 +7,7 @@ class Timing:
"""
Handles timestamp formatting with timezone support.
"""
def __init__(self, timezone_offset: Optional[int]) -> None:
"""
Initialize Timing object.
@@ -37,6 +38,7 @@ class TimeZone(tzinfo):
"""
Custom timezone class with fixed offset.
"""
def __init__(self, offset: int) -> None:
"""
Initialize TimeZone object.
@@ -151,6 +153,7 @@ class ChatStore:
"""
Stores chat information and messages.
"""
def __init__(self, type: str, name: Optional[str] = None, media: Optional[str] = None) -> None:
"""
Initialize ChatStore object.
@@ -266,10 +269,12 @@ class ChatStore:
# Merge messages
self._messages.update(other._messages)
class Message:
"""
Represents a single message in a chat.
"""
def __init__(
self,
*,
@@ -318,13 +323,15 @@ class Message:
self.mime = None
self.message_type = message_type
if isinstance(received_timestamp, (int, float)):
self.received_timestamp = timing.format_timestamp(received_timestamp, "%Y/%m/%d %H:%M")
self.received_timestamp = timing.format_timestamp(
received_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(received_timestamp, str):
self.received_timestamp = received_timestamp
else:
self.received_timestamp = None
if isinstance(read_timestamp, (int, float)):
self.read_timestamp = timing.format_timestamp(read_timestamp, "%Y/%m/%d %H:%M")
self.read_timestamp = timing.format_timestamp(
read_timestamp, "%Y/%m/%d %H:%M")
elif isinstance(read_timestamp, str):
self.read_timestamp = read_timestamp
else:
@@ -363,13 +370,13 @@ class Message:
@classmethod
def from_json(cls, data: Dict) -> 'Message':
message = cls(
from_me = data["from_me"],
timestamp = data["timestamp"],
time = data["time"],
key_id = data["key_id"],
message_type = data.get("message_type"),
received_timestamp = data.get("received_timestamp"),
read_timestamp = data.get("read_timestamp")
from_me=data["from_me"],
timestamp=data["timestamp"],
time=data["time"],
key_id=data["key_id"],
message_type=data.get("message_type"),
received_timestamp=data.get("received_timestamp"),
read_timestamp=data.get("read_timestamp")
)
message.media = data.get("media")
message.meta = data.get("meta")

View File

@@ -18,6 +18,7 @@ except ImportError:
# < Python 3.11
# This should be removed when the support for Python 3.10 ends. (31 Oct 2026)
from enum import Enum
class StrEnum(str, Enum):
pass
@@ -155,7 +156,8 @@ def check_update():
else:
with raw:
package_info = json.load(raw)
latest_version = tuple(map(int, package_info["info"]["version"].split(".")))
latest_version = tuple(
map(int, package_info["info"]["version"].split(".")))
__version__ = importlib.metadata.version("whatsapp_chat_exporter")
current_version = tuple(map(int, __version__.split(".")))
if current_version < latest_version:
@@ -184,7 +186,7 @@ def rendering(
headline,
next=False,
previous=False
):
):
if chat.their_avatar_thumb is None and chat.their_avatar is not None:
their_avatar_thumb = chat.their_avatar
else:
@@ -256,7 +258,8 @@ def import_from_json(json_file: str, data: Dict[str, ChatStore]):
message.sticker = msg.get("sticker")
chat.add_message(id, message)
data[jid] = chat
print(f"Importing chats from JSON...({index + 1}/{total_row_number})", end="\r")
print(
f"Importing chats from JSON...({index + 1}/{total_row_number})", end="\r")
def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -284,14 +287,17 @@ def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_p
with open(source_path, 'rb') as src, open(target_path, 'wb') as dst:
dst.write(src.read())
else:
print(f"Merging '{json_file}' with existing file in target directory...")
print(
f"Merging '{json_file}' with existing file in target directory...")
with open(source_path, 'r') as src_file, open(target_path, 'r') as tgt_file:
source_data = json.load(src_file)
target_data = json.load(tgt_file)
# Parse JSON into ChatStore objects using from_json()
source_chats = {jid: ChatStore.from_json(chat) for jid, chat in source_data.items()}
target_chats = {jid: ChatStore.from_json(chat) for jid, chat in target_data.items()}
source_chats = {jid: ChatStore.from_json(
chat) for jid, chat in source_data.items()}
target_chats = {jid: ChatStore.from_json(
chat) for jid, chat in target_data.items()}
# Merge chats using merge_with()
for jid, chat in source_chats.items():
@@ -301,11 +307,13 @@ def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_p
target_chats[jid] = chat
# Serialize merged data
merged_data = {jid: chat.to_json() for jid, chat in target_chats.items()}
merged_data = {jid: chat.to_json()
for jid, chat in target_chats.items()}
# Check if the merged data differs from the original target data
if json.dumps(merged_data, sort_keys=True) != json.dumps(target_data, sort_keys=True):
print(f"Changes detected in '{json_file}', updating target file...")
print(
f"Changes detected in '{json_file}', updating target file...")
with open(target_path, 'w') as merged_file:
json.dump(
merged_data,
@@ -314,12 +322,14 @@ def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_p
ensure_ascii=not avoid_encoding_json,
)
else:
print(f"No changes detected in '{json_file}', skipping update.")
print(
f"No changes detected in '{json_file}', skipping update.")
# Merge media directories
source_media_path = os.path.join(source_dir, media_dir)
target_media_path = os.path.join(target_dir, media_dir)
print(f"Merging media directories. Source: {source_media_path}, target: {target_media_path}")
print(
f"Merging media directories. Source: {source_media_path}, target: {target_media_path}")
if os.path.exists(source_media_path):
for root, _, files in os.walk(source_media_path):
relative_path = os.path.relpath(root, source_media_path)
@@ -411,23 +421,29 @@ def get_chat_condition(filter: Optional[List[str]], include: bool, columns: List
if filter is not None:
conditions = []
if len(columns) < 2 and jid is not None:
raise ValueError("There must be at least two elements in argument columns if jid is not None")
raise ValueError(
"There must be at least two elements in argument columns if jid is not None")
if jid is not None:
if platform == "android":
is_group = f"{jid}.type == 1"
elif platform == "ios":
is_group = f"{jid} IS NOT NULL"
else:
raise ValueError("Only android and ios are supported for argument platform if jid is not None")
raise ValueError(
"Only android and ios are supported for argument platform if jid is not None")
for index, chat in enumerate(filter):
if include:
conditions.append(f"{' OR' if index > 0 else ''} {columns[0]} LIKE '%{chat}%'")
conditions.append(
f"{' OR' if index > 0 else ''} {columns[0]} LIKE '%{chat}%'")
if len(columns) > 1:
conditions.append(f" OR ({columns[1]} LIKE '%{chat}%' AND {is_group})")
conditions.append(
f" OR ({columns[1]} LIKE '%{chat}%' AND {is_group})")
else:
conditions.append(f"{' AND' if index > 0 else ''} {columns[0]} NOT LIKE '%{chat}%'")
conditions.append(
f"{' AND' if index > 0 else ''} {columns[0]} NOT LIKE '%{chat}%'")
if len(columns) > 1:
conditions.append(f" AND ({columns[1]} NOT LIKE '%{chat}%' AND {is_group})")
conditions.append(
f" AND ({columns[1]} NOT LIKE '%{chat}%' AND {is_group})")
return f"AND ({' '.join(conditions)})"
else:
return ""
@@ -539,7 +555,8 @@ def determine_metadata(content: sqlite3.Row, init_msg: Optional[str]) -> Optiona
elif content["action_type"] == 67:
return # (PM) this contact use secure service from Facebook???
elif content["action_type"] == 69:
return # (PM) this contact use secure service from Facebook??? What's the difference with 67????
# (PM) this contact use secure service from Facebook??? What's the difference with 67????
return
else:
return # Unsupported
return msg
@@ -566,7 +583,8 @@ def get_status_location(output_folder: str, offline_static: str) -> str:
w3css_path = os.path.join(static_folder, "w3.css")
if not os.path.isfile(w3css_path):
with urllib.request.urlopen(w3css) as resp:
with open(w3css_path, "wb") as f: f.write(resp.read())
with open(w3css_path, "wb") as f:
f.write(resp.read())
w3css = os.path.join(offline_static, "w3.css")
@@ -597,6 +615,7 @@ def setup_template(template: Optional[str], no_avatar: bool, experimental: bool
template_env.filters['sanitize_except'] = sanitize_except
return template_env.get_template(template_file)
# iOS Specific
APPLE_TIME = 978307200
@@ -617,24 +636,32 @@ def slugify(value: str, allow_unicode: bool = False) -> str:
if allow_unicode:
value = unicodedata.normalize('NFKC', value)
else:
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
value = unicodedata.normalize('NFKD', value).encode(
'ascii', 'ignore').decode('ascii')
value = re.sub(r'[^\w\s-]', '', value.lower())
return re.sub(r'[-\s]+', '-', value).strip('-_')
class WhatsAppIdentifier(StrEnum):
MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite
CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ContactsV2.sqlite
CALL = "1b432994e958845fffe8e2f190f26d1511534088" # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-CallHistory.sqlite
# AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite
MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"
# AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ContactsV2.sqlite
CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f"
# AppDomainGroup-group.net.whatsapp.WhatsApp.shared-CallHistory.sqlite
CALL = "1b432994e958845fffe8e2f190f26d1511534088"
DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsApp.shared"
class WhatsAppBusinessIdentifier(StrEnum):
MESSAGE = "724bd3b98b18518b455a87c1f3ac3a0d189c4466" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ChatStorage.sqlite
CONTACT = "d7246a707f51ddf8b17ee2dddabd9e0a4da5c552" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ContactsV2.sqlite
CALL = "b463f7c4365eefc5a8723930d97928d4e907c603" # AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-CallHistory.sqlite
# AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ChatStorage.sqlite
MESSAGE = "724bd3b98b18518b455a87c1f3ac3a0d189c4466"
# AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-ContactsV2.sqlite
CONTACT = "d7246a707f51ddf8b17ee2dddabd9e0a4da5c552"
# AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared-CallHistory.sqlite
CALL = "b463f7c4365eefc5a8723930d97928d4e907c603"
DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared"
class JidType(IntEnum):
PM = 0
GROUP = 1

View File

@@ -292,7 +292,8 @@ def test_incremental_merge_existing_file_no_changes(mock_filesystem):
incremental_merge(source_dir, target_dir, media_dir, 2, True)
# Verify no write operations occurred on target file
write_calls = [call for call in mock_file.mock_calls if call[0] == "().write"]
write_calls = [
call for call in mock_file.mock_calls if call[0] == "().write"]
assert len(write_calls) == 0
@@ -333,4 +334,5 @@ def test_incremental_merge_media_copy(mock_filesystem):
assert (
mock_filesystem["makedirs"].call_count >= 2
) # At least target dir and media dir
assert mock_filesystem["copy2"].call_count == 2 # Two media files copied
# Two media files copied
assert mock_filesystem["copy2"].call_count == 2