This commit is contained in:
KnugiHK
2025-05-11 18:07:51 +08:00
parent cc410b8503
commit 33149075d3
14 changed files with 372 additions and 311 deletions

View File

@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
elif "crypt15" in args.backup: elif "crypt15" in args.backup:
crypt = Crypt.CRYPT15 crypt = Crypt.CRYPT15
else: else:
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}") logger.error(
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
return 1 return 1
# Get key # Get key
@@ -505,11 +506,11 @@ def handle_decrypt_error(error: int) -> None:
"""Handle decryption errors with appropriate messages.""" """Handle decryption errors with appropriate messages."""
if error == 1: if error == 1:
logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key" logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key"
" are not present. For details, see README.md.\n") " are not present. For details, see README.md.\n")
exit(3) exit(3)
elif error == 2: elif error == 2:
logger.error("Failed when decompressing the decrypted backup. " logger.error("Failed when decompressing the decrypted backup. "
"Possibly incorrect offsets used in decryption.\n") "Possibly incorrect offsets used in decryption.\n")
exit(4) exit(4)
else: else:
logger.error("Unknown error occurred.\n") logger.error("Unknown error occurred.\n")
@@ -598,7 +599,7 @@ def handle_media_directory(args) -> None:
logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}") logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}")
except PermissionError: except PermissionError:
logger.warning("Cannot remove original WhatsApp directory. " logger.warning("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?\n") "Perhaps the directory is opened?\n")
else: else:
logger.info(f"Copying media directory...\r") logger.info(f"Copying media directory...\r")
shutil.copytree(args.media, media_path) shutil.copytree(args.media, media_path)

View File

@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
) )
return db return db
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes: def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
"""Decrypt a crypt14 database using multithreading for brute-force offset detection. """Decrypt a crypt14 database using multithreading for brute-force offset detection.
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
return db return db
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor: with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations} future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try: try:
for future in concurrent.futures.as_completed(future_to_offset): for future in concurrent.futures.as_completed(future_to_offset):
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
raise OffsetNotFoundError("Could not find the correct offsets for decryption.") raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes: def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
"""Decrypt a crypt12 database. """Decrypt a crypt12 database.
@@ -319,7 +320,7 @@ def decrypt_backup(
if crypt is not Crypt.CRYPT15 and len(key) != 158: if crypt is not Crypt.CRYPT15 and len(key) != 158:
raise InvalidKeyError("The key file must be 158 bytes") raise InvalidKeyError("The key file must be 158 bytes")
#signature check, this is check is used in crypt 12 and 14 # signature check, this is check is used in crypt 12 and 14
if crypt != Crypt.CRYPT15: if crypt != Crypt.CRYPT15:
t1 = key[30:62] t1 = key[30:62]
@@ -329,7 +330,6 @@ def decrypt_backup(
if t1 != database[3:35] and crypt == Crypt.CRYPT12: if t1 != database[3:35] and crypt == Crypt.CRYPT12:
raise ValueError("The signature of key file and backup file mismatch") raise ValueError("The signature of key file and backup file mismatch")
if crypt == Crypt.CRYPT15: if crypt == Crypt.CRYPT15:
if keyfile_stream: if keyfile_stream:
main_key, hex_key = _extract_enc_key(key) main_key, hex_key = _extract_enc_key(key)
@@ -353,7 +353,6 @@ def decrypt_backup(
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e: except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
raise DecryptionError(f"Decryption failed: {e}") from e raise DecryptionError(f"Decryption failed: {e}") from e
if not dry_run: if not dry_run:
with open(output, "wb") as f: with open(output, "wb") as f:
f.write(db) f.write(db)

View File

@@ -22,24 +22,26 @@ logger = logging.getLogger(__name__)
def contacts(db, data, enrich_from_vcards): def contacts(db, data, enrich_from_vcards):
""" """
Process WhatsApp contacts from the database. Process WhatsApp contacts from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
enrich_from_vcards: Path to vCard file for contact enrichment enrich_from_vcards: Path to vCard file for contact enrichment
Returns: Returns:
bool: False if no contacts found, True otherwise bool: False if no contacts found, True otherwise
""" """
c = db.cursor() c = db.cursor()
c.execute("SELECT count() FROM wa_contacts") c.execute("SELECT count() FROM wa_contacts")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
if total_row_number == 0: if total_row_number == 0:
if enrich_from_vcards is not None: if enrich_from_vcards is not None:
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.") logger.info(
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
else: else:
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google") logger.warning(
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False return False
else: else:
logger.info(f"Processed {total_row_number} contacts\n") logger.info(f"Processed {total_row_number} contacts\n")
@@ -51,14 +53,14 @@ def contacts(db, data, enrich_from_vcards):
if row["status"] is not None: if row["status"] is not None:
current_chat.status = row["status"] current_chat.status = row["status"]
row = c.fetchone() row = c.fetchone()
return True return True
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty): def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty):
""" """
Process WhatsApp messages from the database. Process WhatsApp messages from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
@@ -85,17 +87,17 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
i = 0 i = 0
# Fetch the first row safely # Fetch the first row safely
content = _fetch_row_safely(content_cursor) content = _fetch_row_safely(content_cursor)
while content is not None: while content is not None:
_process_single_message(data, content, table_message, timezone_offset) _process_single_message(data, content, table_message, timezone_offset)
i += 1 i += 1
if i % 1000 == 0: if i % 1000 == 0:
logger.info(f"Processing messages...({i}/{total_row_number})\r") logger.info(f"Processing messages...({i}/{total_row_number})\r")
# Fetch the next row safely # Fetch the next row safely
content = _fetch_row_safely(content_cursor) content = _fetch_row_safely(content_cursor)
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}") logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM messages FROM messages
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message FROM message
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy database schema.""" """Get cursor for legacy database schema."""
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
messages._id, messages._id,
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new database schema.""" """Get cursor for new database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
message._id, message._id,
@@ -292,19 +302,20 @@ def _process_single_message(data, content, table_message, timezone_offset):
"""Process a single message row.""" """Process a single message row."""
if content["key_remote_jid"] is None: if content["key_remote_jid"] is None:
return return
# Get or create the chat # Get or create the chat
if not data.get_chat(content["key_remote_jid"]): if not data.get_chat(content["key_remote_jid"]):
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"])) current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
Device.ANDROID, content["chat_subject"]))
else: else:
current_chat = data.get_chat(content["key_remote_jid"]) current_chat = data.get_chat(content["key_remote_jid"])
# Determine sender_jid_row_id # Determine sender_jid_row_id
if "sender_jid_row_id" in content: if "sender_jid_row_id" in content:
sender_jid_row_id = content["sender_jid_row_id"] sender_jid_row_id = content["sender_jid_row_id"]
else: else:
sender_jid_row_id = None sender_jid_row_id = None
# Create message object # Create message object
message = Message( message = Message(
from_me=not sender_jid_row_id and content["key_from_me"], from_me=not sender_jid_row_id and content["key_from_me"],
@@ -316,19 +327,19 @@ def _process_single_message(data, content, table_message, timezone_offset):
received_timestamp=content["received_timestamp"], received_timestamp=content["received_timestamp"],
read_timestamp=content["read_timestamp"] read_timestamp=content["read_timestamp"]
) )
# Handle binary data # Handle binary data
if isinstance(content["data"], bytes): if isinstance(content["data"], bytes):
_process_binary_message(message, content) _process_binary_message(message, content)
current_chat.add_message(content["_id"], message) current_chat.add_message(content["_id"], message)
return return
# Set sender for group chats # Set sender for group chats
if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0: if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0:
_set_group_sender(message, content, data, table_message) _set_group_sender(message, content, data, table_message)
else: else:
message.sender = None message.sender = None
# Handle quoted messages # Handle quoted messages
if content["quoted"] is not None: if content["quoted"] is not None:
message.reply = content["quoted"] message.reply = content["quoted"]
@@ -338,7 +349,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.quoted_data = content["quoted_data"] message.quoted_data = content["quoted_data"]
else: else:
message.reply = None message.reply = None
# Handle message caption # Handle message caption
if not table_message and content["media_caption"] is not None: if not table_message and content["media_caption"] is not None:
# Old schema # Old schema
@@ -348,14 +359,14 @@ def _process_single_message(data, content, table_message, timezone_offset):
message.caption = content["data"] message.caption = content["data"]
else: else:
message.caption = None message.caption = None
# Handle message content based on status # Handle message content based on status
if content["status"] == 6: # 6 = Metadata if content["status"] == 6: # 6 = Metadata
_process_metadata_message(message, content, data, table_message) _process_metadata_message(message, content, data, table_message)
else: else:
# Real message # Real message
_process_regular_message(message, content, table_message) _process_regular_message(message, content, table_message)
current_chat.add_message(content["_id"], message) current_chat.add_message(content["_id"], message)
@@ -385,7 +396,7 @@ def _set_group_sender(message, content, data, table_message):
name = data.get_chat(content["remote_resource"]).name name = data.get_chat(content["remote_resource"]).name
if "@" in content["remote_resource"]: if "@" in content["remote_resource"]:
fallback = content["remote_resource"].split('@')[0] fallback = content["remote_resource"].split('@')[0]
message.sender = name or fallback message.sender = name or fallback
@@ -393,7 +404,7 @@ def _process_metadata_message(message, content, data, table_message):
"""Process metadata message.""" """Process metadata message."""
message.meta = True message.meta = True
name = fallback = None name = fallback = None
if table_message: if table_message:
if content["sender_jid_row_id"] > 0: if content["sender_jid_row_id"] > 0:
_jid = content["group_sender_jid"] _jid = content["group_sender_jid"]
@@ -412,12 +423,12 @@ def _process_metadata_message(message, content, data, table_message):
fallback = _jid.split('@')[0] fallback = _jid.split('@')[0]
else: else:
name = "You" name = "You"
message.data = determine_metadata(content, name or fallback) message.data = determine_metadata(content, name or fallback)
if isinstance(message.data, str) and "<br>" in message.data: if isinstance(message.data, str) and "<br>" in message.data:
message.safe = True message.safe = True
if message.data is None: if message.data is None:
if content["video_call"] is not None: # Missed call if content["video_call"] is not None: # Missed call
message.meta = True message.meta = True
@@ -433,7 +444,7 @@ def _process_metadata_message(message, content, data, table_message):
def _process_regular_message(message, content, table_message): def _process_regular_message(message, content, table_message):
"""Process regular (non-metadata) message.""" """Process regular (non-metadata) message."""
message.sticker = content["media_wa_type"] == 20 # Sticker is a message message.sticker = content["media_wa_type"] == 20 # Sticker is a message
if content["key_from_me"] == 1: if content["key_from_me"] == 1:
if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15: if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15:
msg = "Message deleted" msg = "Message deleted"
@@ -458,7 +469,7 @@ def _process_regular_message(message, content, table_message):
msg = content["data"] msg = content["data"]
if msg is not None: if msg is not None:
msg = _format_message_text(msg) msg = _format_message_text(msg)
message.data = msg message.data = msg
@@ -474,7 +485,7 @@ def _format_message_text(text):
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
""" """
Process WhatsApp media files from the database. Process WhatsApp media files from the database.
Args: Args:
db: Database connection db: Database connection
data: Data store object data: Data store object
@@ -487,28 +498,28 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c = db.cursor() c = db.cursor()
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat) total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing media...(0/{total_row_number})\r") logger.info(f"Processing media...(0/{total_row_number})\r")
try: try:
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat) content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
except sqlite3.OperationalError: except sqlite3.OperationalError:
content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat) content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat)
content = content_cursor.fetchone() content = content_cursor.fetchone()
mime = MimeTypes() mime = MimeTypes()
# Ensure thumbnails directory exists # Ensure thumbnails directory exists
Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True) Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)
i = 0 i = 0
while content is not None: while content is not None:
_process_single_media(data, content, media_folder, mime, separate_media) _process_single_media(data, content, media_folder, mime, separate_media)
i += 1 i += 1
if i % 100 == 0: if i % 100 == 0:
logger.info(f"Processing media...({i}/{total_row_number})\r") logger.info(f"Processing media...({i}/{total_row_number})\r")
content = content_cursor.fetchone() content = content_cursor.fetchone()
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}") logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy media database schema.""" """Get cursor for legacy media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
message_row_id, message_row_id,
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new media database schema.""" """Get cursor for new media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
message_row_id, message_row_id,
@@ -633,10 +652,10 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
current_chat = data.get_chat(content["key_remote_jid"]) current_chat = data.get_chat(content["key_remote_jid"])
message = current_chat.get_message(content["message_row_id"]) message = current_chat.get_message(content["message_row_id"])
message.media = True message.media = True
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = file_path message.data = file_path
# Set mime type # Set mime type
if content["mime_type"] is None: if content["mime_type"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
@@ -646,11 +665,11 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.mime = "application/octet-stream" message.mime = "application/octet-stream"
else: else:
message.mime = content["mime_type"] message.mime = content["mime_type"]
# Copy media to separate folder if needed # Copy media to separate folder if needed
if separate_media: if separate_media:
chat_display_name = slugify(current_chat.name or message.sender chat_display_name = slugify(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True) or content["key_remote_jid"].split('@')[0], True)
current_filename = file_path.split("/")[-1] current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name) new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -661,7 +680,7 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
message.data = "The media is missing" message.data = "The media is missing"
message.mime = "media" message.mime = "media"
message.meta = True message.meta = True
# Handle thumbnail # Handle thumbnail
if content["thumbnail"] is not None: if content["thumbnail"] is not None:
thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png" thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png"
@@ -681,11 +700,11 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
total_row_number = len(rows) total_row_number = len(rows)
logger.info(f"Processing vCards...(0/{total_row_number})\r") logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory if it doesn't exist # Create vCards directory if it doesn't exist
path = os.path.join(media_folder, "vCards") path = os.path.join(media_folder, "vCards")
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows): for index, row in enumerate(rows):
_process_vcard_row(row, path, data) _process_vcard_row(row, path, data)
logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r") logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema.""" """Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for legacy WhatsApp database schema.""" """Execute vCard query for legacy WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
@@ -760,11 +783,11 @@ def _process_vcard_row(row, path, data):
file_name = "".join(x for x in media_name if x.isalnum()) file_name = "".join(x for x in media_name if x.isalnum())
file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore') file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore')
file_path = os.path.join(path, f"{file_name}.vcf") file_path = os.path.join(path, f"{file_name}.vcf")
if not os.path.isfile(file_path): if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f: with open(file_path, "w", encoding="utf-8") as f:
f.write(row["vcard"]) f.write(row["vcard"])
message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"]) message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"])
message.data = "This media include the following vCard file(s):<br>" \ message.data = "This media include the following vCard file(s):<br>" \
f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>' f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>'
@@ -776,26 +799,26 @@ def _process_vcard_row(row, path, data):
def calls(db, data, timezone_offset, filter_chat):
    """Process call logs from WhatsApp database.

    Counts the calls selected by ``filter_chat``; when there are none the
    function returns without modifying ``data``. Otherwise each call row is
    handed to ``_process_call_record``, which fills a dedicated
    "WhatsApp Calls" chat. That chat is finally registered in ``data`` under
    the key ``"000000000000000"``.

    Args:
        db: Database connection; only ``db.cursor()`` is used here.
        data: Chat collection. Passed through to ``_process_call_record`` and
            receives the calls chat via ``data.add_chat``.
        timezone_offset: Forwarded unchanged to ``_process_call_record``.
        filter_chat: Forwarded unchanged to the count and fetch helpers.

    Returns:
        None.
    """
    c = db.cursor()

    # Check if there are any calls that match the filter
    total_row_number = _get_calls_count(c, filter_chat)
    if total_row_number == 0:
        return

    logger.info(f"Processing calls...({total_row_number})\r")

    # Fetch call data
    calls_data = _fetch_calls_data(c, filter_chat)

    # Create a chat store for all calls
    chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

    # Process each call; fetchone() yields None once the result set is exhausted
    content = calls_data.fetchone()
    while content is not None:
        _process_call_record(content, chat, data, timezone_offset)
        content = calls_data.fetchone()

    # Add the calls chat to the data
    data.add_chat("000000000000000", chat)
    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
@@ -861,7 +884,7 @@ def _process_call_record(content, chat, data, timezone_offset):
received_timestamp=None, # TODO: Add timestamp received_timestamp=None, # TODO: Add timestamp
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
) )
# Get caller/callee name # Get caller/callee name
_jid = content["raw_string"] _jid = content["raw_string"]
name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
@@ -870,13 +893,13 @@ def _process_call_record(content, chat, data, timezone_offset):
else: else:
fallback = None fallback = None
call.sender = name or fallback call.sender = name or fallback
# Set metadata # Set metadata
call.meta = True call.meta = True
# Construct call description based on call type and result # Construct call description based on call type and result
call.data = _construct_call_description(content, call) call.data = _construct_call_description(content, call)
# Add call to chat # Add call to chat
chat.add_message(content["_id"], call) chat.add_message(content["_id"], call)
@@ -888,7 +911,7 @@ def _construct_call_description(content, call):
f"call {'to' if call.from_me else 'from'} " f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was " f"{call.sender} was "
) )
if content['call_result'] in (0, 4, 7): if content['call_result'] in (0, 4, 7):
description += "cancelled." if call.from_me else "missed." description += "cancelled." if call.from_me else "missed."
elif content['call_result'] == 2: elif content['call_result'] == 2:
@@ -904,21 +927,21 @@ def _construct_call_description(content, call):
) )
else: else:
description += "in an unknown state." description += "in an unknown state."
return description return description
def create_html( def create_html(
data, data,
output_folder, output_folder,
template=None, template=None,
embedded=False, embedded=False,
offline_static=False, offline_static=False,
maximum_size=None, maximum_size=None,
no_avatar=False, no_avatar=False,
experimental=False, experimental=False,
headline=None headline=None
): ):
"""Generate HTML chat files from data.""" """Generate HTML chat files from data."""
template = setup_template(template, no_avatar, experimental) template = setup_template(template, no_avatar, experimental)
@@ -936,33 +959,33 @@ def create_html(
if len(current_chat) == 0: if len(current_chat) == 0:
# Skip empty chats # Skip empty chats
continue continue
safe_file_name, name = get_file_name(contact, current_chat) safe_file_name, name = get_file_name(contact, current_chat)
if maximum_size is not None: if maximum_size is not None:
_generate_paginated_chat( _generate_paginated_chat(
current_chat, current_chat,
safe_file_name, safe_file_name,
name, name,
contact, contact,
output_folder, output_folder,
template, template,
w3css, w3css,
maximum_size, maximum_size,
headline headline
) )
else: else:
_generate_single_chat( _generate_single_chat(
current_chat, current_chat,
safe_file_name, safe_file_name,
name, name,
contact, contact,
output_folder, output_folder,
template, template,
w3css, w3css,
headline headline
) )
if current % 10 == 0: if current % 10 == 0:
logger.info(f"Generating chats...({current}/{total_row_number})\r") logger.info(f"Generating chats...({current}/{total_row_number})\r")
@@ -990,20 +1013,20 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
current_size = 0 current_size = 0
current_page = 1 current_page = 1
render_box = [] render_box = []
# Use default maximum size if set to 0 # Use default maximum size if set to 0
if maximum_size == 0: if maximum_size == 0:
maximum_size = MAX_SIZE maximum_size = MAX_SIZE
last_msg = current_chat.get_last_message().key_id last_msg = current_chat.get_last_message().key_id
for message in current_chat.values(): for message in current_chat.values():
# Calculate message size # Calculate message size
if message.data is not None and not message.meta and not message.media: if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE current_size += len(message.data) + ROW_SIZE
else: else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size: if current_size > maximum_size:
# Create a new page # Create a new page
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html" output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
@@ -1047,25 +1070,25 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
def create_txt(data, output):
    """Generate text files from chat data.

    Creates ``output`` if needed and writes one ``<contact>.txt`` file per
    non-empty chat, one formatted line block per message.

    Args:
        data: Mapping-like chat collection; ``data.items()`` must yield
            ``(jid, chat)`` pairs where ``chat`` supports ``len()`` and
            ``.values()``.
        output: Directory that receives the text files.

    Returns:
        None.
    """
    os.makedirs(output, exist_ok=True)

    for jik, chat in data.items():
        if len(chat) == 0:
            # Nothing to write for an empty chat
            continue

        # Determine file name: prefer the chat name (slashes removed so it
        # stays a single path component), otherwise fall back to the JID
        if chat.name is not None:
            contact = chat.name.replace('/', '')
        else:
            contact = jik.replace('+', '')

        output_file = os.path.join(output, f"{contact}.txt")

        with open(output_file, "w", encoding="utf8") as f:
            for message in chat.values():
                # Skip metadata in text format
                if message.meta and message.mime != "media":
                    continue

                # Format the message
                formatted_message = _format_message_for_txt(message, contact)
                f.write(f"{formatted_message}\n")
@@ -1074,16 +1097,16 @@ def create_txt(data, output):
def _format_message_for_txt(message, contact): def _format_message_for_txt(message, contact):
"""Format a message for text output.""" """Format a message for text output."""
date = datetime.fromtimestamp(message.timestamp).date() date = datetime.fromtimestamp(message.timestamp).date()
# Determine the sender name # Determine the sender name
if message.from_me: if message.from_me:
name = "You" name = "You"
else: else:
name = message.sender if message.sender else contact name = message.sender if message.sender else contact
prefix = f"[{date} {message.time}] {name}: " prefix = f"[{date} {message.time}] {name}: "
prefix_length = len(prefix) prefix_length = len(prefix)
# Handle different message types # Handle different message types
if message.media and ("/" in message.mime or message.mime == "media"): if message.media and ("/" in message.mime or message.mime == "media"):
if message.data == "The media is missing": if message.data == "The media is missing":
@@ -1095,9 +1118,9 @@ def _format_message_for_txt(message, contact):
message_text = "" message_text = ""
else: else:
message_text = message.data.replace('<br>', f'\n{" " * prefix_length}') message_text = message.data.replace('<br>', f'\n{" " * prefix_length}')
# Add caption if present # Add caption if present
if message.caption is not None: if message.caption is not None:
message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}') message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}')
return f"{prefix}{message_text}" return f"{prefix}{message_text}"

View File

@@ -24,31 +24,32 @@ import struct
import codecs import codecs
from datetime import datetime, timedelta from datetime import datetime, timedelta
class BPListWriter(object): class BPListWriter(object):
def __init__(self, objects):
    '''__init__(objects)

    Stores the objects to serialize and starts with an empty bplist buffer
    '''
    self.bplist = ""
    self.objects = objects
def binary(self):
    '''binary -> string

    Generates bplist

    Currently only the 'bplist00' magic header is produced; it is stored in
    self.data and returned. Object, offset-table and trailer serialization
    are still TODO.
    '''
    # NOTE(review): the result goes to self.data while write() checks
    # self.bplist -- confirm which attribute is meant to hold the output.
    self.data = 'bplist00'

    # TODO: flatten objects and count max length size

    # TODO: write objects and save offsets

    # TODO: write offsets

    # TODO: write metadata

    return self.data
def write(self, filename): def write(self, filename):
''' '''
Writes bplist to file Writes bplist to file
''' '''
if self.bplist != "": if self.bplist != "":
@@ -57,18 +58,19 @@ class BPListWriter(object):
else: else:
raise Exception('BPlist not yet generated') raise Exception('BPlist not yet generated')
class BPListReader(object): class BPListReader(object):
def __init__(self, s):
    '''__init__(s)

    Keeps the raw plist data and prepares empty containers for the
    unpacked objects and the cache of already resolved objects
    '''
    self.data = s
    self.objects = []
    self.resolved = {}
def __unpackIntStruct(self, sz, s): def __unpackIntStruct(self, sz, s):
'''__unpackIntStruct(size, string) -> int '''__unpackIntStruct(size, string) -> int
Unpacks the integer of given size (1, 2 or 4 bytes) from string Unpacks the integer of given size (1, 2 or 4 bytes) from string
''' '''
if sz == 1: if sz == 1:
ot = '!B' ot = '!B'
elif sz == 2: elif sz == 2:
ot = '!H' ot = '!H'
@@ -79,17 +81,17 @@ class BPListReader(object):
else: else:
raise Exception('int unpack size '+str(sz)+' unsupported') raise Exception('int unpack size '+str(sz)+' unsupported')
return struct.unpack(ot, s)[0] return struct.unpack(ot, s)[0]
def __unpackInt(self, offset):
    '''__unpackInt(offset) -> int

    Unpacks int field from plist at given offset

    Thin wrapper over __unpackIntMeta that drops the size and keeps the value
    '''
    return self.__unpackIntMeta(offset)[1]
def __unpackIntMeta(self, offset): def __unpackIntMeta(self, offset):
'''__unpackIntMeta(offset) -> (size, int) '''__unpackIntMeta(offset) -> (size, int)
Unpacks int field from plist at given offset and returns its size and value Unpacks int field from plist at given offset and returns its size and value
''' '''
obj_header = self.data[offset] obj_header = self.data[offset]
@@ -99,7 +101,7 @@ class BPListReader(object):
def __resolveIntSize(self, obj_info, offset): def __resolveIntSize(self, obj_info, offset):
'''__resolveIntSize(obj_info, offset) -> (count, offset) '''__resolveIntSize(obj_info, offset) -> (count, offset)
Calculates count of objref* array entries and returns count and offset to first element Calculates count of objref* array entries and returns count and offset to first element
''' '''
if obj_info == 0x0F: if obj_info == 0x0F:
@@ -112,10 +114,10 @@ class BPListReader(object):
def __unpackFloatStruct(self, sz, s): def __unpackFloatStruct(self, sz, s):
'''__unpackFloatStruct(size, string) -> float '''__unpackFloatStruct(size, string) -> float
Unpacks the float of given size (4 or 8 bytes) from string Unpacks the float of given size (4 or 8 bytes) from string
''' '''
if sz == 4: if sz == 4:
ot = '!f' ot = '!f'
elif sz == 8: elif sz == 8:
ot = '!d' ot = '!d'
@@ -125,7 +127,7 @@ class BPListReader(object):
def __unpackFloat(self, offset): def __unpackFloat(self, offset):
'''__unpackFloat(offset) -> float '''__unpackFloat(offset) -> float
Unpacks float field from plist at given offset Unpacks float field from plist at given offset
''' '''
obj_header = self.data[offset] obj_header = self.data[offset]
@@ -135,70 +137,79 @@ class BPListReader(object):
def __unpackDate(self, offset):
    '''__unpackDate(offset) -> datetime

    Unpacks date field from plist at given offset

    The 8 bytes following the object header are read as a big-endian double
    holding seconds relative to 2001-01-01; the fractional part is dropped
    (truncated towards zero) before the datetime is built.
    '''
    td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
    return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
def __unpackItem(self, offset):
    '''__unpackItem(offset)

    Unpacks and returns an item from plist

    The high nibble of the header byte at offset selects the object type,
    the low nibble carries type specific info. Arrays are returned as lists
    of object references and dicts as {key reference: value reference};
    references are not followed here.
    Raises Exception for the fill byte, sets and unknown markers.
    '''
    obj_header = self.data[offset]
    obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)

    def unpack_refs(start, count):
        # Decode `count` consecutive object references, each
        # self.object_ref_size bytes wide, beginning at `start`
        size = self.object_ref_size
        return [
            self.__unpackIntStruct(size, self.data[start+i*size:start+i*size+size])
            for i in range(count)
        ]

    if obj_type == 0x00:
        if obj_info == 0x00:  # null   0000 0000
            return None
        elif obj_info == 0x08:  # bool   0000 1000           // false
            return False
        elif obj_info == 0x09:  # bool   0000 1001           // true
            return True
        elif obj_info == 0x0F:  # fill   0000 1111           // fill byte
            raise Exception("0x0F Not Implemented")  # this is really pad byte, FIXME
        else:
            raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+' failed')
    elif obj_type == 0x10:  # int    0001 nnnn   ...     // # of bytes is 2^nnnn, big-endian bytes
        return self.__unpackInt(offset)
    elif obj_type == 0x20:  # real   0010 nnnn   ...     // # of bytes is 2^nnnn, big-endian bytes
        return self.__unpackFloat(offset)
    elif obj_type == 0x30:  # date   0011 0011   ...     // 8 byte float follows, big-endian bytes
        return self.__unpackDate(offset)
    # data   0100 nnnn   [int]   ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
    elif obj_type == 0x40:
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        return self.data[objref:objref+obj_count]  # XXX: we return data as str
    # string 0101 nnnn   [int]   ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
    elif obj_type == 0x50:
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        return self.data[objref:objref+obj_count]
    # string 0110 nnnn   [int]   ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
    elif obj_type == 0x60:
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        return self.data[objref:objref+obj_count*2].decode('utf-16be')
    elif obj_type == 0x80:  # uid    1000 nnnn   ...     // nnnn+1 is # of bytes
        # FIXME: Accept as a string for now
        # NOTE(review): the length comes from __resolveIntSize rather than
        # nnnn+1 as the format comment states -- confirm before relying on UIDs
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        return self.data[objref:objref+obj_count]
    # array  1010 nnnn   [int]   objref* // nnnn is count, unless '1111', then int count follows
    elif obj_type == 0xA0:
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        return unpack_refs(objref, obj_count)
    # set    1100 nnnn   [int]   objref* // nnnn is count, unless '1111', then int count follows
    elif obj_type == 0xC0:
        # XXX: not serializable via apple implementation
        raise Exception("0xC0 Not Implemented")  # FIXME: implement
    # dict   1101 nnnn   [int]   keyref* objref* // nnnn is count, unless '1111', then int count follows
    elif obj_type == 0xD0:
        obj_count, objref = self.__resolveIntSize(obj_info, offset)
        keys = unpack_refs(objref, obj_count)
        # the value references follow directly after all key references
        values = unpack_refs(objref + obj_count*self.object_ref_size, obj_count)
        dic = {}
        for i in range(obj_count):
            dic[keys[i]] = values[i]
        return dic
    else:
        raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset))
def __resolveObject(self, idx): def __resolveObject(self, idx):
try: try:
return self.resolved[idx] return self.resolved[idx]
@@ -212,7 +223,7 @@ class BPListReader(object):
return newArr return newArr
if type(obj) == dict: if type(obj) == dict:
newDic = {} newDic = {}
for k,v in obj.items(): for k, v in obj.items():
key_resolved = self.__resolveObject(k) key_resolved = self.__resolveObject(k)
if isinstance(key_resolved, str): if isinstance(key_resolved, str):
rk = key_resolved rk = key_resolved
@@ -225,15 +236,16 @@ class BPListReader(object):
else: else:
self.resolved[idx] = obj self.resolved[idx] = obj
return obj return obj
def parse(self): def parse(self):
# read header # read header
if self.data[:8] != b'bplist00': if self.data[:8] != b'bplist00':
raise Exception('Bad magic') raise Exception('Bad magic')
# read trailer # read trailer
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:]) self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset '!6xBB4xI4xI4xI', self.data[-32:])
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
# read offset table # read offset table
self.offset_table = self.data[self.table_offset:-32] self.offset_table = self.data[self.table_offset:-32]
@@ -243,40 +255,45 @@ class BPListReader(object):
offset_entry = ot[:self.offset_size] offset_entry = ot[:self.offset_size]
ot = ot[self.offset_size:] ot = ot[self.offset_size:]
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry)) self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
#print "** plist offsets:",self.offsets # print "** plist offsets:",self.offsets
# read object table # read object table
self.objects = [] self.objects = []
k = 0 k = 0
for i in self.offsets: for i in self.offsets:
obj = self.__unpackItem(i) obj = self.__unpackItem(i)
#print "** plist unpacked",k,type(obj),obj,"at",i # print "** plist unpacked",k,type(obj),obj,"at",i
k += 1 k += 1
self.objects.append(obj) self.objects.append(obj)
# rebuild object tree # rebuild object tree
#for i in range(len(self.objects)): # for i in range(len(self.objects)):
# self.__resolveObject(i) # self.__resolveObject(i)
# return root object # return root object
return self.__resolveObject(self.top_object) return self.__resolveObject(self.top_object)
@classmethod
def plistWithString(cls, s):
    '''plistWithString(s)

    Convenience constructor: builds a reader over s and returns the result
    of its parse() call
    '''
    parser = cls(s)
    return parser.parse()
# helpers for testing # helpers for testing
def plist(obj):
    '''plist(obj) -> string

    Testing helper: serializes obj to a binary plist through Foundation's
    NSPropertyListSerialization. Foundation is imported lazily, so the
    module itself can be imported where it is not installed.
    '''
    from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
    # NOTE(review): presumably written for Python 2; on Python 3 str() of the
    # bytes() result may not be the raw payload -- confirm before use.
    b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
        obj, NSPropertyListBinaryFormat_v1_0, 0, None)
    return str(b.bytes())
def unplist(s):
    '''unplist(s)

    Testing helper: wraps s in an NSData object and returns whatever
    Foundation's NSPropertyListSerialization produces for it. Foundation is
    imported lazily, so the module itself can be imported where it is not
    installed.
    '''
    from Foundation import NSData, NSPropertyListSerialization
    d = NSData.dataWithBytes_length_(s, len(s))
    return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
if __name__ == "__main__": if __name__ == "__main__":
import os import os
import sys import sys

View File

@@ -14,12 +14,12 @@ logger = logging.getLogger(__name__)
def messages(path, data, assume_first_as_me=False): def messages(path, data, assume_first_as_me=False):
""" """
Extracts messages from an exported WhatsApp chat file. Extracts messages from an exported WhatsApp chat file.
Args: Args:
path: Path to the exported chat file path: Path to the exported chat file
data: Data container object to store the parsed chat data: Data container object to store the parsed chat
assume_first_as_me: If True, assumes the first message is sent from the user without asking assume_first_as_me: If True, assumes the first message is sent from the user without asking
Returns: Returns:
Updated data container with extracted messages Updated data container with extracted messages
""" """
@@ -27,16 +27,16 @@ def messages(path, data, assume_first_as_me=False):
chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED)) chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED))
you = "" # Will store the username of the current user you = "" # Will store the username of the current user
user_identification_done = False # Flag to track if user identification has been done user_identification_done = False # Flag to track if user identification has been done
# First pass: count total lines for progress reporting # First pass: count total lines for progress reporting
with open(path, "r", encoding="utf8") as file: with open(path, "r", encoding="utf8") as file:
total_row_number = sum(1 for _ in file) total_row_number = sum(1 for _ in file)
# Second pass: process the messages # Second pass: process the messages
with open(path, "r", encoding="utf8") as file: with open(path, "r", encoding="utf8") as file:
for index, line in enumerate(file): for index, line in enumerate(file):
you, user_identification_done = process_line( you, user_identification_done = process_line(
line, index, chat, path, you, line, index, chat, path, you,
assume_first_as_me, user_identification_done assume_first_as_me, user_identification_done
) )
@@ -51,31 +51,31 @@ def messages(path, data, assume_first_as_me=False):
def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done):
    """
    Process a single line from the chat file

    A line containing " - " is treated as the start of a new message: the
    text before the first " - " is handed over as the time and the rest as
    the content. Any other line is appended to the previous message.

    Args:
        line: Raw line read from the exported chat file
        index: Line index, forwarded to the message handlers
        chat: Chat object receiving the messages
        file_path: Path of the exported chat file
        you: Currently known name of the user ("" when not yet known)
        assume_first_as_me: Forwarded to process_new_message
        user_identification_done: Forwarded to process_new_message

    Returns:
        Tuple of (updated_you_value, updated_user_identification_done_flag)
    """
    time, separator, content = line.partition(" - ")

    # Check if this is a new message (has timestamp format)
    if separator:
        you, user_identification_done = process_new_message(
            time, content, index, chat, you, file_path,
            assume_first_as_me, user_identification_done
        )
    else:
        # This is a continuation of the previous message
        process_message_continuation(line, index, chat)

    return you, user_identification_done
def process_new_message(time, content, index, chat, you, file_path, def process_new_message(time, content, index, chat, you, file_path,
assume_first_as_me, user_identification_done): assume_first_as_me, user_identification_done):
""" """
Process a line that contains a new message Process a line that contains a new message
Returns: Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag) Tuple of (updated_you_value, updated_user_identification_done_flag)
""" """
@@ -88,7 +88,7 @@ def process_new_message(time, content, index, chat, you, file_path,
received_timestamp=None, received_timestamp=None,
read_timestamp=None read_timestamp=None
) )
# Check if this is a system message (no name:message format) # Check if this is a system message (no name:message format)
if ":" not in content: if ":" not in content:
msg.data = content msg.data = content
@@ -96,7 +96,7 @@ def process_new_message(time, content, index, chat, you, file_path,
else: else:
# Process user message # Process user message
name, message = content.strip().split(":", 1) name, message = content.strip().split(":", 1)
# Handle user identification # Handle user identification
if you == "": if you == "":
if chat.name is None: if chat.name is None:
@@ -113,17 +113,17 @@ def process_new_message(time, content, index, chat, you, file_path,
# If we know the chat name, anyone else must be "you" # If we know the chat name, anyone else must be "you"
if name != chat.name: if name != chat.name:
you = name you = name
# Set the chat name if needed # Set the chat name if needed
if chat.name is None and name != you: if chat.name is None and name != you:
chat.name = name chat.name = name
# Determine if this message is from the current user # Determine if this message is from the current user
msg.from_me = (name == you) msg.from_me = (name == you)
# Process message content # Process message content
process_message_content(msg, message, file_path) process_message_content(msg, message, file_path)
chat.add_message(index, msg) chat.add_message(index, msg)
return you, user_identification_done return you, user_identification_done
@@ -144,11 +144,11 @@ def process_attached_file(msg, message, file_path):
"""Process an attached file in a message""" """Process an attached file in a message"""
mime = MimeTypes() mime = MimeTypes()
msg.media = True msg.media = True
# Extract file path and check if it exists # Extract file path and check if it exists
file_name = message.split("(file attached)")[0].strip() file_name = message.split("(file attached)")[0].strip()
attached_file_path = os.path.join(os.path.dirname(file_path), file_name) attached_file_path = os.path.join(os.path.dirname(file_path), file_name)
if os.path.isfile(attached_file_path): if os.path.isfile(attached_file_path):
msg.data = attached_file_path msg.data = attached_file_path
guess = mime.guess_type(attached_file_path)[0] guess = mime.guess_type(attached_file_path)[0]
@@ -165,9 +165,9 @@ def process_message_continuation(line, index, chat):
lookback = index - 1 lookback = index - 1
while lookback not in chat.keys(): while lookback not in chat.keys():
lookback -= 1 lookback -= 1
msg = chat.get_message(lookback) msg = chat.get_message(lookback)
# Add the continuation line to the message # Add the continuation line to the message
if msg.media: if msg.media:
msg.caption = line.strip() msg.caption = line.strip()
@@ -182,4 +182,4 @@ def prompt_for_user_identification(name):
if ans == "y": if ans == "y":
return name return name
elif ans == "n": elif ans == "n":
return "" return ""

View File

@@ -21,14 +21,14 @@ def contacts(db, data):
c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
logger.info(f"Pre-processing contacts...({total_row_number})\r") logger.info(f"Pre-processing contacts...({total_row_number})\r")
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
zwhatsapp_id = content["ZWHATSAPPID"] zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"): if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net" zwhatsapp_id += "@s.whatsapp.net"
current_chat = ChatStore(Device.IOS) current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"] current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat) data.add_chat(zwhatsapp_id, current_chat)
@@ -40,7 +40,7 @@ def process_contact_avatars(current_chat, media_folder, contact_id):
"""Process and assign avatar images for a contact.""" """Process and assign avatar images for a contact."""
path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}' path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}'
avatars = glob(f"{path}*") avatars = glob(f"{path}*")
if 0 < len(avatars) <= 1: if 0 < len(avatars) <= 1:
current_chat.their_avatar = avatars[0] current_chat.their_avatar = avatars[0]
else: else:
@@ -64,12 +64,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
"""Process WhatsApp messages and contacts from the database.""" """Process WhatsApp messages and contacts from the database."""
c = db.cursor() c = db.cursor()
cursor2 = db.cursor() cursor2 = db.cursor()
# Build the chat filter conditions # Build the chat filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Process contacts first # Process contacts first
contact_query = f""" contact_query = f"""
SELECT count() SELECT count()
@@ -110,13 +112,13 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
GROUP BY ZCONTACTJID; GROUP BY ZCONTACTJID;
""" """
c.execute(contacts_query) c.execute(contacts_query)
# Process each contact # Process each contact
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
contact_name = get_contact_name(content) contact_name = get_contact_name(content)
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
# Add or update chat # Add or update chat
if contact_id not in data: if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder)) current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
@@ -124,11 +126,11 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
current_chat = data.get_chat(contact_id) current_chat = data.get_chat(contact_id)
current_chat.name = contact_name current_chat.name = contact_name
current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg") current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
# Process avatar images # Process avatar images
process_contact_avatars(current_chat, media_folder, contact_id) process_contact_avatars(current_chat, media_folder, contact_id)
content = c.fetchone() content = c.fetchone()
logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}") logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
# Get message count # Get message count
@@ -147,7 +149,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
c.execute(message_count_query) c.execute(message_count_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
logger.info(f"Processing messages...(0/{total_row_number})\r") logger.info(f"Processing messages...(0/{total_row_number})\r")
# Fetch messages # Fetch messages
messages_query = f""" messages_query = f"""
SELECT ZCONTACTJID, SELECT ZCONTACTJID,
@@ -175,7 +177,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
ORDER BY ZMESSAGEDATE ASC; ORDER BY ZMESSAGEDATE ASC;
""" """
c.execute(messages_query) c.execute(messages_query)
# Process each message # Process each message
i = 0 i = 0
content = c.fetchone() content = c.fetchone()
@@ -183,14 +185,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
contact_id = content["ZCONTACTJID"] contact_id = content["ZCONTACTJID"]
message_pk = content["Z_PK"] message_pk = content["Z_PK"]
is_group_message = content["ZGROUPINFO"] is not None is_group_message = content["ZGROUPINFO"] is not None
# Ensure chat exists # Ensure chat exists
if contact_id not in data: if contact_id not in data:
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS)) current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
process_contact_avatars(current_chat, media_folder, contact_id) process_contact_avatars(current_chat, media_folder, contact_id)
else: else:
current_chat = data.get_chat(contact_id) current_chat = data.get_chat(contact_id)
# Create message object # Create message object
ts = APPLE_TIME + content["ZMESSAGEDATE"] ts = APPLE_TIME + content["ZMESSAGEDATE"]
message = Message( message = Message(
@@ -203,14 +205,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None, received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
read_timestamp=None # TODO: Add timestamp read_timestamp=None # TODO: Add timestamp
) )
# Process message data # Process message data
invalid = process_message_data(message, content, is_group_message, data, cursor2) invalid = process_message_data(message, content, is_group_message, data, cursor2)
# Add valid messages to chat # Add valid messages to chat
if not invalid: if not invalid:
current_chat.add_message(message_pk, message) current_chat.add_message(message_pk, message)
# Update progress # Update progress
i += 1 i += 1
if i % 1000 == 0: if i % 1000 == 0:
@@ -236,11 +238,11 @@ def process_message_data(message, content, is_group_message, data, cursor2):
message.sender = name or fallback message.sender = name or fallback
else: else:
message.sender = None message.sender = None
# Handle metadata messages # Handle metadata messages
if content["ZMESSAGETYPE"] == 6: if content["ZMESSAGETYPE"] == 6:
return process_metadata_message(message, content, is_group_message) return process_metadata_message(message, content, is_group_message)
# Handle quoted replies # Handle quoted replies
if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and False: if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and False:
quoted = content["ZMETADATA"][2:19] quoted = content["ZMETADATA"][2:19]
@@ -250,17 +252,17 @@ def process_message_data(message, content, is_group_message, data, cursor2):
WHERE ZSTANZAID LIKE '{message.reply}%'""") WHERE ZSTANZAID LIKE '{message.reply}%'""")
quoted_content = cursor2.fetchone() quoted_content = cursor2.fetchone()
if quoted_content and "ZTEXT" in quoted_content: if quoted_content and "ZTEXT" in quoted_content:
message.quoted_data = quoted_content["ZTEXT"] message.quoted_data = quoted_content["ZTEXT"]
else: else:
message.quoted_data = None message.quoted_data = None
# Handle stickers # Handle stickers
if content["ZMESSAGETYPE"] == 15: if content["ZMESSAGETYPE"] == 15:
message.sticker = True message.sticker = True
# Process message text # Process message text
process_message_text(message, content) process_message_text(message, content)
return False # Message is valid return False # Message is valid
@@ -305,19 +307,21 @@ def process_message_text(message, content):
msg = content["ZTEXT"] msg = content["ZTEXT"]
if msg is not None: if msg is not None:
msg = msg.replace("\r\n", "<br>").replace("\n", "<br>") msg = msg.replace("\r\n", "<br>").replace("\n", "<br>")
message.data = msg message.data = msg
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False): def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
"""Process media files from WhatsApp messages.""" """Process media files from WhatsApp messages."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Get media count # Get media count
media_count_query = f""" media_count_query = f"""
SELECT count() SELECT count()
@@ -336,7 +340,7 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c.execute(media_count_query) c.execute(media_count_query)
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
logger.info(f"Processing media...(0/{total_row_number})\r") logger.info(f"Processing media...(0/{total_row_number})\r")
# Fetch media items # Fetch media items
media_query = f""" media_query = f"""
SELECT ZCONTACTJID, SELECT ZCONTACTJID,
@@ -360,14 +364,14 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
ORDER BY ZCONTACTJID ASC ORDER BY ZCONTACTJID ASC
""" """
c.execute(media_query) c.execute(media_query)
# Process each media item # Process each media item
mime = MimeTypes() mime = MimeTypes()
i = 0 i = 0
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
process_media_item(content, data, media_folder, mime, separate_media) process_media_item(content, data, media_folder, mime, separate_media)
# Update progress # Update progress
i += 1 i += 1
if i % 100 == 0: if i % 100 == 0:
@@ -382,23 +386,24 @@ def process_media_item(content, data, media_folder, mime, separate_media):
current_chat = data.get_chat(content["ZCONTACTJID"]) current_chat = data.get_chat(content["ZCONTACTJID"])
message = current_chat.get_message(content["ZMESSAGE"]) message = current_chat.get_message(content["ZMESSAGE"])
message.media = True message.media = True
if current_chat.media_base == "": if current_chat.media_base == "":
current_chat.media_base = media_folder + "/" current_chat.media_base = media_folder + "/"
if os.path.isfile(file_path): if os.path.isfile(file_path):
message.data = '/'.join(file_path.split("/")[1:]) message.data = '/'.join(file_path.split("/")[1:])
# Set MIME type # Set MIME type
if content["ZVCARDSTRING"] is None: if content["ZVCARDSTRING"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
message.mime = guess if guess is not None else "application/octet-stream" message.mime = guess if guess is not None else "application/octet-stream"
else: else:
message.mime = content["ZVCARDSTRING"] message.mime = content["ZVCARDSTRING"]
# Handle separate media option # Handle separate media option
if separate_media: if separate_media:
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True) chat_display_name = slugify(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
current_filename = file_path.split("/")[-1] current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name) new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -410,7 +415,7 @@ def process_media_item(content, data, media_folder, mime, separate_media):
message.data = "The media is missing" message.data = "The media is missing"
message.mime = "media" message.mime = "media"
message.meta = True message.meta = True
# Add caption if available # Add caption if available
if content["ZTITLE"] is not None: if content["ZTITLE"] is not None:
message.caption = content["ZTITLE"] message.caption = content["ZTITLE"]
@@ -419,12 +424,14 @@ def process_media_item(content, data, media_folder, mime, separate_media):
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
"""Process vCard contacts from WhatsApp messages.""" """Process vCard contacts from WhatsApp messages."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Fetch vCard mentions # Fetch vCard mentions
vcard_query = f""" vcard_query = f"""
SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
@@ -450,7 +457,7 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
contents = c.fetchall() contents = c.fetchall()
total_row_number = len(contents) total_row_number = len(contents)
logger.info(f"Processing vCards...(0/{total_row_number})\r") logger.info(f"Processing vCards...(0/{total_row_number})\r")
# Create vCards directory # Create vCards directory
path = f'{media_folder}/Message/vCards' path = f'{media_folder}/Message/vCards'
Path(path).mkdir(parents=True, exist_ok=True) Path(path).mkdir(parents=True, exist_ok=True)
@@ -484,9 +491,10 @@ def process_vcard_item(content, path, data):
f.write(vcard_string) f.write(vcard_string)
# Create vCard summary and update message # Create vCard summary and update message
vcard_summary = "This media include the following vCard file(s):<br>" vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)]) vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
fp in zip(vcard_names, file_paths)])
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"]) message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
message.data = vcard_summary message.data = vcard_summary
message.mime = "text/x-vcard" message.mime = "text/x-vcard"
@@ -498,11 +506,13 @@ def process_vcard_item(content, path, data):
def calls(db, data, timezone_offset, filter_chat): def calls(db, data, timezone_offset, filter_chat):
"""Process WhatsApp call records.""" """Process WhatsApp call records."""
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
# Get call count # Get call count
call_count_query = f""" call_count_query = f"""
SELECT count() SELECT count()
@@ -515,9 +525,9 @@ def calls(db, data, timezone_offset, filter_chat):
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
if total_row_number == 0: if total_row_number == 0:
return return
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n") logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
# Fetch call records # Fetch call records
calls_query = f""" calls_query = f"""
SELECT ZCALLIDSTRING, SELECT ZCALLIDSTRING,
@@ -538,16 +548,16 @@ def calls(db, data, timezone_offset, filter_chat):
{chat_filter_exclude} {chat_filter_exclude}
""" """
c.execute(calls_query) c.execute(calls_query)
# Create calls chat # Create calls chat
chat = ChatStore(Device.ANDROID, "WhatsApp Calls") chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
# Process each call # Process each call
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
process_call_record(content, chat, data, timezone_offset) process_call_record(content, chat, data, timezone_offset)
content = c.fetchone() content = c.fetchone()
# Add calls chat to data # Add calls chat to data
data.add_chat("000000000000000", chat) data.add_chat("000000000000000", chat)
@@ -562,7 +572,7 @@ def process_call_record(content, chat, data, timezone_offset):
key_id=content["ZCALLIDSTRING"], key_id=content["ZCALLIDSTRING"],
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET
) )
# Set sender info # Set sender info
_jid = content["ZGROUPCALLCREATORUSERJIDSTRING"] _jid = content["ZGROUPCALLCREATORUSERJIDSTRING"]
name = data.get_chat(_jid).name if _jid in data else None name = data.get_chat(_jid).name if _jid in data else None
@@ -571,11 +581,11 @@ def process_call_record(content, chat, data, timezone_offset):
else: else:
fallback = None fallback = None
call.sender = name or fallback call.sender = name or fallback
# Set call metadata # Set call metadata
call.meta = True call.meta = True
call.data = format_call_data(call, content) call.data = format_call_data(call, content)
# Add call to chat # Add call to chat
chat.add_message(call.key_id, call) chat.add_message(call.key_id, call)
@@ -589,7 +599,7 @@ def format_call_data(call, content):
f"call {'to' if call.from_me else 'from'} " f"call {'to' if call.from_me else 'from'} "
f"{call.sender} was " f"{call.sender} was "
) )
# Call outcome # Call outcome
if content['ZOUTCOME'] in (1, 4): if content['ZOUTCOME'] in (1, 4):
call_data += "not answered." if call.from_me else "missed." call_data += "not answered." if call.from_me else "missed."
@@ -604,5 +614,5 @@ def format_call_data(call, content):
) )
else: else:
call_data += "in an unknown state." call_data += "in an unknown state."
return call_data return call_data

View File

@@ -18,6 +18,7 @@ else:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class BackupExtractor: class BackupExtractor:
""" """
A class to handle the extraction of WhatsApp data from iOS backups, A class to handle the extraction of WhatsApp data from iOS backups,
@@ -61,9 +62,9 @@ class BackupExtractor:
""" """
if not support_encrypted: if not support_encrypted:
logger.error("You don't have the dependencies to handle encrypted backup." logger.error("You don't have the dependencies to handle encrypted backup."
"Read more on how to deal with encrypted backup:" "Read more on how to deal with encrypted backup:"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage" "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
) )
return return
logger.info(f"Encryption detected on the backup!{CLEAR_LINE}") logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
@@ -116,12 +117,12 @@ class BackupExtractor:
exit(6) exit(6)
else: else:
logger.info(f"Done{CLEAR_LINE}") logger.info(f"Done{CLEAR_LINE}")
def _extract_decrypted_files(self): def _extract_decrypted_files(self):
"""Extract all WhatsApp files after decryption""" """Extract all WhatsApp files after decryption"""
def extract_progress_handler(file_id, domain, relative_path, n, total_files): def extract_progress_handler(file_id, domain, relative_path, n, total_files):
if n % 100 == 0: if n % 100 == 0:
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r") logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
return True return True
self.backup.extract_files( self.backup.extract_files(
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
""" """
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size) extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
extractor.extract() extractor.extract()

View File

@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def convert_time_unit(time_second: int) -> str: def convert_time_unit(time_second: int) -> str:
"""Converts a time duration in seconds to a human-readable string. """Converts a time duration in seconds to a human-readable string.
@@ -168,7 +169,7 @@ def check_update():
"===============Update===============\n" "===============Update===============\n"
"A newer version of WhatsApp Chat Exporter is available.\n" "A newer version of WhatsApp Chat Exporter is available.\n"
f"Current version: {__version__}\n" f"Current version: {__version__}\n"
f"Latest version: {package_info['info']['version']}\n" f"Latest version: {package_info['info']['version']}\n"
) )
if platform == "win32": if platform == "win32":
logger.info("Update with: pip install --upgrade whatsapp-chat-exporter\n") logger.info("Update with: pip install --upgrade whatsapp-chat-exporter\n")

View File

@@ -6,19 +6,20 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
import re import re
import argparse import argparse
def process_phone_number(raw_phone): def process_phone_number(raw_phone):
""" """
Process the raw phone string from the VCARD and return two formatted numbers: Process the raw phone string from the VCARD and return two formatted numbers:
- The original formatted number, and - The original formatted number, and
- A modified formatted number with the extra (ninth) digit removed, if applicable. - A modified formatted number with the extra (ninth) digit removed, if applicable.
Desired output: Desired output:
For a number with a 9-digit subscriber: For a number with a 9-digit subscriber:
Original: "+55 {area} {first 5 of subscriber}-{last 4 of subscriber}" Original: "+55 {area} {first 5 of subscriber}-{last 4 of subscriber}"
Modified: "+55 {area} {subscriber[1:5]}-{subscriber[5:]}" Modified: "+55 {area} {subscriber[1:5]}-{subscriber[5:]}"
For example, for an input that should represent "027912345678", the outputs are: For example, for an input that should represent "027912345678", the outputs are:
"+55 27 91234-5678" and "+55 27 1234-5678" "+55 27 91234-5678" and "+55 27 1234-5678"
This function handles numbers that may already include a "+55" prefix. This function handles numbers that may already include a "+55" prefix.
It expects that after cleaning, a valid number (without the country code) should have either 10 digits It expects that after cleaning, a valid number (without the country code) should have either 10 digits
(2 for area + 8 for subscriber) or 11 digits (2 for area + 9 for subscriber). (2 for area + 8 for subscriber) or 11 digits (2 for area + 9 for subscriber).
@@ -26,18 +27,18 @@ def process_phone_number(raw_phone):
""" """
# Store the original input for processing # Store the original input for processing
number_to_process = raw_phone.strip() number_to_process = raw_phone.strip()
# Remove all non-digit characters # Remove all non-digit characters
digits = re.sub(r'\D', '', number_to_process) digits = re.sub(r'\D', '', number_to_process)
# If the number starts with '55', remove it for processing # If the number starts with '55', remove it for processing
if digits.startswith("55") and len(digits) > 11: if digits.startswith("55") and len(digits) > 11:
digits = digits[2:] digits = digits[2:]
# Remove trunk zero if present # Remove trunk zero if present
if digits.startswith("0"): if digits.startswith("0"):
digits = digits[1:] digits = digits[1:]
# After cleaning, we expect a valid number to have either 10 or 11 digits # After cleaning, we expect a valid number to have either 10 or 11 digits
# If there are extra digits, use the last 11 (for a 9-digit subscriber) or last 10 (for an 8-digit subscriber) # If there are extra digits, use the last 11 (for a 9-digit subscriber) or last 10 (for an 8-digit subscriber)
if len(digits) > 11: if len(digits) > 11:
@@ -46,7 +47,7 @@ def process_phone_number(raw_phone):
elif len(digits) > 10 and len(digits) < 11: elif len(digits) > 10 and len(digits) < 11:
# In some cases with an 8-digit subscriber, take the last 10 digits # In some cases with an 8-digit subscriber, take the last 10 digits
digits = digits[-10:] digits = digits[-10:]
# Check if we have a valid number after processing # Check if we have a valid number after processing
if len(digits) not in (10, 11): if len(digits) not in (10, 11):
return None, None return None, None
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
return original_formatted, modified_formatted return original_formatted, modified_formatted
def process_vcard(input_vcard, output_vcard): def process_vcard(input_vcard, output_vcard):
""" """
Process a VCARD file to standardize telephone entries and add a second TEL line Process a VCARD file to standardize telephone entries and add a second TEL line
@@ -77,13 +79,13 @@ def process_vcard(input_vcard, output_vcard):
""" """
with open(input_vcard, 'r', encoding='utf-8') as file: with open(input_vcard, 'r', encoding='utf-8') as file:
lines = file.readlines() lines = file.readlines()
output_lines = [] output_lines = []
# Regex to capture any telephone line. # Regex to capture any telephone line.
# It matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:". # It matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:".
phone_pattern = re.compile(r'^(?P<prefix>.*TEL(?:;TYPE=[^:]+)?):(?P<number>.*)$') phone_pattern = re.compile(r'^(?P<prefix>.*TEL(?:;TYPE=[^:]+)?):(?P<number>.*)$')
for line in lines: for line in lines:
stripped_line = line.rstrip("\n") stripped_line = line.rstrip("\n")
match = phone_pattern.match(stripped_line) match = phone_pattern.match(stripped_line)
@@ -99,10 +101,11 @@ def process_vcard(input_vcard, output_vcard):
output_lines.append(f"TEL;TYPE=CELL:{mod_formatted}\n") output_lines.append(f"TEL;TYPE=CELL:{mod_formatted}\n")
else: else:
output_lines.append(line) output_lines.append(line)
with open(output_vcard, 'w', encoding='utf-8') as file: with open(output_vcard, 'w', encoding='utf-8') as file:
file.writelines(output_lines) file.writelines(output_lines)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers." description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."
@@ -110,6 +113,6 @@ if __name__ == '__main__':
parser.add_argument('input_vcard', type=str, help='Input VCARD file') parser.add_argument('input_vcard', type=str, help='Input VCARD file')
parser.add_argument('output_vcard', type=str, help='Output VCARD file') parser.add_argument('output_vcard', type=str, help='Output VCARD file')
args = parser.parse_args() args = parser.parse_args()
process_vcard(args.input_vcard, args.output_vcard) process_vcard(args.input_vcard, args.output_vcard)
print(f"VCARD processed and saved to {args.output_vcard}") print(f"VCARD processed and saved to {args.output_vcard}")

View File

@@ -26,6 +26,7 @@ def _extract_encrypted_key(keyfile):
return _generate_hmac_of_hmac(key_stream) return _generate_hmac_of_hmac(key_stream)
if __name__ == "__main__": if __name__ == "__main__":
key = open("encrypted_backup.key", "rb").read() key = open("encrypted_backup.key", "rb").read()
database = open("wa.db.crypt15", "rb").read() database = open("wa.db.crypt15", "rb").read()

View File

@@ -6,11 +6,12 @@ from unittest.mock import patch
from scripts.brazilian_number_processing import process_phone_number, process_vcard from scripts.brazilian_number_processing import process_phone_number, process_vcard
class TestVCardProcessor(unittest.TestCase): class TestVCardProcessor(unittest.TestCase):
def test_process_phone_number(self): def test_process_phone_number(self):
"""Test the process_phone_number function with various inputs.""" """Test the process_phone_number function with various inputs."""
# Test cases for 9-digit subscriber numbers # Test cases for 9-digit subscriber numbers
test_cases_9_digit = [ test_cases_9_digit = [
# Standard 11-digit number (2 area + 9 subscriber) # Standard 11-digit number (2 area + 9 subscriber)
@@ -30,7 +31,7 @@ class TestVCardProcessor(unittest.TestCase):
# With extra non-digit characters # With extra non-digit characters
("+55-27-9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"), ("+55-27-9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
] ]
# Test cases for 8-digit subscriber numbers # Test cases for 8-digit subscriber numbers
test_cases_8_digit = [ test_cases_8_digit = [
# Standard 10-digit number (2 area + 8 subscriber) # Standard 10-digit number (2 area + 8 subscriber)
@@ -46,7 +47,7 @@ class TestVCardProcessor(unittest.TestCase):
# With country code and trunk zero # With country code and trunk zero
("+55 0 27 1234-5678", "+55 27 1234-5678", None), ("+55 0 27 1234-5678", "+55 27 1234-5678", None),
] ]
# Edge cases # Edge cases
edge_cases = [ edge_cases = [
# Too few digits # Too few digits
@@ -60,19 +61,19 @@ class TestVCardProcessor(unittest.TestCase):
# Unusual formatting but valid number # Unusual formatting but valid number
("(+55) [27] 9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"), ("(+55) [27] 9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
] ]
# Run tests for all cases # Run tests for all cases
all_cases = test_cases_9_digit + test_cases_8_digit + edge_cases all_cases = test_cases_9_digit + test_cases_8_digit + edge_cases
for raw_phone, expected_orig, expected_mod in all_cases: for raw_phone, expected_orig, expected_mod in all_cases:
with self.subTest(raw_phone=raw_phone): with self.subTest(raw_phone=raw_phone):
orig, mod = process_phone_number(raw_phone) orig, mod = process_phone_number(raw_phone)
self.assertEqual(orig, expected_orig) self.assertEqual(orig, expected_orig)
self.assertEqual(mod, expected_mod) self.assertEqual(mod, expected_mod)
def test_process_vcard(self): def test_process_vcard(self):
"""Test the process_vcard function with various VCARD formats.""" """Test the process_vcard function with various VCARD formats."""
# Test case 1: Standard TEL entries # Test case 1: Standard TEL entries
vcard1 = """BEGIN:VCARD vcard1 = """BEGIN:VCARD
VERSION:3.0 VERSION:3.0
@@ -202,26 +203,26 @@ END:VCARD
(vcard5, expected5), (vcard5, expected5),
(vcard6, expected6) (vcard6, expected6)
] ]
for i, (input_vcard, expected_output) in enumerate(test_cases): for i, (input_vcard, expected_output) in enumerate(test_cases):
with self.subTest(case=i+1): with self.subTest(case=i+1):
# Create temporary files for input and output # Create temporary files for input and output
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file: with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
input_file.write(input_vcard) input_file.write(input_vcard)
input_path = input_file.name input_path = input_file.name
output_path = input_path + '.out' output_path = input_path + '.out'
try: try:
# Process the VCARD # Process the VCARD
process_vcard(input_path, output_path) process_vcard(input_path, output_path)
# Read and verify the output # Read and verify the output
with open(output_path, 'r', encoding='utf-8') as output_file: with open(output_path, 'r', encoding='utf-8') as output_file:
actual_output = output_file.read() actual_output = output_file.read()
self.assertEqual(actual_output, expected_output) self.assertEqual(actual_output, expected_output)
finally: finally:
# Clean up temporary files # Clean up temporary files
if os.path.exists(input_path): if os.path.exists(input_path):
@@ -231,7 +232,7 @@ END:VCARD
def test_script_argument_handling(self): def test_script_argument_handling(self):
"""Test the script's command-line argument handling.""" """Test the script's command-line argument handling."""
test_input = """BEGIN:VCARD test_input = """BEGIN:VCARD
VERSION:3.0 VERSION:3.0
N:Test;User;;; N:Test;User;;;
@@ -239,16 +240,17 @@ FN:User Test
TEL:+5527912345678 TEL:+5527912345678
END:VCARD END:VCARD
""" """
# Create a temporary input file # Create a temporary input file
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file: with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
input_file.write(test_input) input_file.write(test_input)
input_path = input_file.name input_path = input_file.name
output_path = input_path + '.out' output_path = input_path + '.out'
try: try:
test_args = ['python' if os.name == 'nt' else 'python3', 'scripts/brazilian_number_processing.py', input_path, output_path] test_args = ['python' if os.name == 'nt' else 'python3',
'scripts/brazilian_number_processing.py', input_path, output_path]
# We're just testing that the argument parsing works # We're just testing that the argument parsing works
subprocess.call( subprocess.call(
test_args, test_args,
@@ -257,7 +259,7 @@ END:VCARD
) )
# Check if the output file was created # Check if the output file was created
self.assertTrue(os.path.exists(output_path)) self.assertTrue(os.path.exists(output_path))
finally: finally:
# Clean up temporary files # Clean up temporary files
if os.path.exists(input_path): if os.path.exists(input_path):
@@ -265,5 +267,6 @@ END:VCARD
if os.path.exists(output_path): if os.path.exists(output_path):
os.unlink(output_path) os.unlink(output_path)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -178,14 +178,14 @@ def test_incremental_merge_new_file(mock_filesystem):
source_dir = "/source" source_dir = "/source"
target_dir = "/target" target_dir = "/target"
media_dir = "media" media_dir = "media"
# Setup mock filesystem # Setup mock filesystem
mock_filesystem["exists"].side_effect = lambda x: x == "/source" mock_filesystem["exists"].side_effect = lambda x: x == "/source"
mock_filesystem["listdir"].return_value = ["chat.json"] mock_filesystem["listdir"].return_value = ["chat.json"]
# Run the function # Run the function
incremental_merge(source_dir, target_dir, media_dir, 2, True) incremental_merge(source_dir, target_dir, media_dir, 2, True)
# Verify the operations # Verify the operations
mock_filesystem["makedirs"].assert_called_once_with(target_dir, exist_ok=True) mock_filesystem["makedirs"].assert_called_once_with(target_dir, exist_ok=True)
mock_filesystem["copy2"].assert_called_once_with( mock_filesystem["copy2"].assert_called_once_with(

View File

@@ -43,9 +43,9 @@ def test_nuitka_binary():
"--assume-yes-for-downloads", "--assume-yes-for-downloads",
"--follow-imports", "--follow-imports",
"Whatsapp_Chat_Exporter/__main__.py", "Whatsapp_Chat_Exporter/__main__.py",
"--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility "--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility
] ]
compile_result = subprocess.run( compile_result = subprocess.run(
nuitka_command, nuitka_command,
capture_output=True, capture_output=True,

View File

@@ -8,12 +8,15 @@ def test_readVCardsFile():
data_dir = os.path.join(os.path.dirname(__file__), "data") data_dir = os.path.join(os.path.dirname(__file__), "data")
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0 assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
def test_create_number_to_name_dicts(): def test_create_number_to_name_dicts():
pass pass
def test_fuzzy_match_numbers(): def test_fuzzy_match_numbers():
pass pass
def test_normalize_number(): def test_normalize_number():
assert normalize_number('0531234567', '1') == '1531234567' assert normalize_number('0531234567', '1') == '1531234567'
assert normalize_number('001531234567', '2') == '1531234567' assert normalize_number('001531234567', '2') == '1531234567'