mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-28 21:30:43 +00:00
autopep8
This commit is contained in:
@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
|
||||
elif "crypt15" in args.backup:
|
||||
crypt = Crypt.CRYPT15
|
||||
else:
|
||||
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
|
||||
logger.error(
|
||||
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
|
||||
return 1
|
||||
|
||||
# Get key
|
||||
@@ -505,11 +506,11 @@ def handle_decrypt_error(error: int) -> None:
|
||||
"""Handle decryption errors with appropriate messages."""
|
||||
if error == 1:
|
||||
logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key"
|
||||
" are not present. For details, see README.md.\n")
|
||||
" are not present. For details, see README.md.\n")
|
||||
exit(3)
|
||||
elif error == 2:
|
||||
logger.error("Failed when decompressing the decrypted backup. "
|
||||
"Possibly incorrect offsets used in decryption.\n")
|
||||
"Possibly incorrect offsets used in decryption.\n")
|
||||
exit(4)
|
||||
else:
|
||||
logger.error("Unknown error occurred.\n")
|
||||
@@ -598,7 +599,7 @@ def handle_media_directory(args) -> None:
|
||||
logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}")
|
||||
except PermissionError:
|
||||
logger.warning("Cannot remove original WhatsApp directory. "
|
||||
"Perhaps the directory is opened?\n")
|
||||
"Perhaps the directory is opened?\n")
|
||||
else:
|
||||
logger.info(f"Copying media directory...\r")
|
||||
shutil.copytree(args.media, media_path)
|
||||
|
||||
@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
|
||||
)
|
||||
return db
|
||||
|
||||
|
||||
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
|
||||
"""Decrypt a crypt14 database using multithreading for brute-force offset detection.
|
||||
|
||||
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
|
||||
return db
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
|
||||
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations}
|
||||
future_to_offset = {executor.submit(attempt_decrypt, offset)
|
||||
: offset for offset in offset_combinations}
|
||||
|
||||
try:
|
||||
for future in concurrent.futures.as_completed(future_to_offset):
|
||||
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
|
||||
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
|
||||
|
||||
|
||||
|
||||
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
|
||||
"""Decrypt a crypt12 database.
|
||||
|
||||
@@ -319,7 +320,7 @@ def decrypt_backup(
|
||||
if crypt is not Crypt.CRYPT15 and len(key) != 158:
|
||||
raise InvalidKeyError("The key file must be 158 bytes")
|
||||
|
||||
#signature check, this is check is used in crypt 12 and 14
|
||||
# signature check, this is check is used in crypt 12 and 14
|
||||
if crypt != Crypt.CRYPT15:
|
||||
t1 = key[30:62]
|
||||
|
||||
@@ -329,7 +330,6 @@ def decrypt_backup(
|
||||
if t1 != database[3:35] and crypt == Crypt.CRYPT12:
|
||||
raise ValueError("The signature of key file and backup file mismatch")
|
||||
|
||||
|
||||
if crypt == Crypt.CRYPT15:
|
||||
if keyfile_stream:
|
||||
main_key, hex_key = _extract_enc_key(key)
|
||||
@@ -353,7 +353,6 @@ def decrypt_backup(
|
||||
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
|
||||
raise DecryptionError(f"Decryption failed: {e}") from e
|
||||
|
||||
|
||||
if not dry_run:
|
||||
with open(output, "wb") as f:
|
||||
f.write(db)
|
||||
|
||||
@@ -22,24 +22,26 @@ logger = logging.getLogger(__name__)
|
||||
def contacts(db, data, enrich_from_vcards):
|
||||
"""
|
||||
Process WhatsApp contacts from the database.
|
||||
|
||||
|
||||
Args:
|
||||
db: Database connection
|
||||
data: Data store object
|
||||
enrich_from_vcards: Path to vCard file for contact enrichment
|
||||
|
||||
|
||||
Returns:
|
||||
bool: False if no contacts found, True otherwise
|
||||
"""
|
||||
c = db.cursor()
|
||||
c.execute("SELECT count() FROM wa_contacts")
|
||||
total_row_number = c.fetchone()[0]
|
||||
|
||||
|
||||
if total_row_number == 0:
|
||||
if enrich_from_vcards is not None:
|
||||
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
|
||||
logger.info(
|
||||
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
|
||||
else:
|
||||
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
|
||||
logger.warning(
|
||||
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
|
||||
return False
|
||||
else:
|
||||
logger.info(f"Processed {total_row_number} contacts\n")
|
||||
@@ -51,14 +53,14 @@ def contacts(db, data, enrich_from_vcards):
|
||||
if row["status"] is not None:
|
||||
current_chat.status = row["status"]
|
||||
row = c.fetchone()
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty):
|
||||
"""
|
||||
Process WhatsApp messages from the database.
|
||||
|
||||
|
||||
Args:
|
||||
db: Database connection
|
||||
data: Data store object
|
||||
@@ -85,17 +87,17 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
i = 0
|
||||
# Fetch the first row safely
|
||||
content = _fetch_row_safely(content_cursor)
|
||||
|
||||
|
||||
while content is not None:
|
||||
_process_single_message(data, content, table_message, timezone_offset)
|
||||
|
||||
|
||||
i += 1
|
||||
if i % 1000 == 0:
|
||||
logger.info(f"Processing messages...({i}/{total_row_number})\r")
|
||||
|
||||
|
||||
# Fetch the next row safely
|
||||
content = _fetch_row_safely(content_cursor)
|
||||
|
||||
|
||||
logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
|
||||
|
||||
|
||||
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
|
||||
try:
|
||||
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
||||
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT count()
|
||||
FROM messages
|
||||
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
|
||||
except sqlite3.OperationalError:
|
||||
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
||||
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT count()
|
||||
FROM message
|
||||
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
|
||||
"""Get cursor for legacy database schema."""
|
||||
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||
|
||||
cursor.execute(f"""SELECT messages.key_remote_jid,
|
||||
messages._id,
|
||||
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
|
||||
"""Get cursor for new database schema."""
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||
|
||||
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
|
||||
message._id,
|
||||
@@ -292,19 +302,20 @@ def _process_single_message(data, content, table_message, timezone_offset):
|
||||
"""Process a single message row."""
|
||||
if content["key_remote_jid"] is None:
|
||||
return
|
||||
|
||||
|
||||
# Get or create the chat
|
||||
if not data.get_chat(content["key_remote_jid"]):
|
||||
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"]))
|
||||
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
|
||||
Device.ANDROID, content["chat_subject"]))
|
||||
else:
|
||||
current_chat = data.get_chat(content["key_remote_jid"])
|
||||
|
||||
|
||||
# Determine sender_jid_row_id
|
||||
if "sender_jid_row_id" in content:
|
||||
sender_jid_row_id = content["sender_jid_row_id"]
|
||||
else:
|
||||
sender_jid_row_id = None
|
||||
|
||||
|
||||
# Create message object
|
||||
message = Message(
|
||||
from_me=not sender_jid_row_id and content["key_from_me"],
|
||||
@@ -316,19 +327,19 @@ def _process_single_message(data, content, table_message, timezone_offset):
|
||||
received_timestamp=content["received_timestamp"],
|
||||
read_timestamp=content["read_timestamp"]
|
||||
)
|
||||
|
||||
|
||||
# Handle binary data
|
||||
if isinstance(content["data"], bytes):
|
||||
_process_binary_message(message, content)
|
||||
current_chat.add_message(content["_id"], message)
|
||||
return
|
||||
|
||||
|
||||
# Set sender for group chats
|
||||
if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0:
|
||||
_set_group_sender(message, content, data, table_message)
|
||||
else:
|
||||
message.sender = None
|
||||
|
||||
|
||||
# Handle quoted messages
|
||||
if content["quoted"] is not None:
|
||||
message.reply = content["quoted"]
|
||||
@@ -338,7 +349,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
|
||||
message.quoted_data = content["quoted_data"]
|
||||
else:
|
||||
message.reply = None
|
||||
|
||||
|
||||
# Handle message caption
|
||||
if not table_message and content["media_caption"] is not None:
|
||||
# Old schema
|
||||
@@ -348,14 +359,14 @@ def _process_single_message(data, content, table_message, timezone_offset):
|
||||
message.caption = content["data"]
|
||||
else:
|
||||
message.caption = None
|
||||
|
||||
|
||||
# Handle message content based on status
|
||||
if content["status"] == 6: # 6 = Metadata
|
||||
_process_metadata_message(message, content, data, table_message)
|
||||
else:
|
||||
# Real message
|
||||
_process_regular_message(message, content, table_message)
|
||||
|
||||
|
||||
current_chat.add_message(content["_id"], message)
|
||||
|
||||
|
||||
@@ -385,7 +396,7 @@ def _set_group_sender(message, content, data, table_message):
|
||||
name = data.get_chat(content["remote_resource"]).name
|
||||
if "@" in content["remote_resource"]:
|
||||
fallback = content["remote_resource"].split('@')[0]
|
||||
|
||||
|
||||
message.sender = name or fallback
|
||||
|
||||
|
||||
@@ -393,7 +404,7 @@ def _process_metadata_message(message, content, data, table_message):
|
||||
"""Process metadata message."""
|
||||
message.meta = True
|
||||
name = fallback = None
|
||||
|
||||
|
||||
if table_message:
|
||||
if content["sender_jid_row_id"] > 0:
|
||||
_jid = content["group_sender_jid"]
|
||||
@@ -412,12 +423,12 @@ def _process_metadata_message(message, content, data, table_message):
|
||||
fallback = _jid.split('@')[0]
|
||||
else:
|
||||
name = "You"
|
||||
|
||||
|
||||
message.data = determine_metadata(content, name or fallback)
|
||||
|
||||
|
||||
if isinstance(message.data, str) and "<br>" in message.data:
|
||||
message.safe = True
|
||||
|
||||
|
||||
if message.data is None:
|
||||
if content["video_call"] is not None: # Missed call
|
||||
message.meta = True
|
||||
@@ -433,7 +444,7 @@ def _process_metadata_message(message, content, data, table_message):
|
||||
def _process_regular_message(message, content, table_message):
|
||||
"""Process regular (non-metadata) message."""
|
||||
message.sticker = content["media_wa_type"] == 20 # Sticker is a message
|
||||
|
||||
|
||||
if content["key_from_me"] == 1:
|
||||
if content["status"] == 5 and content["edit_version"] == 7 or table_message and content["media_wa_type"] == 15:
|
||||
msg = "Message deleted"
|
||||
@@ -458,7 +469,7 @@ def _process_regular_message(message, content, table_message):
|
||||
msg = content["data"]
|
||||
if msg is not None:
|
||||
msg = _format_message_text(msg)
|
||||
|
||||
|
||||
message.data = msg
|
||||
|
||||
|
||||
@@ -474,7 +485,7 @@ def _format_message_text(text):
|
||||
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
|
||||
"""
|
||||
Process WhatsApp media files from the database.
|
||||
|
||||
|
||||
Args:
|
||||
db: Database connection
|
||||
data: Data store object
|
||||
@@ -487,28 +498,28 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
|
||||
c = db.cursor()
|
||||
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
|
||||
logger.info(f"Processing media...(0/{total_row_number})\r")
|
||||
|
||||
|
||||
try:
|
||||
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
|
||||
except sqlite3.OperationalError:
|
||||
content_cursor = _get_media_cursor_new(c, filter_empty, filter_date, filter_chat)
|
||||
|
||||
|
||||
content = content_cursor.fetchone()
|
||||
mime = MimeTypes()
|
||||
|
||||
|
||||
# Ensure thumbnails directory exists
|
||||
Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
i = 0
|
||||
while content is not None:
|
||||
_process_single_media(data, content, media_folder, mime, separate_media)
|
||||
|
||||
|
||||
i += 1
|
||||
if i % 100 == 0:
|
||||
logger.info(f"Processing media...({i}/{total_row_number})\r")
|
||||
|
||||
|
||||
content = content_cursor.fetchone()
|
||||
|
||||
|
||||
logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
|
||||
|
||||
|
||||
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
|
||||
try:
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT count()
|
||||
FROM message_media
|
||||
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
|
||||
except sqlite3.OperationalError:
|
||||
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT count()
|
||||
FROM message_media
|
||||
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
|
||||
"""Get cursor for legacy media database schema."""
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT messages.key_remote_jid,
|
||||
message_row_id,
|
||||
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
|
||||
"""Get cursor for new media database schema."""
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
include_filter = get_chat_condition(
|
||||
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
exclude_filter = get_chat_condition(
|
||||
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
|
||||
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
|
||||
message_row_id,
|
||||
@@ -633,10 +652,10 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
|
||||
current_chat = data.get_chat(content["key_remote_jid"])
|
||||
message = current_chat.get_message(content["message_row_id"])
|
||||
message.media = True
|
||||
|
||||
|
||||
if os.path.isfile(file_path):
|
||||
message.data = file_path
|
||||
|
||||
|
||||
# Set mime type
|
||||
if content["mime_type"] is None:
|
||||
guess = mime.guess_type(file_path)[0]
|
||||
@@ -646,11 +665,11 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
|
||||
message.mime = "application/octet-stream"
|
||||
else:
|
||||
message.mime = content["mime_type"]
|
||||
|
||||
|
||||
# Copy media to separate folder if needed
|
||||
if separate_media:
|
||||
chat_display_name = slugify(current_chat.name or message.sender
|
||||
or content["key_remote_jid"].split('@')[0], True)
|
||||
chat_display_name = slugify(current_chat.name or message.sender
|
||||
or content["key_remote_jid"].split('@')[0], True)
|
||||
current_filename = file_path.split("/")[-1]
|
||||
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
||||
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
||||
@@ -661,7 +680,7 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
|
||||
message.data = "The media is missing"
|
||||
message.mime = "media"
|
||||
message.meta = True
|
||||
|
||||
|
||||
# Handle thumbnail
|
||||
if content["thumbnail"] is not None:
|
||||
thumb_path = f"{media_folder}/thumbnails/{b64decode(content['file_hash']).hex()}.png"
|
||||
@@ -681,11 +700,11 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
|
||||
|
||||
total_row_number = len(rows)
|
||||
logger.info(f"Processing vCards...(0/{total_row_number})\r")
|
||||
|
||||
|
||||
# Create vCards directory if it doesn't exist
|
||||
path = os.path.join(media_folder, "vCards")
|
||||
Path(path).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
for index, row in enumerate(rows):
|
||||
_process_vcard_row(row, path, data)
|
||||
logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
|
||||
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
|
||||
"""Execute vCard query for modern WhatsApp database schema."""
|
||||
|
||||
# Build the filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
||||
|
||||
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
|
||||
"""Execute vCard query for legacy WhatsApp database schema."""
|
||||
|
||||
# Build the filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||
|
||||
@@ -760,11 +783,11 @@ def _process_vcard_row(row, path, data):
|
||||
file_name = "".join(x for x in media_name if x.isalnum())
|
||||
file_name = file_name.encode('utf-8')[:230].decode('utf-8', 'ignore')
|
||||
file_path = os.path.join(path, f"{file_name}.vcf")
|
||||
|
||||
|
||||
if not os.path.isfile(file_path):
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(row["vcard"])
|
||||
|
||||
|
||||
message = data.get_chat(row["key_remote_jid"]).get_message(row["message_row_id"])
|
||||
message.data = "This media include the following vCard file(s):<br>" \
|
||||
f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>'
|
||||
@@ -776,26 +799,26 @@ def _process_vcard_row(row, path, data):
|
||||
def calls(db, data, timezone_offset, filter_chat):
|
||||
"""Process call logs from WhatsApp database."""
|
||||
c = db.cursor()
|
||||
|
||||
|
||||
# Check if there are any calls that match the filter
|
||||
total_row_number = _get_calls_count(c, filter_chat)
|
||||
if total_row_number == 0:
|
||||
return
|
||||
|
||||
|
||||
logger.info(f"Processing calls...({total_row_number})\r")
|
||||
|
||||
|
||||
# Fetch call data
|
||||
calls_data = _fetch_calls_data(c, filter_chat)
|
||||
|
||||
|
||||
# Create a chat store for all calls
|
||||
chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
|
||||
|
||||
|
||||
# Process each call
|
||||
content = calls_data.fetchone()
|
||||
while content is not None:
|
||||
_process_call_record(content, chat, data, timezone_offset)
|
||||
content = calls_data.fetchone()
|
||||
|
||||
|
||||
# Add the calls chat to the data
|
||||
data.add_chat("000000000000000", chat)
|
||||
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
|
||||
@@ -861,7 +884,7 @@ def _process_call_record(content, chat, data, timezone_offset):
|
||||
received_timestamp=None, # TODO: Add timestamp
|
||||
read_timestamp=None # TODO: Add timestamp
|
||||
)
|
||||
|
||||
|
||||
# Get caller/callee name
|
||||
_jid = content["raw_string"]
|
||||
name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
|
||||
@@ -870,13 +893,13 @@ def _process_call_record(content, chat, data, timezone_offset):
|
||||
else:
|
||||
fallback = None
|
||||
call.sender = name or fallback
|
||||
|
||||
|
||||
# Set metadata
|
||||
call.meta = True
|
||||
|
||||
|
||||
# Construct call description based on call type and result
|
||||
call.data = _construct_call_description(content, call)
|
||||
|
||||
|
||||
# Add call to chat
|
||||
chat.add_message(content["_id"], call)
|
||||
|
||||
@@ -888,7 +911,7 @@ def _construct_call_description(content, call):
|
||||
f"call {'to' if call.from_me else 'from'} "
|
||||
f"{call.sender} was "
|
||||
)
|
||||
|
||||
|
||||
if content['call_result'] in (0, 4, 7):
|
||||
description += "cancelled." if call.from_me else "missed."
|
||||
elif content['call_result'] == 2:
|
||||
@@ -904,21 +927,21 @@ def _construct_call_description(content, call):
|
||||
)
|
||||
else:
|
||||
description += "in an unknown state."
|
||||
|
||||
|
||||
return description
|
||||
|
||||
|
||||
def create_html(
|
||||
data,
|
||||
output_folder,
|
||||
template=None,
|
||||
embedded=False,
|
||||
offline_static=False,
|
||||
maximum_size=None,
|
||||
no_avatar=False,
|
||||
experimental=False,
|
||||
headline=None
|
||||
):
|
||||
data,
|
||||
output_folder,
|
||||
template=None,
|
||||
embedded=False,
|
||||
offline_static=False,
|
||||
maximum_size=None,
|
||||
no_avatar=False,
|
||||
experimental=False,
|
||||
headline=None
|
||||
):
|
||||
"""Generate HTML chat files from data."""
|
||||
template = setup_template(template, no_avatar, experimental)
|
||||
|
||||
@@ -936,33 +959,33 @@ def create_html(
|
||||
if len(current_chat) == 0:
|
||||
# Skip empty chats
|
||||
continue
|
||||
|
||||
|
||||
safe_file_name, name = get_file_name(contact, current_chat)
|
||||
|
||||
if maximum_size is not None:
|
||||
_generate_paginated_chat(
|
||||
current_chat,
|
||||
safe_file_name,
|
||||
name,
|
||||
contact,
|
||||
output_folder,
|
||||
template,
|
||||
w3css,
|
||||
maximum_size,
|
||||
current_chat,
|
||||
safe_file_name,
|
||||
name,
|
||||
contact,
|
||||
output_folder,
|
||||
template,
|
||||
w3css,
|
||||
maximum_size,
|
||||
headline
|
||||
)
|
||||
else:
|
||||
_generate_single_chat(
|
||||
current_chat,
|
||||
safe_file_name,
|
||||
name,
|
||||
contact,
|
||||
output_folder,
|
||||
template,
|
||||
w3css,
|
||||
current_chat,
|
||||
safe_file_name,
|
||||
name,
|
||||
contact,
|
||||
output_folder,
|
||||
template,
|
||||
w3css,
|
||||
headline
|
||||
)
|
||||
|
||||
|
||||
if current % 10 == 0:
|
||||
logger.info(f"Generating chats...({current}/{total_row_number})\r")
|
||||
|
||||
@@ -990,20 +1013,20 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
|
||||
current_size = 0
|
||||
current_page = 1
|
||||
render_box = []
|
||||
|
||||
|
||||
# Use default maximum size if set to 0
|
||||
if maximum_size == 0:
|
||||
maximum_size = MAX_SIZE
|
||||
|
||||
|
||||
last_msg = current_chat.get_last_message().key_id
|
||||
|
||||
|
||||
for message in current_chat.values():
|
||||
# Calculate message size
|
||||
if message.data is not None and not message.meta and not message.media:
|
||||
current_size += len(message.data) + ROW_SIZE
|
||||
else:
|
||||
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
|
||||
|
||||
|
||||
if current_size > maximum_size:
|
||||
# Create a new page
|
||||
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
|
||||
@@ -1047,25 +1070,25 @@ def _generate_paginated_chat(current_chat, safe_file_name, name, contact, output
|
||||
def create_txt(data, output):
|
||||
"""Generate text files from chat data."""
|
||||
os.makedirs(output, exist_ok=True)
|
||||
|
||||
|
||||
for jik, chat in data.items():
|
||||
if len(chat) == 0:
|
||||
continue
|
||||
|
||||
|
||||
# Determine file name
|
||||
if chat.name is not None:
|
||||
contact = chat.name.replace('/', '')
|
||||
else:
|
||||
contact = jik.replace('+', '')
|
||||
|
||||
|
||||
output_file = os.path.join(output, f"{contact}.txt")
|
||||
|
||||
|
||||
with open(output_file, "w", encoding="utf8") as f:
|
||||
for message in chat.values():
|
||||
# Skip metadata in text format
|
||||
if message.meta and message.mime != "media":
|
||||
continue
|
||||
|
||||
|
||||
# Format the message
|
||||
formatted_message = _format_message_for_txt(message, contact)
|
||||
f.write(f"{formatted_message}\n")
|
||||
@@ -1074,16 +1097,16 @@ def create_txt(data, output):
|
||||
def _format_message_for_txt(message, contact):
|
||||
"""Format a message for text output."""
|
||||
date = datetime.fromtimestamp(message.timestamp).date()
|
||||
|
||||
|
||||
# Determine the sender name
|
||||
if message.from_me:
|
||||
name = "You"
|
||||
else:
|
||||
name = message.sender if message.sender else contact
|
||||
|
||||
|
||||
prefix = f"[{date} {message.time}] {name}: "
|
||||
prefix_length = len(prefix)
|
||||
|
||||
|
||||
# Handle different message types
|
||||
if message.media and ("/" in message.mime or message.mime == "media"):
|
||||
if message.data == "The media is missing":
|
||||
@@ -1095,9 +1118,9 @@ def _format_message_for_txt(message, contact):
|
||||
message_text = ""
|
||||
else:
|
||||
message_text = message.data.replace('<br>', f'\n{" " * prefix_length}')
|
||||
|
||||
|
||||
# Add caption if present
|
||||
if message.caption is not None:
|
||||
message_text += "\n" + ' ' * len(prefix) + message.caption.replace('<br>', f'\n{" " * prefix_length}')
|
||||
|
||||
|
||||
return f"{prefix}{message_text}"
|
||||
|
||||
@@ -24,31 +24,32 @@ import struct
|
||||
import codecs
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
class BPListWriter(object):
|
||||
def __init__(self, objects):
|
||||
self.bplist = ""
|
||||
self.objects = objects
|
||||
|
||||
|
||||
def binary(self):
|
||||
'''binary -> string
|
||||
|
||||
|
||||
Generates bplist
|
||||
'''
|
||||
self.data = 'bplist00'
|
||||
|
||||
|
||||
# TODO: flatten objects and count max length size
|
||||
|
||||
|
||||
# TODO: write objects and save offsets
|
||||
|
||||
|
||||
# TODO: write offsets
|
||||
|
||||
|
||||
# TODO: write metadata
|
||||
|
||||
|
||||
return self.data
|
||||
|
||||
|
||||
def write(self, filename):
|
||||
'''
|
||||
|
||||
|
||||
Writes bplist to file
|
||||
'''
|
||||
if self.bplist != "":
|
||||
@@ -57,18 +58,19 @@ class BPListWriter(object):
|
||||
else:
|
||||
raise Exception('BPlist not yet generated')
|
||||
|
||||
|
||||
class BPListReader(object):
|
||||
def __init__(self, s):
|
||||
self.data = s
|
||||
self.objects = []
|
||||
self.resolved = {}
|
||||
|
||||
|
||||
def __unpackIntStruct(self, sz, s):
|
||||
'''__unpackIntStruct(size, string) -> int
|
||||
|
||||
|
||||
Unpacks the integer of given size (1, 2 or 4 bytes) from string
|
||||
'''
|
||||
if sz == 1:
|
||||
if sz == 1:
|
||||
ot = '!B'
|
||||
elif sz == 2:
|
||||
ot = '!H'
|
||||
@@ -79,17 +81,17 @@ class BPListReader(object):
|
||||
else:
|
||||
raise Exception('int unpack size '+str(sz)+' unsupported')
|
||||
return struct.unpack(ot, s)[0]
|
||||
|
||||
|
||||
def __unpackInt(self, offset):
|
||||
'''__unpackInt(offset) -> int
|
||||
|
||||
|
||||
Unpacks int field from plist at given offset
|
||||
'''
|
||||
return self.__unpackIntMeta(offset)[1]
|
||||
|
||||
def __unpackIntMeta(self, offset):
|
||||
'''__unpackIntMeta(offset) -> (size, int)
|
||||
|
||||
|
||||
Unpacks int field from plist at given offset and returns its size and value
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
@@ -99,7 +101,7 @@ class BPListReader(object):
|
||||
|
||||
def __resolveIntSize(self, obj_info, offset):
|
||||
'''__resolveIntSize(obj_info, offset) -> (count, offset)
|
||||
|
||||
|
||||
Calculates count of objref* array entries and returns count and offset to first element
|
||||
'''
|
||||
if obj_info == 0x0F:
|
||||
@@ -112,10 +114,10 @@ class BPListReader(object):
|
||||
|
||||
def __unpackFloatStruct(self, sz, s):
|
||||
'''__unpackFloatStruct(size, string) -> float
|
||||
|
||||
|
||||
Unpacks the float of given size (4 or 8 bytes) from string
|
||||
'''
|
||||
if sz == 4:
|
||||
if sz == 4:
|
||||
ot = '!f'
|
||||
elif sz == 8:
|
||||
ot = '!d'
|
||||
@@ -125,7 +127,7 @@ class BPListReader(object):
|
||||
|
||||
def __unpackFloat(self, offset):
|
||||
'''__unpackFloat(offset) -> float
|
||||
|
||||
|
||||
Unpacks float field from plist at given offset
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
@@ -135,70 +137,79 @@ class BPListReader(object):
|
||||
|
||||
def __unpackDate(self, offset):
|
||||
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
|
||||
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td)
|
||||
return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
|
||||
|
||||
def __unpackItem(self, offset):
|
||||
'''__unpackItem(offset)
|
||||
|
||||
|
||||
Unpacks and returns an item from plist
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
|
||||
if obj_type == 0x00:
|
||||
if obj_info == 0x00: # null 0000 0000
|
||||
if obj_type == 0x00:
|
||||
if obj_info == 0x00: # null 0000 0000
|
||||
return None
|
||||
elif obj_info == 0x08: # bool 0000 1000 // false
|
||||
elif obj_info == 0x08: # bool 0000 1000 // false
|
||||
return False
|
||||
elif obj_info == 0x09: # bool 0000 1001 // true
|
||||
elif obj_info == 0x09: # bool 0000 1001 // true
|
||||
return True
|
||||
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
|
||||
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
|
||||
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
|
||||
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
|
||||
else:
|
||||
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed')
|
||||
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
|
||||
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
return self.__unpackInt(offset)
|
||||
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
return self.__unpackFloat(offset)
|
||||
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
|
||||
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
|
||||
return self.__unpackDate(offset)
|
||||
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
|
||||
# data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
|
||||
elif obj_type == 0x40:
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count] # XXX: we return data as str
|
||||
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
|
||||
return self.data[objref:objref+obj_count] # XXX: we return data as str
|
||||
# string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
|
||||
elif obj_type == 0x50:
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count]
|
||||
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
|
||||
# string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
|
||||
elif obj_type == 0x60:
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count*2].decode('utf-16be')
|
||||
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
|
||||
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
|
||||
# FIXME: Accept as a string for now
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count]
|
||||
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
# array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
elif obj_type == 0xA0:
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
arr = []
|
||||
for i in range(obj_count):
|
||||
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
arr.append(self.__unpackIntStruct(
|
||||
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
return arr
|
||||
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
# set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
elif obj_type == 0xC0:
|
||||
# XXX: not serializable via apple implementation
|
||||
raise Exception("0xC0 Not Implemented") # FIXME: implement
|
||||
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
|
||||
raise Exception("0xC0 Not Implemented") # FIXME: implement
|
||||
# dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
|
||||
elif obj_type == 0xD0:
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
keys = []
|
||||
for i in range(obj_count):
|
||||
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
keys.append(self.__unpackIntStruct(
|
||||
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
values = []
|
||||
objref += obj_count*self.object_ref_size
|
||||
for i in range(obj_count):
|
||||
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
values.append(self.__unpackIntStruct(
|
||||
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
dic = {}
|
||||
for i in range(obj_count):
|
||||
dic[keys[i]] = values[i]
|
||||
return dic
|
||||
else:
|
||||
raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset))
|
||||
|
||||
|
||||
def __resolveObject(self, idx):
|
||||
try:
|
||||
return self.resolved[idx]
|
||||
@@ -212,7 +223,7 @@ class BPListReader(object):
|
||||
return newArr
|
||||
if type(obj) == dict:
|
||||
newDic = {}
|
||||
for k,v in obj.items():
|
||||
for k, v in obj.items():
|
||||
key_resolved = self.__resolveObject(k)
|
||||
if isinstance(key_resolved, str):
|
||||
rk = key_resolved
|
||||
@@ -225,15 +236,16 @@ class BPListReader(object):
|
||||
else:
|
||||
self.resolved[idx] = obj
|
||||
return obj
|
||||
|
||||
|
||||
def parse(self):
|
||||
# read header
|
||||
if self.data[:8] != b'bplist00':
|
||||
raise Exception('Bad magic')
|
||||
|
||||
|
||||
# read trailer
|
||||
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:])
|
||||
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
|
||||
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
|
||||
'!6xBB4xI4xI4xI', self.data[-32:])
|
||||
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
|
||||
|
||||
# read offset table
|
||||
self.offset_table = self.data[self.table_offset:-32]
|
||||
@@ -243,40 +255,45 @@ class BPListReader(object):
|
||||
offset_entry = ot[:self.offset_size]
|
||||
ot = ot[self.offset_size:]
|
||||
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
|
||||
#print "** plist offsets:",self.offsets
|
||||
|
||||
# print "** plist offsets:",self.offsets
|
||||
|
||||
# read object table
|
||||
self.objects = []
|
||||
k = 0
|
||||
for i in self.offsets:
|
||||
obj = self.__unpackItem(i)
|
||||
#print "** plist unpacked",k,type(obj),obj,"at",i
|
||||
# print "** plist unpacked",k,type(obj),obj,"at",i
|
||||
k += 1
|
||||
self.objects.append(obj)
|
||||
|
||||
|
||||
# rebuild object tree
|
||||
#for i in range(len(self.objects)):
|
||||
# for i in range(len(self.objects)):
|
||||
# self.__resolveObject(i)
|
||||
|
||||
|
||||
# return root object
|
||||
return self.__resolveObject(self.top_object)
|
||||
|
||||
|
||||
@classmethod
|
||||
def plistWithString(cls, s):
|
||||
parser = cls(s)
|
||||
return parser.parse()
|
||||
|
||||
# helpers for testing
|
||||
|
||||
|
||||
def plist(obj):
    '''plist(obj) -> string

    Testing helper: serializes obj with Foundation's NSPropertyListSerialization
    in binary v1.0 format and returns str() of the resulting bytes.
    Foundation is imported inside the function, so it is only needed when this helper is called.
    '''
    from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
    serialized = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
        obj, NSPropertyListBinaryFormat_v1_0, 0, None)
    raw = serialized.bytes()
    return str(raw)
|
||||
|
||||
|
||||
def unplist(s):
    '''unplist(s) -> result of NSPropertyListSerialization

    Testing helper: wraps s in an NSData and hands it to Foundation's
    property-list parser, returning that call's result unchanged.
    Foundation is imported inside the function, so it is only needed when this helper is called.
    '''
    from Foundation import NSData, NSPropertyListSerialization
    wrapped = NSData.dataWithBytes_length_(s, len(s))
    parsed = NSPropertyListSerialization.propertyListWithData_options_format_error_(
        wrapped, 0, None, None)
    return parsed
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -14,12 +14,12 @@ logger = logging.getLogger(__name__)
|
||||
def messages(path, data, assume_first_as_me=False):
|
||||
"""
|
||||
Extracts messages from an exported WhatsApp chat file.
|
||||
|
||||
|
||||
Args:
|
||||
path: Path to the exported chat file
|
||||
data: Data container object to store the parsed chat
|
||||
assume_first_as_me: If True, assumes the first message is sent from the user without asking
|
||||
|
||||
|
||||
Returns:
|
||||
Updated data container with extracted messages
|
||||
"""
|
||||
@@ -27,16 +27,16 @@ def messages(path, data, assume_first_as_me=False):
|
||||
chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED))
|
||||
you = "" # Will store the username of the current user
|
||||
user_identification_done = False # Flag to track if user identification has been done
|
||||
|
||||
|
||||
# First pass: count total lines for progress reporting
|
||||
with open(path, "r", encoding="utf8") as file:
|
||||
total_row_number = sum(1 for _ in file)
|
||||
|
||||
|
||||
# Second pass: process the messages
|
||||
with open(path, "r", encoding="utf8") as file:
|
||||
for index, line in enumerate(file):
|
||||
you, user_identification_done = process_line(
|
||||
line, index, chat, path, you,
|
||||
line, index, chat, path, you,
|
||||
assume_first_as_me, user_identification_done
|
||||
)
|
||||
|
||||
@@ -51,31 +51,31 @@ def messages(path, data, assume_first_as_me=False):
|
||||
def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done):
    """
    Route one line of the chat file to the matching handler.

    A line containing " - " is handled as a new message: the text before the
    first " - " is passed on as its time and the remainder as its content.
    Any other line is handled as a continuation of the previous message.

    Returns:
        Tuple of (updated_you_value, updated_user_identification_done_flag)
    """
    timestamp_text, separator, content = line.partition(" - ")

    if not separator:
        # No timestamp separator: this line continues the previous message
        process_message_continuation(line, index, chat)
        return you, user_identification_done

    you, user_identification_done = process_new_message(
        timestamp_text, content, index, chat, you, file_path,
        assume_first_as_me, user_identification_done
    )
    return you, user_identification_done
|
||||
|
||||
|
||||
def process_new_message(time, content, index, chat, you, file_path,
|
||||
def process_new_message(time, content, index, chat, you, file_path,
|
||||
assume_first_as_me, user_identification_done):
|
||||
"""
|
||||
Process a line that contains a new message
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (updated_you_value, updated_user_identification_done_flag)
|
||||
"""
|
||||
@@ -88,7 +88,7 @@ def process_new_message(time, content, index, chat, you, file_path,
|
||||
received_timestamp=None,
|
||||
read_timestamp=None
|
||||
)
|
||||
|
||||
|
||||
# Check if this is a system message (no name:message format)
|
||||
if ":" not in content:
|
||||
msg.data = content
|
||||
@@ -96,7 +96,7 @@ def process_new_message(time, content, index, chat, you, file_path,
|
||||
else:
|
||||
# Process user message
|
||||
name, message = content.strip().split(":", 1)
|
||||
|
||||
|
||||
# Handle user identification
|
||||
if you == "":
|
||||
if chat.name is None:
|
||||
@@ -113,17 +113,17 @@ def process_new_message(time, content, index, chat, you, file_path,
|
||||
# If we know the chat name, anyone else must be "you"
|
||||
if name != chat.name:
|
||||
you = name
|
||||
|
||||
|
||||
# Set the chat name if needed
|
||||
if chat.name is None and name != you:
|
||||
chat.name = name
|
||||
|
||||
|
||||
# Determine if this message is from the current user
|
||||
msg.from_me = (name == you)
|
||||
|
||||
|
||||
# Process message content
|
||||
process_message_content(msg, message, file_path)
|
||||
|
||||
|
||||
chat.add_message(index, msg)
|
||||
return you, user_identification_done
|
||||
|
||||
@@ -144,11 +144,11 @@ def process_attached_file(msg, message, file_path):
|
||||
"""Process an attached file in a message"""
|
||||
mime = MimeTypes()
|
||||
msg.media = True
|
||||
|
||||
|
||||
# Extract file path and check if it exists
|
||||
file_name = message.split("(file attached)")[0].strip()
|
||||
attached_file_path = os.path.join(os.path.dirname(file_path), file_name)
|
||||
|
||||
|
||||
if os.path.isfile(attached_file_path):
|
||||
msg.data = attached_file_path
|
||||
guess = mime.guess_type(attached_file_path)[0]
|
||||
@@ -165,9 +165,9 @@ def process_message_continuation(line, index, chat):
|
||||
lookback = index - 1
|
||||
while lookback not in chat.keys():
|
||||
lookback -= 1
|
||||
|
||||
|
||||
msg = chat.get_message(lookback)
|
||||
|
||||
|
||||
# Add the continuation line to the message
|
||||
if msg.media:
|
||||
msg.caption = line.strip()
|
||||
@@ -182,4 +182,4 @@ def prompt_for_user_identification(name):
|
||||
if ans == "y":
|
||||
return name
|
||||
elif ans == "n":
|
||||
return ""
|
||||
return ""
|
||||
|
||||
@@ -21,14 +21,14 @@ def contacts(db, data):
|
||||
c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
|
||||
total_row_number = c.fetchone()[0]
|
||||
logger.info(f"Pre-processing contacts...({total_row_number})\r")
|
||||
|
||||
|
||||
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
|
||||
content = c.fetchone()
|
||||
while content is not None:
|
||||
zwhatsapp_id = content["ZWHATSAPPID"]
|
||||
if not zwhatsapp_id.endswith("@s.whatsapp.net"):
|
||||
zwhatsapp_id += "@s.whatsapp.net"
|
||||
|
||||
|
||||
current_chat = ChatStore(Device.IOS)
|
||||
current_chat.status = content["ZABOUTTEXT"]
|
||||
data.add_chat(zwhatsapp_id, current_chat)
|
||||
@@ -40,7 +40,7 @@ def process_contact_avatars(current_chat, media_folder, contact_id):
|
||||
"""Process and assign avatar images for a contact."""
|
||||
path = f'{media_folder}/Media/Profile/{contact_id.split("@")[0]}'
|
||||
avatars = glob(f"{path}*")
|
||||
|
||||
|
||||
if 0 < len(avatars) <= 1:
|
||||
current_chat.their_avatar = avatars[0]
|
||||
else:
|
||||
@@ -64,12 +64,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
"""Process WhatsApp messages and contacts from the database."""
|
||||
c = db.cursor()
|
||||
cursor2 = db.cursor()
|
||||
|
||||
|
||||
# Build the chat filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||
|
||||
|
||||
# Process contacts first
|
||||
contact_query = f"""
|
||||
SELECT count()
|
||||
@@ -110,13 +112,13 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
GROUP BY ZCONTACTJID;
|
||||
"""
|
||||
c.execute(contacts_query)
|
||||
|
||||
|
||||
# Process each contact
|
||||
content = c.fetchone()
|
||||
while content is not None:
|
||||
contact_name = get_contact_name(content)
|
||||
contact_id = content["ZCONTACTJID"]
|
||||
|
||||
|
||||
# Add or update chat
|
||||
if contact_id not in data:
|
||||
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
|
||||
@@ -124,11 +126,11 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
current_chat = data.get_chat(contact_id)
|
||||
current_chat.name = contact_name
|
||||
current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
|
||||
|
||||
|
||||
# Process avatar images
|
||||
process_contact_avatars(current_chat, media_folder, contact_id)
|
||||
content = c.fetchone()
|
||||
|
||||
|
||||
logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
|
||||
|
||||
# Get message count
|
||||
@@ -147,7 +149,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
c.execute(message_count_query)
|
||||
total_row_number = c.fetchone()[0]
|
||||
logger.info(f"Processing messages...(0/{total_row_number})\r")
|
||||
|
||||
|
||||
# Fetch messages
|
||||
messages_query = f"""
|
||||
SELECT ZCONTACTJID,
|
||||
@@ -175,7 +177,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
ORDER BY ZMESSAGEDATE ASC;
|
||||
"""
|
||||
c.execute(messages_query)
|
||||
|
||||
|
||||
# Process each message
|
||||
i = 0
|
||||
content = c.fetchone()
|
||||
@@ -183,14 +185,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
contact_id = content["ZCONTACTJID"]
|
||||
message_pk = content["Z_PK"]
|
||||
is_group_message = content["ZGROUPINFO"] is not None
|
||||
|
||||
|
||||
# Ensure chat exists
|
||||
if contact_id not in data:
|
||||
current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
|
||||
process_contact_avatars(current_chat, media_folder, contact_id)
|
||||
else:
|
||||
current_chat = data.get_chat(contact_id)
|
||||
|
||||
|
||||
# Create message object
|
||||
ts = APPLE_TIME + content["ZMESSAGEDATE"]
|
||||
message = Message(
|
||||
@@ -203,14 +205,14 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
||||
received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
|
||||
read_timestamp=None # TODO: Add timestamp
|
||||
)
|
||||
|
||||
|
||||
# Process message data
|
||||
invalid = process_message_data(message, content, is_group_message, data, cursor2)
|
||||
|
||||
|
||||
# Add valid messages to chat
|
||||
if not invalid:
|
||||
current_chat.add_message(message_pk, message)
|
||||
|
||||
|
||||
# Update progress
|
||||
i += 1
|
||||
if i % 1000 == 0:
|
||||
@@ -236,11 +238,11 @@ def process_message_data(message, content, is_group_message, data, cursor2):
|
||||
message.sender = name or fallback
|
||||
else:
|
||||
message.sender = None
|
||||
|
||||
|
||||
# Handle metadata messages
|
||||
if content["ZMESSAGETYPE"] == 6:
|
||||
return process_metadata_message(message, content, is_group_message)
|
||||
|
||||
|
||||
# Handle quoted replies
|
||||
if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14") and False:
|
||||
quoted = content["ZMETADATA"][2:19]
|
||||
@@ -250,17 +252,17 @@ def process_message_data(message, content, is_group_message, data, cursor2):
|
||||
WHERE ZSTANZAID LIKE '{message.reply}%'""")
|
||||
quoted_content = cursor2.fetchone()
|
||||
if quoted_content and "ZTEXT" in quoted_content:
|
||||
message.quoted_data = quoted_content["ZTEXT"]
|
||||
message.quoted_data = quoted_content["ZTEXT"]
|
||||
else:
|
||||
message.quoted_data = None
|
||||
|
||||
|
||||
# Handle stickers
|
||||
if content["ZMESSAGETYPE"] == 15:
|
||||
message.sticker = True
|
||||
|
||||
# Process message text
|
||||
process_message_text(message, content)
|
||||
|
||||
|
||||
return False # Message is valid
|
||||
|
||||
|
||||
@@ -305,19 +307,21 @@ def process_message_text(message, content):
|
||||
msg = content["ZTEXT"]
|
||||
if msg is not None:
|
||||
msg = msg.replace("\r\n", "<br>").replace("\n", "<br>")
|
||||
|
||||
|
||||
message.data = msg
|
||||
|
||||
|
||||
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
|
||||
"""Process media files from WhatsApp messages."""
|
||||
c = db.cursor()
|
||||
|
||||
|
||||
# Build filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||
|
||||
|
||||
# Get media count
|
||||
media_count_query = f"""
|
||||
SELECT count()
|
||||
@@ -336,7 +340,7 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
|
||||
c.execute(media_count_query)
|
||||
total_row_number = c.fetchone()[0]
|
||||
logger.info(f"Processing media...(0/{total_row_number})\r")
|
||||
|
||||
|
||||
# Fetch media items
|
||||
media_query = f"""
|
||||
SELECT ZCONTACTJID,
|
||||
@@ -360,14 +364,14 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
|
||||
ORDER BY ZCONTACTJID ASC
|
||||
"""
|
||||
c.execute(media_query)
|
||||
|
||||
|
||||
# Process each media item
|
||||
mime = MimeTypes()
|
||||
i = 0
|
||||
content = c.fetchone()
|
||||
while content is not None:
|
||||
process_media_item(content, data, media_folder, mime, separate_media)
|
||||
|
||||
|
||||
# Update progress
|
||||
i += 1
|
||||
if i % 100 == 0:
|
||||
@@ -382,23 +386,24 @@ def process_media_item(content, data, media_folder, mime, separate_media):
|
||||
current_chat = data.get_chat(content["ZCONTACTJID"])
|
||||
message = current_chat.get_message(content["ZMESSAGE"])
|
||||
message.media = True
|
||||
|
||||
|
||||
if current_chat.media_base == "":
|
||||
current_chat.media_base = media_folder + "/"
|
||||
|
||||
|
||||
if os.path.isfile(file_path):
|
||||
message.data = '/'.join(file_path.split("/")[1:])
|
||||
|
||||
|
||||
# Set MIME type
|
||||
if content["ZVCARDSTRING"] is None:
|
||||
guess = mime.guess_type(file_path)[0]
|
||||
message.mime = guess if guess is not None else "application/octet-stream"
|
||||
else:
|
||||
message.mime = content["ZVCARDSTRING"]
|
||||
|
||||
|
||||
# Handle separate media option
|
||||
if separate_media:
|
||||
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
|
||||
chat_display_name = slugify(
|
||||
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
|
||||
current_filename = file_path.split("/")[-1]
|
||||
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
||||
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
||||
@@ -410,7 +415,7 @@ def process_media_item(content, data, media_folder, mime, separate_media):
|
||||
message.data = "The media is missing"
|
||||
message.mime = "media"
|
||||
message.meta = True
|
||||
|
||||
|
||||
# Add caption if available
|
||||
if content["ZTITLE"] is not None:
|
||||
message.caption = content["ZTITLE"]
|
||||
@@ -419,12 +424,14 @@ def process_media_item(content, data, media_folder, mime, separate_media):
|
||||
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
|
||||
"""Process vCard contacts from WhatsApp messages."""
|
||||
c = db.cursor()
|
||||
|
||||
|
||||
# Build filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||
|
||||
|
||||
# Fetch vCard mentions
|
||||
vcard_query = f"""
|
||||
SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
|
||||
@@ -450,7 +457,7 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
|
||||
contents = c.fetchall()
|
||||
total_row_number = len(contents)
|
||||
logger.info(f"Processing vCards...(0/{total_row_number})\r")
|
||||
|
||||
|
||||
# Create vCards directory
|
||||
path = f'{media_folder}/Message/vCards'
|
||||
Path(path).mkdir(parents=True, exist_ok=True)
|
||||
@@ -484,9 +491,10 @@ def process_vcard_item(content, path, data):
|
||||
f.write(vcard_string)
|
||||
|
||||
# Create vCard summary and update message
|
||||
vcard_summary = "This media include the following vCard file(s):<br>"
|
||||
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)])
|
||||
|
||||
vcard_summary = "This media include the following vCard file(s):<br>"
|
||||
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
|
||||
fp in zip(vcard_names, file_paths)])
|
||||
|
||||
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
|
||||
message.data = vcard_summary
|
||||
message.mime = "text/x-vcard"
|
||||
@@ -498,11 +506,13 @@ def process_vcard_item(content, path, data):
|
||||
def calls(db, data, timezone_offset, filter_chat):
|
||||
"""Process WhatsApp call records."""
|
||||
c = db.cursor()
|
||||
|
||||
|
||||
# Build filter conditions
|
||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||
|
||||
chat_filter_include = get_chat_condition(
|
||||
filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||
chat_filter_exclude = get_chat_condition(
|
||||
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||
|
||||
# Get call count
|
||||
call_count_query = f"""
|
||||
SELECT count()
|
||||
@@ -515,9 +525,9 @@ def calls(db, data, timezone_offset, filter_chat):
|
||||
total_row_number = c.fetchone()[0]
|
||||
if total_row_number == 0:
|
||||
return
|
||||
|
||||
|
||||
logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
|
||||
|
||||
|
||||
# Fetch call records
|
||||
calls_query = f"""
|
||||
SELECT ZCALLIDSTRING,
|
||||
@@ -538,16 +548,16 @@ def calls(db, data, timezone_offset, filter_chat):
|
||||
{chat_filter_exclude}
|
||||
"""
|
||||
c.execute(calls_query)
|
||||
|
||||
|
||||
# Create calls chat
|
||||
chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
|
||||
|
||||
|
||||
# Process each call
|
||||
content = c.fetchone()
|
||||
while content is not None:
|
||||
process_call_record(content, chat, data, timezone_offset)
|
||||
content = c.fetchone()
|
||||
|
||||
|
||||
# Add calls chat to data
|
||||
data.add_chat("000000000000000", chat)
|
||||
|
||||
@@ -562,7 +572,7 @@ def process_call_record(content, chat, data, timezone_offset):
|
||||
key_id=content["ZCALLIDSTRING"],
|
||||
timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET
|
||||
)
|
||||
|
||||
|
||||
# Set sender info
|
||||
_jid = content["ZGROUPCALLCREATORUSERJIDSTRING"]
|
||||
name = data.get_chat(_jid).name if _jid in data else None
|
||||
@@ -571,11 +581,11 @@ def process_call_record(content, chat, data, timezone_offset):
|
||||
else:
|
||||
fallback = None
|
||||
call.sender = name or fallback
|
||||
|
||||
|
||||
# Set call metadata
|
||||
call.meta = True
|
||||
call.data = format_call_data(call, content)
|
||||
|
||||
|
||||
# Add call to chat
|
||||
chat.add_message(call.key_id, call)
|
||||
|
||||
@@ -589,7 +599,7 @@ def format_call_data(call, content):
|
||||
f"call {'to' if call.from_me else 'from'} "
|
||||
f"{call.sender} was "
|
||||
)
|
||||
|
||||
|
||||
# Call outcome
|
||||
if content['ZOUTCOME'] in (1, 4):
|
||||
call_data += "not answered." if call.from_me else "missed."
|
||||
@@ -604,5 +614,5 @@ def format_call_data(call, content):
|
||||
)
|
||||
else:
|
||||
call_data += "in an unknown state."
|
||||
|
||||
return call_data
|
||||
|
||||
return call_data
|
||||
|
||||
@@ -18,6 +18,7 @@ else:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BackupExtractor:
|
||||
"""
|
||||
A class to handle the extraction of WhatsApp data from iOS backups,
|
||||
@@ -61,9 +62,9 @@ class BackupExtractor:
|
||||
"""
|
||||
if not support_encrypted:
|
||||
logger.error("You don't have the dependencies to handle encrypted backup."
|
||||
"Read more on how to deal with encrypted backup:"
|
||||
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
|
||||
)
|
||||
"Read more on how to deal with encrypted backup:"
|
||||
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
|
||||
@@ -116,12 +117,12 @@ class BackupExtractor:
|
||||
exit(6)
|
||||
else:
|
||||
logger.info(f"Done{CLEAR_LINE}")
|
||||
|
||||
|
||||
def _extract_decrypted_files(self):
|
||||
"""Extract all WhatsApp files after decryption"""
|
||||
def extract_progress_handler(file_id, domain, relative_path, n, total_files):
|
||||
if n % 100 == 0:
|
||||
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
|
||||
logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
|
||||
return True
|
||||
|
||||
self.backup.extract_files(
|
||||
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
|
||||
"""
|
||||
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
|
||||
extractor.extract()
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def convert_time_unit(time_second: int) -> str:
|
||||
"""Converts a time duration in seconds to a human-readable string.
|
||||
|
||||
@@ -168,7 +169,7 @@ def check_update():
|
||||
"===============Update===============\n"
|
||||
"A newer version of WhatsApp Chat Exporter is available.\n"
|
||||
f"Current version: {__version__}\n"
|
||||
f"Latest version: {package_info['info']['version']}\n"
|
||||
f"Latest version: {package_info['info']['version']}\n"
|
||||
)
|
||||
if platform == "win32":
|
||||
logger.info("Update with: pip install --upgrade whatsapp-chat-exporter\n")
|
||||
|
||||
@@ -6,19 +6,20 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
|
||||
import re
|
||||
import argparse
|
||||
|
||||
|
||||
def process_phone_number(raw_phone):
|
||||
"""
|
||||
Process the raw phone string from the VCARD and return two formatted numbers:
|
||||
- The original formatted number, and
|
||||
- A modified formatted number with the extra (ninth) digit removed, if applicable.
|
||||
|
||||
|
||||
Desired output:
|
||||
For a number with a 9-digit subscriber:
|
||||
Original: "+55 {area} {first 5 of subscriber}-{last 4 of subscriber}"
|
||||
Modified: "+55 {area} {subscriber[1:5]}-{subscriber[5:]}"
|
||||
For example, for an input that should represent "027912345678", the outputs are:
|
||||
"+55 27 91234-5678" and "+55 27 1234-5678"
|
||||
|
||||
|
||||
This function handles numbers that may already include a "+55" prefix.
|
||||
It expects that after cleaning, a valid number (without the country code) should have either 10 digits
|
||||
(2 for area + 8 for subscriber) or 11 digits (2 for area + 9 for subscriber).
|
||||
@@ -26,18 +27,18 @@ def process_phone_number(raw_phone):
|
||||
"""
|
||||
# Store the original input for processing
|
||||
number_to_process = raw_phone.strip()
|
||||
|
||||
|
||||
# Remove all non-digit characters
|
||||
digits = re.sub(r'\D', '', number_to_process)
|
||||
|
||||
|
||||
# If the number starts with '55', remove it for processing
|
||||
if digits.startswith("55") and len(digits) > 11:
|
||||
digits = digits[2:]
|
||||
|
||||
|
||||
# Remove trunk zero if present
|
||||
if digits.startswith("0"):
|
||||
digits = digits[1:]
|
||||
|
||||
|
||||
# After cleaning, we expect a valid number to have either 10 or 11 digits
|
||||
# If there are extra digits, use the last 11 (for a 9-digit subscriber) or last 10 (for an 8-digit subscriber)
|
||||
if len(digits) > 11:
|
||||
@@ -46,7 +47,7 @@ def process_phone_number(raw_phone):
|
||||
elif len(digits) > 10 and len(digits) < 11:
|
||||
# In some cases with an 8-digit subscriber, take the last 10 digits
|
||||
digits = digits[-10:]
|
||||
|
||||
|
||||
# Check if we have a valid number after processing
|
||||
if len(digits) not in (10, 11):
|
||||
return None, None
|
||||
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
|
||||
|
||||
return original_formatted, modified_formatted
|
||||
|
||||
|
||||
def process_vcard(input_vcard, output_vcard):
|
||||
"""
|
||||
Process a VCARD file to standardize telephone entries and add a second TEL line
|
||||
@@ -77,13 +79,13 @@ def process_vcard(input_vcard, output_vcard):
|
||||
"""
|
||||
with open(input_vcard, 'r', encoding='utf-8') as file:
|
||||
lines = file.readlines()
|
||||
|
||||
|
||||
output_lines = []
|
||||
|
||||
|
||||
# Regex to capture any telephone line.
|
||||
# It matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:".
|
||||
phone_pattern = re.compile(r'^(?P<prefix>.*TEL(?:;TYPE=[^:]+)?):(?P<number>.*)$')
|
||||
|
||||
|
||||
for line in lines:
|
||||
stripped_line = line.rstrip("\n")
|
||||
match = phone_pattern.match(stripped_line)
|
||||
@@ -99,10 +101,11 @@ def process_vcard(input_vcard, output_vcard):
|
||||
output_lines.append(f"TEL;TYPE=CELL:{mod_formatted}\n")
|
||||
else:
|
||||
output_lines.append(line)
|
||||
|
||||
|
||||
with open(output_vcard, 'w', encoding='utf-8') as file:
|
||||
file.writelines(output_lines)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."
|
||||
@@ -110,6 +113,6 @@ if __name__ == '__main__':
|
||||
parser.add_argument('input_vcard', type=str, help='Input VCARD file')
|
||||
parser.add_argument('output_vcard', type=str, help='Output VCARD file')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
process_vcard(args.input_vcard, args.output_vcard)
|
||||
print(f"VCARD processed and saved to {args.output_vcard}")
|
||||
print(f"VCARD processed and saved to {args.output_vcard}")
|
||||
|
||||
@@ -26,6 +26,7 @@ def _extract_encrypted_key(keyfile):
|
||||
|
||||
return _generate_hmac_of_hmac(key_stream)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
key = open("encrypted_backup.key", "rb").read()
|
||||
database = open("wa.db.crypt15", "rb").read()
|
||||
|
||||
@@ -6,11 +6,12 @@ from unittest.mock import patch
|
||||
|
||||
from scripts.brazilian_number_processing import process_phone_number, process_vcard
|
||||
|
||||
|
||||
class TestVCardProcessor(unittest.TestCase):
|
||||
|
||||
|
||||
def test_process_phone_number(self):
|
||||
"""Test the process_phone_number function with various inputs."""
|
||||
|
||||
|
||||
# Test cases for 9-digit subscriber numbers
|
||||
test_cases_9_digit = [
|
||||
# Standard 11-digit number (2 area + 9 subscriber)
|
||||
@@ -30,7 +31,7 @@ class TestVCardProcessor(unittest.TestCase):
|
||||
# With extra non-digit characters
|
||||
("+55-27-9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
|
||||
]
|
||||
|
||||
|
||||
# Test cases for 8-digit subscriber numbers
|
||||
test_cases_8_digit = [
|
||||
# Standard 10-digit number (2 area + 8 subscriber)
|
||||
@@ -46,7 +47,7 @@ class TestVCardProcessor(unittest.TestCase):
|
||||
# With country code and trunk zero
|
||||
("+55 0 27 1234-5678", "+55 27 1234-5678", None),
|
||||
]
|
||||
|
||||
|
||||
# Edge cases
|
||||
edge_cases = [
|
||||
# Too few digits
|
||||
@@ -60,19 +61,19 @@ class TestVCardProcessor(unittest.TestCase):
|
||||
# Unusual formatting but valid number
|
||||
("(+55) [27] 9.1234_5678", "+55 27 91234-5678", "+55 27 1234-5678"),
|
||||
]
|
||||
|
||||
|
||||
# Run tests for all cases
|
||||
all_cases = test_cases_9_digit + test_cases_8_digit + edge_cases
|
||||
|
||||
|
||||
for raw_phone, expected_orig, expected_mod in all_cases:
|
||||
with self.subTest(raw_phone=raw_phone):
|
||||
orig, mod = process_phone_number(raw_phone)
|
||||
self.assertEqual(orig, expected_orig)
|
||||
self.assertEqual(mod, expected_mod)
|
||||
|
||||
|
||||
def test_process_vcard(self):
|
||||
"""Test the process_vcard function with various VCARD formats."""
|
||||
|
||||
|
||||
# Test case 1: Standard TEL entries
|
||||
vcard1 = """BEGIN:VCARD
|
||||
VERSION:3.0
|
||||
@@ -202,26 +203,26 @@ END:VCARD
|
||||
(vcard5, expected5),
|
||||
(vcard6, expected6)
|
||||
]
|
||||
|
||||
|
||||
for i, (input_vcard, expected_output) in enumerate(test_cases):
|
||||
with self.subTest(case=i+1):
|
||||
# Create temporary files for input and output
|
||||
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
|
||||
input_file.write(input_vcard)
|
||||
input_path = input_file.name
|
||||
|
||||
|
||||
output_path = input_path + '.out'
|
||||
|
||||
|
||||
try:
|
||||
# Process the VCARD
|
||||
process_vcard(input_path, output_path)
|
||||
|
||||
|
||||
# Read and verify the output
|
||||
with open(output_path, 'r', encoding='utf-8') as output_file:
|
||||
actual_output = output_file.read()
|
||||
|
||||
self.assertEqual(actual_output, expected_output)
|
||||
|
||||
|
||||
finally:
|
||||
# Clean up temporary files
|
||||
if os.path.exists(input_path):
|
||||
@@ -231,7 +232,7 @@ END:VCARD
|
||||
|
||||
def test_script_argument_handling(self):
|
||||
"""Test the script's command-line argument handling."""
|
||||
|
||||
|
||||
test_input = """BEGIN:VCARD
|
||||
VERSION:3.0
|
||||
N:Test;User;;;
|
||||
@@ -239,16 +240,17 @@ FN:User Test
|
||||
TEL:+5527912345678
|
||||
END:VCARD
|
||||
"""
|
||||
|
||||
|
||||
# Create a temporary input file
|
||||
with tempfile.NamedTemporaryFile(mode='w+', delete=False, encoding='utf-8') as input_file:
|
||||
input_file.write(test_input)
|
||||
input_path = input_file.name
|
||||
|
||||
|
||||
output_path = input_path + '.out'
|
||||
|
||||
|
||||
try:
|
||||
test_args = ['python' if os.name == 'nt' else 'python3', 'scripts/brazilian_number_processing.py', input_path, output_path]
|
||||
test_args = ['python' if os.name == 'nt' else 'python3',
|
||||
'scripts/brazilian_number_processing.py', input_path, output_path]
|
||||
# We're just testing that the argument parsing works
|
||||
subprocess.call(
|
||||
test_args,
|
||||
@@ -257,7 +259,7 @@ END:VCARD
|
||||
)
|
||||
# Check if the output file was created
|
||||
self.assertTrue(os.path.exists(output_path))
|
||||
|
||||
|
||||
finally:
|
||||
# Clean up temporary files
|
||||
if os.path.exists(input_path):
|
||||
@@ -265,5 +267,6 @@ END:VCARD
|
||||
if os.path.exists(output_path):
|
||||
os.unlink(output_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -178,14 +178,14 @@ def test_incremental_merge_new_file(mock_filesystem):
|
||||
source_dir = "/source"
|
||||
target_dir = "/target"
|
||||
media_dir = "media"
|
||||
|
||||
|
||||
# Setup mock filesystem
|
||||
mock_filesystem["exists"].side_effect = lambda x: x == "/source"
|
||||
mock_filesystem["listdir"].return_value = ["chat.json"]
|
||||
|
||||
|
||||
# Run the function
|
||||
incremental_merge(source_dir, target_dir, media_dir, 2, True)
|
||||
|
||||
|
||||
# Verify the operations
|
||||
mock_filesystem["makedirs"].assert_called_once_with(target_dir, exist_ok=True)
|
||||
mock_filesystem["copy2"].assert_called_once_with(
|
||||
|
||||
@@ -43,9 +43,9 @@ def test_nuitka_binary():
|
||||
"--assume-yes-for-downloads",
|
||||
"--follow-imports",
|
||||
"Whatsapp_Chat_Exporter/__main__.py",
|
||||
"--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility
|
||||
"--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility
|
||||
]
|
||||
|
||||
|
||||
compile_result = subprocess.run(
|
||||
nuitka_command,
|
||||
capture_output=True,
|
||||
|
||||
@@ -8,12 +8,15 @@ def test_readVCardsFile():
|
||||
data_dir = os.path.join(os.path.dirname(__file__), "data")
|
||||
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
|
||||
|
||||
|
||||
def test_create_number_to_name_dicts():
|
||||
pass
|
||||
|
||||
|
||||
def test_fuzzy_match_numbers():
|
||||
pass
|
||||
|
||||
|
||||
def test_normalize_number():
|
||||
assert normalize_number('0531234567', '1') == '1531234567'
|
||||
assert normalize_number('001531234567', '2') == '1531234567'
|
||||
|
||||
Reference in New Issue
Block a user