This commit is contained in:
KnugiHK
2025-05-11 18:07:51 +08:00
parent cc410b8503
commit 33149075d3
14 changed files with 372 additions and 311 deletions

View File

@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
elif "crypt15" in args.backup: elif "crypt15" in args.backup:
crypt = Crypt.CRYPT15 crypt = Crypt.CRYPT15
else: else:
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}") logger.error(
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
return 1 return 1
# Get key # Get key

View File

@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
) )
return db return db
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes: def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
"""Decrypt a crypt14 database using multithreading for brute-force offset detection. """Decrypt a crypt14 database using multithreading for brute-force offset detection.
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
return db return db
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor: with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations} future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try: try:
for future in concurrent.futures.as_completed(future_to_offset): for future in concurrent.futures.as_completed(future_to_offset):
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
raise OffsetNotFoundError("Could not find the correct offsets for decryption.") raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes: def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
"""Decrypt a crypt12 database. """Decrypt a crypt12 database.
@@ -319,7 +320,7 @@ def decrypt_backup(
if crypt is not Crypt.CRYPT15 and len(key) != 158: if crypt is not Crypt.CRYPT15 and len(key) != 158:
raise InvalidKeyError("The key file must be 158 bytes") raise InvalidKeyError("The key file must be 158 bytes")
#signature check, this is check is used in crypt 12 and 14 # signature check, this is check is used in crypt 12 and 14
if crypt != Crypt.CRYPT15: if crypt != Crypt.CRYPT15:
t1 = key[30:62] t1 = key[30:62]
@@ -329,7 +330,6 @@ def decrypt_backup(
if t1 != database[3:35] and crypt == Crypt.CRYPT12: if t1 != database[3:35] and crypt == Crypt.CRYPT12:
raise ValueError("The signature of key file and backup file mismatch") raise ValueError("The signature of key file and backup file mismatch")
if crypt == Crypt.CRYPT15: if crypt == Crypt.CRYPT15:
if keyfile_stream: if keyfile_stream:
main_key, hex_key = _extract_enc_key(key) main_key, hex_key = _extract_enc_key(key)
@@ -353,7 +353,6 @@ def decrypt_backup(
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e: except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
raise DecryptionError(f"Decryption failed: {e}") from e raise DecryptionError(f"Decryption failed: {e}") from e
if not dry_run: if not dry_run:
with open(output, "wb") as f: with open(output, "wb") as f:
f.write(db) f.write(db)

View File

@@ -37,9 +37,11 @@ def contacts(db, data, enrich_from_vcards):
if total_row_number == 0: if total_row_number == 0:
if enrich_from_vcards is not None: if enrich_from_vcards is not None:
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.") logger.info(
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
else: else:
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google") logger.warning(
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False return False
else: else:
logger.info(f"Processed {total_row_number} contacts\n") logger.info(f"Processed {total_row_number} contacts\n")
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM messages FROM messages
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message FROM message
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy database schema.""" """Get cursor for legacy database schema."""
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android") filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
messages._id, messages._id,
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new database schema.""" """Get cursor for new database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
message._id, message._id,
@@ -295,7 +305,8 @@ def _process_single_message(data, content, table_message, timezone_offset):
# Get or create the chat # Get or create the chat
if not data.get_chat(content["key_remote_jid"]): if not data.get_chat(content["key_remote_jid"]):
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"])) current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
Device.ANDROID, content["chat_subject"]))
else: else:
current_chat = data.get_chat(content["key_remote_jid"]) current_chat = data.get_chat(content["key_remote_jid"])
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
try: try:
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError: except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count() cursor.execute(f"""SELECT count()
FROM message_media FROM message_media
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy media database schema.""" """Get cursor for legacy media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT messages.key_remote_jid, cursor.execute(f"""SELECT messages.key_remote_jid,
message_row_id, message_row_id,
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new media database schema.""" """Get cursor for new media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") include_filter = get_chat_condition(
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid, cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
message_row_id, message_row_id,
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema.""" """Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android") filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for legacy WhatsApp database schema.""" """Execute vCard query for legacy WhatsApp database schema."""
# Build the filter conditions # Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android") filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else '' date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast") empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
@@ -918,7 +941,7 @@ def create_html(
no_avatar=False, no_avatar=False,
experimental=False, experimental=False,
headline=None headline=None
): ):
"""Generate HTML chat files from data.""" """Generate HTML chat files from data."""
template = setup_template(template, no_avatar, experimental) template = setup_template(template, no_avatar, experimental)

View File

@@ -24,6 +24,7 @@ import struct
import codecs import codecs
from datetime import datetime, timedelta from datetime import datetime, timedelta
class BPListWriter(object): class BPListWriter(object):
def __init__(self, objects): def __init__(self, objects):
self.bplist = "" self.bplist = ""
@@ -57,6 +58,7 @@ class BPListWriter(object):
else: else:
raise Exception('BPlist not yet generated') raise Exception('BPlist not yet generated')
class BPListReader(object): class BPListReader(object):
def __init__(self, s): def __init__(self, s):
self.data = s self.data = s
@@ -135,7 +137,7 @@ class BPListReader(object):
def __unpackDate(self, offset): def __unpackDate(self, offset):
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0]) td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td) return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
def __unpackItem(self, offset): def __unpackItem(self, offset):
'''__unpackItem(offset) '''__unpackItem(offset)
@@ -154,44 +156,53 @@ class BPListReader(object):
elif obj_info == 0x0F: # fill 0000 1111 // fill byte elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
else: else:
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed') raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackInt(offset) return self.__unpackInt(offset)
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackFloat(offset) return self.__unpackFloat(offset)
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
return self.__unpackDate(offset) return self.__unpackDate(offset)
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
elif obj_type == 0x40:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] # XXX: we return data as str return self.data[objref:objref+obj_count] # XXX: we return data as str
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
elif obj_type == 0x50:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] return self.data[objref:objref+obj_count]
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
elif obj_type == 0x60:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count*2].decode('utf-16be') return self.data[objref:objref+obj_count*2].decode('utf-16be')
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
# FIXME: Accept as a string for now # FIXME: Accept as a string for now
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] return self.data[objref:objref+obj_count]
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xA0:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
arr = [] arr = []
for i in range(obj_count): for i in range(obj_count):
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) arr.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
return arr return arr
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xC0:
# XXX: not serializable via apple implementation # XXX: not serializable via apple implementation
raise Exception("0xC0 Not Implemented") # FIXME: implement raise Exception("0xC0 Not Implemented") # FIXME: implement
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xD0:
obj_count, objref = self.__resolveIntSize(obj_info, offset) obj_count, objref = self.__resolveIntSize(obj_info, offset)
keys = [] keys = []
for i in range(obj_count): for i in range(obj_count):
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) keys.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values = [] values = []
objref += obj_count*self.object_ref_size objref += obj_count*self.object_ref_size
for i in range(obj_count): for i in range(obj_count):
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size])) values.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
dic = {} dic = {}
for i in range(obj_count): for i in range(obj_count):
dic[keys[i]] = values[i] dic[keys[i]] = values[i]
@@ -212,7 +223,7 @@ class BPListReader(object):
return newArr return newArr
if type(obj) == dict: if type(obj) == dict:
newDic = {} newDic = {}
for k,v in obj.items(): for k, v in obj.items():
key_resolved = self.__resolveObject(k) key_resolved = self.__resolveObject(k)
if isinstance(key_resolved, str): if isinstance(key_resolved, str):
rk = key_resolved rk = key_resolved
@@ -232,8 +243,9 @@ class BPListReader(object):
raise Exception('Bad magic') raise Exception('Bad magic')
# read trailer # read trailer
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:]) self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset '!6xBB4xI4xI4xI', self.data[-32:])
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
# read offset table # read offset table
self.offset_table = self.data[self.table_offset:-32] self.offset_table = self.data[self.table_offset:-32]
@@ -243,19 +255,19 @@ class BPListReader(object):
offset_entry = ot[:self.offset_size] offset_entry = ot[:self.offset_size]
ot = ot[self.offset_size:] ot = ot[self.offset_size:]
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry)) self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
#print "** plist offsets:",self.offsets # print "** plist offsets:",self.offsets
# read object table # read object table
self.objects = [] self.objects = []
k = 0 k = 0
for i in self.offsets: for i in self.offsets:
obj = self.__unpackItem(i) obj = self.__unpackItem(i)
#print "** plist unpacked",k,type(obj),obj,"at",i # print "** plist unpacked",k,type(obj),obj,"at",i
k += 1 k += 1
self.objects.append(obj) self.objects.append(obj)
# rebuild object tree # rebuild object tree
#for i in range(len(self.objects)): # for i in range(len(self.objects)):
# self.__resolveObject(i) # self.__resolveObject(i)
# return root object # return root object
@@ -267,16 +279,21 @@ class BPListReader(object):
return parser.parse() return parser.parse()
# helpers for testing # helpers for testing
def plist(obj): def plist(obj):
from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0 from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(obj, NSPropertyListBinaryFormat_v1_0, 0, None) b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
obj, NSPropertyListBinaryFormat_v1_0, 0, None)
return str(b.bytes()) return str(b.bytes())
def unplist(s): def unplist(s):
from Foundation import NSData, NSPropertyListSerialization from Foundation import NSData, NSPropertyListSerialization
d = NSData.dataWithBytes_length_(s, len(s)) d = NSData.dataWithBytes_length_(s, len(s))
return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None) return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
if __name__ == "__main__": if __name__ == "__main__":
import os import os
import sys import sys

View File

@@ -66,8 +66,10 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
cursor2 = db.cursor() cursor2 = db.cursor()
# Build the chat filter conditions # Build the chat filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Process contacts first # Process contacts first
@@ -314,8 +316,10 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Get media count # Get media count
@@ -398,7 +402,8 @@ def process_media_item(content, data, media_folder, mime, separate_media):
# Handle separate media option # Handle separate media option
if separate_media: if separate_media:
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True) chat_display_name = slugify(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
current_filename = file_path.split("/")[-1] current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name) new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True) Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -421,8 +426,10 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios") filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else '' date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Fetch vCard mentions # Fetch vCard mentions
@@ -485,7 +492,8 @@ def process_vcard_item(content, path, data):
# Create vCard summary and update message # Create vCard summary and update message
vcard_summary = "This media include the following vCard file(s):<br>" vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)]) vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
fp in zip(vcard_names, file_paths)])
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"]) message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
message.data = vcard_summary message.data = vcard_summary
@@ -500,8 +508,10 @@ def calls(db, data, timezone_offset, filter_chat):
c = db.cursor() c = db.cursor()
# Build filter conditions # Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") chat_filter_include = get_chat_condition(
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios") filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
# Get call count # Get call count
call_count_query = f""" call_count_query = f"""

View File

@@ -18,6 +18,7 @@ else:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class BackupExtractor: class BackupExtractor:
""" """
A class to handle the extraction of WhatsApp data from iOS backups, A class to handle the extraction of WhatsApp data from iOS backups,
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
""" """
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size) extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
extractor.extract() extractor.extract()

View File

@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def convert_time_unit(time_second: int) -> str: def convert_time_unit(time_second: int) -> str:
"""Converts a time duration in seconds to a human-readable string. """Converts a time duration in seconds to a human-readable string.

View File

@@ -6,6 +6,7 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
import re import re
import argparse import argparse
def process_phone_number(raw_phone): def process_phone_number(raw_phone):
""" """
Process the raw phone string from the VCARD and return two formatted numbers: Process the raw phone string from the VCARD and return two formatted numbers:
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
return original_formatted, modified_formatted return original_formatted, modified_formatted
def process_vcard(input_vcard, output_vcard): def process_vcard(input_vcard, output_vcard):
""" """
Process a VCARD file to standardize telephone entries and add a second TEL line Process a VCARD file to standardize telephone entries and add a second TEL line
@@ -103,6 +105,7 @@ def process_vcard(input_vcard, output_vcard):
with open(output_vcard, 'w', encoding='utf-8') as file: with open(output_vcard, 'w', encoding='utf-8') as file:
file.writelines(output_lines) file.writelines(output_lines)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers." description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."

View File

@@ -26,6 +26,7 @@ def _extract_encrypted_key(keyfile):
return _generate_hmac_of_hmac(key_stream) return _generate_hmac_of_hmac(key_stream)
if __name__ == "__main__": if __name__ == "__main__":
key = open("encrypted_backup.key", "rb").read() key = open("encrypted_backup.key", "rb").read()
database = open("wa.db.crypt15", "rb").read() database = open("wa.db.crypt15", "rb").read()

View File

@@ -6,6 +6,7 @@ from unittest.mock import patch
from scripts.brazilian_number_processing import process_phone_number, process_vcard from scripts.brazilian_number_processing import process_phone_number, process_vcard
class TestVCardProcessor(unittest.TestCase): class TestVCardProcessor(unittest.TestCase):
def test_process_phone_number(self): def test_process_phone_number(self):
@@ -248,7 +249,8 @@ END:VCARD
output_path = input_path + '.out' output_path = input_path + '.out'
try: try:
test_args = ['python' if os.name == 'nt' else 'python3', 'scripts/brazilian_number_processing.py', input_path, output_path] test_args = ['python' if os.name == 'nt' else 'python3',
'scripts/brazilian_number_processing.py', input_path, output_path]
# We're just testing that the argument parsing works # We're just testing that the argument parsing works
subprocess.call( subprocess.call(
test_args, test_args,
@@ -265,5 +267,6 @@ END:VCARD
if os.path.exists(output_path): if os.path.exists(output_path):
os.unlink(output_path) os.unlink(output_path)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -8,12 +8,15 @@ def test_readVCardsFile():
data_dir = os.path.join(os.path.dirname(__file__), "data") data_dir = os.path.join(os.path.dirname(__file__), "data")
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0 assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
def test_create_number_to_name_dicts(): def test_create_number_to_name_dicts():
pass pass
def test_fuzzy_match_numbers(): def test_fuzzy_match_numbers():
pass pass
def test_normalize_number(): def test_normalize_number():
assert normalize_number('0531234567', '1') == '1531234567' assert normalize_number('0531234567', '1') == '1531234567'
assert normalize_number('001531234567', '2') == '1531234567' assert normalize_number('001531234567', '2') == '1531234567'