This commit is contained in:
KnugiHK
2025-05-11 18:07:51 +08:00
parent cc410b8503
commit 33149075d3
14 changed files with 372 additions and 311 deletions

View File

@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
elif "crypt15" in args.backup:
crypt = Crypt.CRYPT15
else:
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
logger.error(
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
return 1
# Get key
@@ -505,11 +506,11 @@ def handle_decrypt_error(error: int) -> None:
"""Handle decryption errors with appropriate messages."""
if error == 1:
logger.error("Dependencies of decrypt_backup and/or extract_encrypted_key"
" are not present. For details, see README.md.\n")
" are not present. For details, see README.md.\n")
exit(3)
elif error == 2:
logger.error("Failed when decompressing the decrypted backup. "
"Possibly incorrect offsets used in decryption.\n")
"Possibly incorrect offsets used in decryption.\n")
exit(4)
else:
logger.error("Unknown error occurred.\n")
@@ -598,7 +599,7 @@ def handle_media_directory(args) -> None:
logger.info(f"Media directory has been moved to the output directory{CLEAR_LINE}")
except PermissionError:
logger.warning("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?\n")
"Perhaps the directory is opened?\n")
else:
logger.info(f"Copying media directory...\r")
shutil.copytree(args.media, media_path)

View File

@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
)
return db
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
"""Decrypt a crypt14 database using multithreading for brute-force offset detection.
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
return db
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations}
future_to_offset = {executor.submit(attempt_decrypt, offset)
: offset for offset in offset_combinations}
try:
for future in concurrent.futures.as_completed(future_to_offset):
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
"""Decrypt a crypt12 database.
@@ -319,7 +320,7 @@ def decrypt_backup(
if crypt is not Crypt.CRYPT15 and len(key) != 158:
raise InvalidKeyError("The key file must be 158 bytes")
#signature check, this is check is used in crypt 12 and 14
# signature check; this check is used in crypt 12 and 14
if crypt != Crypt.CRYPT15:
t1 = key[30:62]
@@ -329,7 +330,6 @@ def decrypt_backup(
if t1 != database[3:35] and crypt == Crypt.CRYPT12:
raise ValueError("The signature of key file and backup file mismatch")
if crypt == Crypt.CRYPT15:
if keyfile_stream:
main_key, hex_key = _extract_enc_key(key)
@@ -353,7 +353,6 @@ def decrypt_backup(
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
raise DecryptionError(f"Decryption failed: {e}") from e
if not dry_run:
with open(output, "wb") as f:
f.write(db)

View File

@@ -37,9 +37,11 @@ def contacts(db, data, enrich_from_vcards):
if total_row_number == 0:
if enrich_from_vcards is not None:
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
logger.info(
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
else:
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
logger.warning(
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
return False
else:
logger.info(f"Processed {total_row_number} contacts\n")
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
try:
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM messages
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy database schema."""
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
cursor.execute(f"""SELECT messages.key_remote_jid,
messages._id,
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
message._id,
@@ -295,7 +305,8 @@ def _process_single_message(data, content, table_message, timezone_offset):
# Get or create the chat
if not data.get_chat(content["key_remote_jid"]):
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"]))
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
Device.ANDROID, content["chat_subject"]))
else:
current_chat = data.get_chat(content["key_remote_jid"])
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
try:
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message_media
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
except sqlite3.OperationalError:
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT count()
FROM message_media
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for legacy media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
cursor.execute(f"""SELECT messages.key_remote_jid,
message_row_id,
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
"""Get cursor for new media database schema."""
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
include_filter = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
exclude_filter = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
message_row_id,
@@ -650,7 +669,7 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
# Copy media to separate folder if needed
if separate_media:
chat_display_name = slugify(current_chat.name or message.sender
or content["key_remote_jid"].split('@')[0], True)
or content["key_remote_jid"].split('@')[0], True)
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for modern WhatsApp database schema."""
# Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
"""Execute vCard query for legacy WhatsApp database schema."""
# Build the filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
@@ -909,16 +932,16 @@ def _construct_call_description(content, call):
def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None,
no_avatar=False,
experimental=False,
headline=None
):
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None,
no_avatar=False,
experimental=False,
headline=None
):
"""Generate HTML chat files from data."""
template = setup_template(template, no_avatar, experimental)

View File

@@ -24,6 +24,7 @@ import struct
import codecs
from datetime import datetime, timedelta
class BPListWriter(object):
def __init__(self, objects):
self.bplist = ""
@@ -57,6 +58,7 @@ class BPListWriter(object):
else:
raise Exception('BPlist not yet generated')
class BPListReader(object):
def __init__(self, s):
self.data = s
@@ -68,7 +70,7 @@ class BPListReader(object):
Unpacks the integer of given size (1, 2 or 4 bytes) from string
'''
if sz == 1:
if sz == 1:
ot = '!B'
elif sz == 2:
ot = '!H'
@@ -115,7 +117,7 @@ class BPListReader(object):
Unpacks the float of given size (4 or 8 bytes) from string
'''
if sz == 4:
if sz == 4:
ot = '!f'
elif sz == 8:
ot = '!d'
@@ -135,7 +137,7 @@ class BPListReader(object):
def __unpackDate(self, offset):
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td)
return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
def __unpackItem(self, offset):
'''__unpackItem(offset)
@@ -144,54 +146,63 @@ class BPListReader(object):
'''
obj_header = self.data[offset]
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
if obj_type == 0x00:
if obj_info == 0x00: # null 0000 0000
if obj_type == 0x00:
if obj_info == 0x00: # null 0000 0000
return None
elif obj_info == 0x08: # bool 0000 1000 // false
elif obj_info == 0x08: # bool 0000 1000 // false
return False
elif obj_info == 0x09: # bool 0000 1001 // true
elif obj_info == 0x09: # bool 0000 1001 // true
return True
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
else:
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackInt(offset)
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
return self.__unpackFloat(offset)
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
return self.__unpackDate(offset)
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
# data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
elif obj_type == 0x40:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count] # XXX: we return data as str
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
return self.data[objref:objref+obj_count] # XXX: we return data as str
# string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
elif obj_type == 0x50:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count]
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
# string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
elif obj_type == 0x60:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count*2].decode('utf-16be')
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
# FIXME: Accept as a string for now
obj_count, objref = self.__resolveIntSize(obj_info, offset)
return self.data[objref:objref+obj_count]
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
# array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xA0:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
arr = []
for i in range(obj_count):
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
arr.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
return arr
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
# set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xC0:
# XXX: not serializable via apple implementation
raise Exception("0xC0 Not Implemented") # FIXME: implement
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
raise Exception("0xC0 Not Implemented") # FIXME: implement
# dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
elif obj_type == 0xD0:
obj_count, objref = self.__resolveIntSize(obj_info, offset)
keys = []
for i in range(obj_count):
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
keys.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values = []
objref += obj_count*self.object_ref_size
for i in range(obj_count):
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
values.append(self.__unpackIntStruct(
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
dic = {}
for i in range(obj_count):
dic[keys[i]] = values[i]
@@ -212,7 +223,7 @@ class BPListReader(object):
return newArr
if type(obj) == dict:
newDic = {}
for k,v in obj.items():
for k, v in obj.items():
key_resolved = self.__resolveObject(k)
if isinstance(key_resolved, str):
rk = key_resolved
@@ -232,8 +243,9 @@ class BPListReader(object):
raise Exception('Bad magic')
# read trailer
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:])
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
'!6xBB4xI4xI4xI', self.data[-32:])
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
# read offset table
self.offset_table = self.data[self.table_offset:-32]
@@ -243,19 +255,19 @@ class BPListReader(object):
offset_entry = ot[:self.offset_size]
ot = ot[self.offset_size:]
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
#print "** plist offsets:",self.offsets
# print "** plist offsets:",self.offsets
# read object table
self.objects = []
k = 0
for i in self.offsets:
obj = self.__unpackItem(i)
#print "** plist unpacked",k,type(obj),obj,"at",i
# print "** plist unpacked",k,type(obj),obj,"at",i
k += 1
self.objects.append(obj)
# rebuild object tree
#for i in range(len(self.objects)):
# for i in range(len(self.objects)):
# self.__resolveObject(i)
# return root object
@@ -267,16 +279,21 @@ class BPListReader(object):
return parser.parse()
# helpers for testing
def plist(obj):
    """Serialize ``obj`` to a binary property list via Foundation (testing helper).

    ``Foundation`` (PyObjC) is imported inside the function, so it is only
    required when this helper is actually called.

    Returns:
        ``str(b.bytes())`` where ``b`` is the value returned by
        ``dataWithPropertyList_format_options_error_``.
    """
    from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
    # NOTE(review): PyObjC commonly returns a (value, error) tuple for selectors
    # with an ``error:`` out-parameter -- confirm ``b`` is an NSData here.
    b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
        obj, NSPropertyListBinaryFormat_v1_0, 0, None)
    return str(b.bytes())
def unplist(s):
    """Parse property-list bytes ``s`` via Foundation (testing helper).

    Wraps ``s`` in an ``NSData`` of length ``len(s)`` and returns the result of
    ``propertyListWithData_options_format_error_`` unchanged.
    """
    from Foundation import NSData, NSPropertyListSerialization
    payload = NSData.dataWithBytes_length_(s, len(s))
    return NSPropertyListSerialization.propertyListWithData_options_format_error_(
        payload, 0, None, None)
if __name__ == "__main__":
import os
import sys

View File

@@ -66,8 +66,10 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
cursor2 = db.cursor()
# Build the chat filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Process contacts first
@@ -314,8 +316,10 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Get media count
@@ -398,7 +402,8 @@ def process_media_item(content, data, media_folder, mime, separate_media):
# Handle separate media option
if separate_media:
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
chat_display_name = slugify(
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
current_filename = file_path.split("/")[-1]
new_folder = os.path.join(media_folder, "separated", chat_display_name)
Path(new_folder).mkdir(parents=True, exist_ok=True)
@@ -421,8 +426,10 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
# Fetch vCard mentions
@@ -485,7 +492,8 @@ def process_vcard_item(content, path, data):
# Create vCard summary and update message
vcard_summary = "This media include the following vCard file(s):<br>"
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)])
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
fp in zip(vcard_names, file_paths)])
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
message.data = vcard_summary
@@ -500,8 +508,10 @@ def calls(db, data, timezone_offset, filter_chat):
c = db.cursor()
# Build filter conditions
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_include = get_chat_condition(
filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
chat_filter_exclude = get_chat_condition(
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
# Get call count
call_count_query = f"""

View File

@@ -18,6 +18,7 @@ else:
logger = logging.getLogger(__name__)
class BackupExtractor:
"""
A class to handle the extraction of WhatsApp data from iOS backups,
@@ -61,9 +62,9 @@ class BackupExtractor:
"""
if not support_encrypted:
logger.error("You don't have the dependencies to handle encrypted backup."
"Read more on how to deal with encrypted backup:"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
"Read more on how to deal with encrypted backup:"
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
)
return
logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
"""
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
extractor.extract()

View File

@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
logger = logging.getLogger(__name__)
def convert_time_unit(time_second: int) -> str:
"""Converts a time duration in seconds to a human-readable string.

View File

@@ -6,6 +6,7 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
import re
import argparse
def process_phone_number(raw_phone):
"""
Process the raw phone string from the VCARD and return two formatted numbers:
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
return original_formatted, modified_formatted
def process_vcard(input_vcard, output_vcard):
"""
Process a VCARD file to standardize telephone entries and add a second TEL line
@@ -103,6 +105,7 @@ def process_vcard(input_vcard, output_vcard):
with open(output_vcard, 'w', encoding='utf-8') as file:
file.writelines(output_lines)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."

View File

@@ -26,6 +26,7 @@ def _extract_encrypted_key(keyfile):
return _generate_hmac_of_hmac(key_stream)
if __name__ == "__main__":
key = open("encrypted_backup.key", "rb").read()
database = open("wa.db.crypt15", "rb").read()

View File

@@ -6,6 +6,7 @@ from unittest.mock import patch
from scripts.brazilian_number_processing import process_phone_number, process_vcard
class TestVCardProcessor(unittest.TestCase):
def test_process_phone_number(self):
@@ -248,7 +249,8 @@ END:VCARD
output_path = input_path + '.out'
try:
test_args = ['python' if os.name == 'nt' else 'python3', 'scripts/brazilian_number_processing.py', input_path, output_path]
test_args = ['python' if os.name == 'nt' else 'python3',
'scripts/brazilian_number_processing.py', input_path, output_path]
# We're just testing that the argument parsing works
subprocess.call(
test_args,
@@ -265,5 +267,6 @@ END:VCARD
if os.path.exists(output_path):
os.unlink(output_path)
if __name__ == '__main__':
unittest.main()

View File

@@ -43,7 +43,7 @@ def test_nuitka_binary():
"--assume-yes-for-downloads",
"--follow-imports",
"Whatsapp_Chat_Exporter/__main__.py",
"--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility
"--output-filename=wtsexporter.exe" # use .exe on all platforms for compatibility
]
compile_result = subprocess.run(

View File

@@ -8,12 +8,15 @@ def test_readVCardsFile():
data_dir = os.path.join(os.path.dirname(__file__), "data")
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
def test_create_number_to_name_dicts():
    """Placeholder test: no assertions are implemented yet."""
def test_fuzzy_match_numbers():
    """Placeholder test: no assertions are implemented yet."""
def test_normalize_number():
assert normalize_number('0531234567', '1') == '1531234567'
assert normalize_number('001531234567', '2') == '1531234567'