mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-28 21:30:43 +00:00
autopep8
This commit is contained in:
@@ -452,7 +452,8 @@ def decrypt_android_backup(args) -> int:
|
|||||||
elif "crypt15" in args.backup:
|
elif "crypt15" in args.backup:
|
||||||
crypt = Crypt.CRYPT15
|
crypt = Crypt.CRYPT15
|
||||||
else:
|
else:
|
||||||
logger.error(f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
|
logger.error(
|
||||||
|
f"Unknown backup format. The backup file must be crypt12, crypt14 or crypt15.{CLEAR_LINE}")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Get key
|
# Get key
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
|
|||||||
)
|
)
|
||||||
return db
|
return db
|
||||||
|
|
||||||
|
|
||||||
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
|
def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> bytes:
|
||||||
"""Decrypt a crypt14 database using multithreading for brute-force offset detection.
|
"""Decrypt a crypt14 database using multithreading for brute-force offset detection.
|
||||||
|
|
||||||
@@ -194,7 +195,8 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
|
|||||||
return db
|
return db
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
|
||||||
future_to_offset = {executor.submit(attempt_decrypt, offset): offset for offset in offset_combinations}
|
future_to_offset = {executor.submit(attempt_decrypt, offset)
|
||||||
|
: offset for offset in offset_combinations}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for future in concurrent.futures.as_completed(future_to_offset):
|
for future in concurrent.futures.as_completed(future_to_offset):
|
||||||
@@ -217,7 +219,6 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
|
|||||||
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
|
raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
|
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
|
||||||
"""Decrypt a crypt12 database.
|
"""Decrypt a crypt12 database.
|
||||||
|
|
||||||
@@ -319,7 +320,7 @@ def decrypt_backup(
|
|||||||
if crypt is not Crypt.CRYPT15 and len(key) != 158:
|
if crypt is not Crypt.CRYPT15 and len(key) != 158:
|
||||||
raise InvalidKeyError("The key file must be 158 bytes")
|
raise InvalidKeyError("The key file must be 158 bytes")
|
||||||
|
|
||||||
#signature check, this is check is used in crypt 12 and 14
|
# signature check, this is check is used in crypt 12 and 14
|
||||||
if crypt != Crypt.CRYPT15:
|
if crypt != Crypt.CRYPT15:
|
||||||
t1 = key[30:62]
|
t1 = key[30:62]
|
||||||
|
|
||||||
@@ -329,7 +330,6 @@ def decrypt_backup(
|
|||||||
if t1 != database[3:35] and crypt == Crypt.CRYPT12:
|
if t1 != database[3:35] and crypt == Crypt.CRYPT12:
|
||||||
raise ValueError("The signature of key file and backup file mismatch")
|
raise ValueError("The signature of key file and backup file mismatch")
|
||||||
|
|
||||||
|
|
||||||
if crypt == Crypt.CRYPT15:
|
if crypt == Crypt.CRYPT15:
|
||||||
if keyfile_stream:
|
if keyfile_stream:
|
||||||
main_key, hex_key = _extract_enc_key(key)
|
main_key, hex_key = _extract_enc_key(key)
|
||||||
@@ -353,7 +353,6 @@ def decrypt_backup(
|
|||||||
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
|
except (InvalidFileFormatError, OffsetNotFoundError, ValueError) as e:
|
||||||
raise DecryptionError(f"Decryption failed: {e}") from e
|
raise DecryptionError(f"Decryption failed: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
with open(output, "wb") as f:
|
with open(output, "wb") as f:
|
||||||
f.write(db)
|
f.write(db)
|
||||||
|
|||||||
@@ -37,9 +37,11 @@ def contacts(db, data, enrich_from_vcards):
|
|||||||
|
|
||||||
if total_row_number == 0:
|
if total_row_number == 0:
|
||||||
if enrich_from_vcards is not None:
|
if enrich_from_vcards is not None:
|
||||||
logger.info("No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
|
logger.info(
|
||||||
|
"No contacts profiles found in the default database, contacts will be imported from the specified vCard file.")
|
||||||
else:
|
else:
|
||||||
logger.warning("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
|
logger.warning(
|
||||||
|
"No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
logger.info(f"Processed {total_row_number} contacts\n")
|
logger.info(f"Processed {total_row_number} contacts\n")
|
||||||
@@ -106,8 +108,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
try:
|
try:
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
||||||
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT count()
|
cursor.execute(f"""SELECT count()
|
||||||
FROM messages
|
FROM messages
|
||||||
@@ -123,8 +127,10 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
||||||
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT count()
|
cursor.execute(f"""SELECT count()
|
||||||
FROM message
|
FROM message
|
||||||
@@ -146,8 +152,10 @@ def _get_messages_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
"""Get cursor for legacy database schema."""
|
"""Get cursor for legacy database schema."""
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
empty_filter = get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")
|
||||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT messages.key_remote_jid,
|
cursor.execute(f"""SELECT messages.key_remote_jid,
|
||||||
messages._id,
|
messages._id,
|
||||||
@@ -209,8 +217,10 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
"""Get cursor for new database schema."""
|
"""Get cursor for new database schema."""
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
|
cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
|
||||||
message._id,
|
message._id,
|
||||||
@@ -295,7 +305,8 @@ def _process_single_message(data, content, table_message, timezone_offset):
|
|||||||
|
|
||||||
# Get or create the chat
|
# Get or create the chat
|
||||||
if not data.get_chat(content["key_remote_jid"]):
|
if not data.get_chat(content["key_remote_jid"]):
|
||||||
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(Device.ANDROID, content["chat_subject"]))
|
current_chat = data.add_chat(content["key_remote_jid"], ChatStore(
|
||||||
|
Device.ANDROID, content["chat_subject"]))
|
||||||
else:
|
else:
|
||||||
current_chat = data.get_chat(content["key_remote_jid"])
|
current_chat = data.get_chat(content["key_remote_jid"])
|
||||||
|
|
||||||
@@ -519,8 +530,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
try:
|
try:
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
||||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT count()
|
cursor.execute(f"""SELECT count()
|
||||||
FROM message_media
|
FROM message_media
|
||||||
@@ -538,8 +551,10 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
|
||||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT count()
|
cursor.execute(f"""SELECT count()
|
||||||
FROM message_media
|
FROM message_media
|
||||||
@@ -563,8 +578,10 @@ def _get_media_cursor_legacy(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
"""Get cursor for legacy media database schema."""
|
"""Get cursor for legacy media database schema."""
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT messages.key_remote_jid,
|
cursor.execute(f"""SELECT messages.key_remote_jid,
|
||||||
message_row_id,
|
message_row_id,
|
||||||
@@ -596,8 +613,10 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
|
|||||||
"""Get cursor for new media database schema."""
|
"""Get cursor for new media database schema."""
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
include_filter = get_chat_condition(
|
||||||
exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||||
|
exclude_filter = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||||
|
|
||||||
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
|
cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
|
||||||
message_row_id,
|
message_row_id,
|
||||||
@@ -696,8 +715,10 @@ def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
|
|||||||
"""Execute vCard query for modern WhatsApp database schema."""
|
"""Execute vCard query for modern WhatsApp database schema."""
|
||||||
|
|
||||||
# Build the filter conditions
|
# Build the filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
|
||||||
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
|
||||||
|
|
||||||
@@ -726,8 +747,10 @@ def _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty):
|
|||||||
"""Execute vCard query for legacy WhatsApp database schema."""
|
"""Execute vCard query for legacy WhatsApp database schema."""
|
||||||
|
|
||||||
# Build the filter conditions
|
# Build the filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
|
||||||
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
|
||||||
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
|
||||||
|
|
||||||
@@ -918,7 +941,7 @@ def create_html(
|
|||||||
no_avatar=False,
|
no_avatar=False,
|
||||||
experimental=False,
|
experimental=False,
|
||||||
headline=None
|
headline=None
|
||||||
):
|
):
|
||||||
"""Generate HTML chat files from data."""
|
"""Generate HTML chat files from data."""
|
||||||
template = setup_template(template, no_avatar, experimental)
|
template = setup_template(template, no_avatar, experimental)
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import struct
|
|||||||
import codecs
|
import codecs
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
class BPListWriter(object):
|
class BPListWriter(object):
|
||||||
def __init__(self, objects):
|
def __init__(self, objects):
|
||||||
self.bplist = ""
|
self.bplist = ""
|
||||||
@@ -57,6 +58,7 @@ class BPListWriter(object):
|
|||||||
else:
|
else:
|
||||||
raise Exception('BPlist not yet generated')
|
raise Exception('BPlist not yet generated')
|
||||||
|
|
||||||
|
|
||||||
class BPListReader(object):
|
class BPListReader(object):
|
||||||
def __init__(self, s):
|
def __init__(self, s):
|
||||||
self.data = s
|
self.data = s
|
||||||
@@ -135,7 +137,7 @@ class BPListReader(object):
|
|||||||
|
|
||||||
def __unpackDate(self, offset):
|
def __unpackDate(self, offset):
|
||||||
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
|
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
|
||||||
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td)
|
return datetime(year=2001, month=1, day=1) + timedelta(seconds=td)
|
||||||
|
|
||||||
def __unpackItem(self, offset):
|
def __unpackItem(self, offset):
|
||||||
'''__unpackItem(offset)
|
'''__unpackItem(offset)
|
||||||
@@ -154,44 +156,53 @@ class BPListReader(object):
|
|||||||
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
|
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
|
||||||
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
|
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
|
||||||
else:
|
else:
|
||||||
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed')
|
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset) + 'failed')
|
||||||
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||||
return self.__unpackInt(offset)
|
return self.__unpackInt(offset)
|
||||||
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||||
return self.__unpackFloat(offset)
|
return self.__unpackFloat(offset)
|
||||||
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
|
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
|
||||||
return self.__unpackDate(offset)
|
return self.__unpackDate(offset)
|
||||||
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
|
# data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
|
||||||
|
elif obj_type == 0x40:
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
return self.data[objref:objref+obj_count] # XXX: we return data as str
|
return self.data[objref:objref+obj_count] # XXX: we return data as str
|
||||||
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
|
# string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
|
||||||
|
elif obj_type == 0x50:
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
return self.data[objref:objref+obj_count]
|
return self.data[objref:objref+obj_count]
|
||||||
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
|
# string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
|
||||||
|
elif obj_type == 0x60:
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
return self.data[objref:objref+obj_count*2].decode('utf-16be')
|
return self.data[objref:objref+obj_count*2].decode('utf-16be')
|
||||||
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
|
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
|
||||||
# FIXME: Accept as a string for now
|
# FIXME: Accept as a string for now
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
return self.data[objref:objref+obj_count]
|
return self.data[objref:objref+obj_count]
|
||||||
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
# array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||||
|
elif obj_type == 0xA0:
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
arr = []
|
arr = []
|
||||||
for i in range(obj_count):
|
for i in range(obj_count):
|
||||||
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
arr.append(self.__unpackIntStruct(
|
||||||
|
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||||
return arr
|
return arr
|
||||||
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
# set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||||
|
elif obj_type == 0xC0:
|
||||||
# XXX: not serializable via apple implementation
|
# XXX: not serializable via apple implementation
|
||||||
raise Exception("0xC0 Not Implemented") # FIXME: implement
|
raise Exception("0xC0 Not Implemented") # FIXME: implement
|
||||||
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
|
# dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
|
||||||
|
elif obj_type == 0xD0:
|
||||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||||
keys = []
|
keys = []
|
||||||
for i in range(obj_count):
|
for i in range(obj_count):
|
||||||
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
keys.append(self.__unpackIntStruct(
|
||||||
|
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||||
values = []
|
values = []
|
||||||
objref += obj_count*self.object_ref_size
|
objref += obj_count*self.object_ref_size
|
||||||
for i in range(obj_count):
|
for i in range(obj_count):
|
||||||
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
values.append(self.__unpackIntStruct(
|
||||||
|
self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||||
dic = {}
|
dic = {}
|
||||||
for i in range(obj_count):
|
for i in range(obj_count):
|
||||||
dic[keys[i]] = values[i]
|
dic[keys[i]] = values[i]
|
||||||
@@ -212,7 +223,7 @@ class BPListReader(object):
|
|||||||
return newArr
|
return newArr
|
||||||
if type(obj) == dict:
|
if type(obj) == dict:
|
||||||
newDic = {}
|
newDic = {}
|
||||||
for k,v in obj.items():
|
for k, v in obj.items():
|
||||||
key_resolved = self.__resolveObject(k)
|
key_resolved = self.__resolveObject(k)
|
||||||
if isinstance(key_resolved, str):
|
if isinstance(key_resolved, str):
|
||||||
rk = key_resolved
|
rk = key_resolved
|
||||||
@@ -232,8 +243,9 @@ class BPListReader(object):
|
|||||||
raise Exception('Bad magic')
|
raise Exception('Bad magic')
|
||||||
|
|
||||||
# read trailer
|
# read trailer
|
||||||
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:])
|
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack(
|
||||||
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
|
'!6xBB4xI4xI4xI', self.data[-32:])
|
||||||
|
# print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
|
||||||
|
|
||||||
# read offset table
|
# read offset table
|
||||||
self.offset_table = self.data[self.table_offset:-32]
|
self.offset_table = self.data[self.table_offset:-32]
|
||||||
@@ -243,19 +255,19 @@ class BPListReader(object):
|
|||||||
offset_entry = ot[:self.offset_size]
|
offset_entry = ot[:self.offset_size]
|
||||||
ot = ot[self.offset_size:]
|
ot = ot[self.offset_size:]
|
||||||
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
|
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
|
||||||
#print "** plist offsets:",self.offsets
|
# print "** plist offsets:",self.offsets
|
||||||
|
|
||||||
# read object table
|
# read object table
|
||||||
self.objects = []
|
self.objects = []
|
||||||
k = 0
|
k = 0
|
||||||
for i in self.offsets:
|
for i in self.offsets:
|
||||||
obj = self.__unpackItem(i)
|
obj = self.__unpackItem(i)
|
||||||
#print "** plist unpacked",k,type(obj),obj,"at",i
|
# print "** plist unpacked",k,type(obj),obj,"at",i
|
||||||
k += 1
|
k += 1
|
||||||
self.objects.append(obj)
|
self.objects.append(obj)
|
||||||
|
|
||||||
# rebuild object tree
|
# rebuild object tree
|
||||||
#for i in range(len(self.objects)):
|
# for i in range(len(self.objects)):
|
||||||
# self.__resolveObject(i)
|
# self.__resolveObject(i)
|
||||||
|
|
||||||
# return root object
|
# return root object
|
||||||
@@ -267,16 +279,21 @@ class BPListReader(object):
|
|||||||
return parser.parse()
|
return parser.parse()
|
||||||
|
|
||||||
# helpers for testing
|
# helpers for testing
|
||||||
|
|
||||||
|
|
||||||
def plist(obj):
|
def plist(obj):
|
||||||
from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
|
from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
|
||||||
b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(obj, NSPropertyListBinaryFormat_v1_0, 0, None)
|
b = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
|
||||||
|
obj, NSPropertyListBinaryFormat_v1_0, 0, None)
|
||||||
return str(b.bytes())
|
return str(b.bytes())
|
||||||
|
|
||||||
|
|
||||||
def unplist(s):
|
def unplist(s):
|
||||||
from Foundation import NSData, NSPropertyListSerialization
|
from Foundation import NSData, NSPropertyListSerialization
|
||||||
d = NSData.dataWithBytes_length_(s, len(s))
|
d = NSData.dataWithBytes_length_(s, len(s))
|
||||||
return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
|
return NSPropertyListSerialization.propertyListWithData_options_format_error_(d, 0, None, None)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
@@ -66,8 +66,10 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
|
|||||||
cursor2 = db.cursor()
|
cursor2 = db.cursor()
|
||||||
|
|
||||||
# Build the chat filter conditions
|
# Build the chat filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||||
|
|
||||||
# Process contacts first
|
# Process contacts first
|
||||||
@@ -314,8 +316,10 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
|
|||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
|
|
||||||
# Build filter conditions
|
# Build filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||||
|
|
||||||
# Get media count
|
# Get media count
|
||||||
@@ -398,7 +402,8 @@ def process_media_item(content, data, media_folder, mime, separate_media):
|
|||||||
|
|
||||||
# Handle separate media option
|
# Handle separate media option
|
||||||
if separate_media:
|
if separate_media:
|
||||||
chat_display_name = slugify(current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
|
chat_display_name = slugify(
|
||||||
|
current_chat.name or message.sender or content["ZCONTACTJID"].split('@')[0], True)
|
||||||
current_filename = file_path.split("/")[-1]
|
current_filename = file_path.split("/")[-1]
|
||||||
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
new_folder = os.path.join(media_folder, "separated", chat_display_name)
|
||||||
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
Path(new_folder).mkdir(parents=True, exist_ok=True)
|
||||||
@@ -421,8 +426,10 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
|
|||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
|
|
||||||
# Build filter conditions
|
# Build filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
|
||||||
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
date_filter = f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''
|
||||||
|
|
||||||
# Fetch vCard mentions
|
# Fetch vCard mentions
|
||||||
@@ -485,7 +492,8 @@ def process_vcard_item(content, path, data):
|
|||||||
|
|
||||||
# Create vCard summary and update message
|
# Create vCard summary and update message
|
||||||
vcard_summary = "This media include the following vCard file(s):<br>"
|
vcard_summary = "This media include the following vCard file(s):<br>"
|
||||||
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name, fp in zip(vcard_names, file_paths)])
|
vcard_summary += " | ".join([f'<a href="{htmle(fp)}">{htmle(name)}</a>' for name,
|
||||||
|
fp in zip(vcard_names, file_paths)])
|
||||||
|
|
||||||
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
|
message = data.get_chat(content["ZCONTACTJID"]).get_message(content["ZMESSAGE"])
|
||||||
message.data = vcard_summary
|
message.data = vcard_summary
|
||||||
@@ -500,8 +508,10 @@ def calls(db, data, timezone_offset, filter_chat):
|
|||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
|
|
||||||
# Build filter conditions
|
# Build filter conditions
|
||||||
chat_filter_include = get_chat_condition(filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
chat_filter_include = get_chat_condition(
|
||||||
chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
filter_chat[0], True, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||||
|
chat_filter_exclude = get_chat_condition(
|
||||||
|
filter_chat[1], False, ["ZGROUPCALLCREATORUSERJIDSTRING"], None, "ios")
|
||||||
|
|
||||||
# Get call count
|
# Get call count
|
||||||
call_count_query = f"""
|
call_count_query = f"""
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ else:
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class BackupExtractor:
|
class BackupExtractor:
|
||||||
"""
|
"""
|
||||||
A class to handle the extraction of WhatsApp data from iOS backups,
|
A class to handle the extraction of WhatsApp data from iOS backups,
|
||||||
@@ -234,4 +235,3 @@ def extract_media(base_dir, identifiers, decrypt_chunk_size):
|
|||||||
"""
|
"""
|
||||||
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
|
extractor = BackupExtractor(base_dir, identifiers, decrypt_chunk_size)
|
||||||
extractor.extract()
|
extractor.extract()
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ CLEAR_LINE = "\x1b[K\n"
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def convert_time_unit(time_second: int) -> str:
|
def convert_time_unit(time_second: int) -> str:
|
||||||
"""Converts a time duration in seconds to a human-readable string.
|
"""Converts a time duration in seconds to a human-readable string.
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ Contributed by @magpires https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issue
|
|||||||
import re
|
import re
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
def process_phone_number(raw_phone):
|
def process_phone_number(raw_phone):
|
||||||
"""
|
"""
|
||||||
Process the raw phone string from the VCARD and return two formatted numbers:
|
Process the raw phone string from the VCARD and return two formatted numbers:
|
||||||
@@ -70,6 +71,7 @@ def process_phone_number(raw_phone):
|
|||||||
|
|
||||||
return original_formatted, modified_formatted
|
return original_formatted, modified_formatted
|
||||||
|
|
||||||
|
|
||||||
def process_vcard(input_vcard, output_vcard):
|
def process_vcard(input_vcard, output_vcard):
|
||||||
"""
|
"""
|
||||||
Process a VCARD file to standardize telephone entries and add a second TEL line
|
Process a VCARD file to standardize telephone entries and add a second TEL line
|
||||||
@@ -103,6 +105,7 @@ def process_vcard(input_vcard, output_vcard):
|
|||||||
with open(output_vcard, 'w', encoding='utf-8') as file:
|
with open(output_vcard, 'w', encoding='utf-8') as file:
|
||||||
file.writelines(output_lines)
|
file.writelines(output_lines)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."
|
description="Process a VCARD file to standardize telephone entries and add a second TEL line with the modified number (removing the extra ninth digit) for contacts with 9-digit subscribers."
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ def _extract_encrypted_key(keyfile):
|
|||||||
|
|
||||||
return _generate_hmac_of_hmac(key_stream)
|
return _generate_hmac_of_hmac(key_stream)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
key = open("encrypted_backup.key", "rb").read()
|
key = open("encrypted_backup.key", "rb").read()
|
||||||
database = open("wa.db.crypt15", "rb").read()
|
database = open("wa.db.crypt15", "rb").read()
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
from scripts.brazilian_number_processing import process_phone_number, process_vcard
|
from scripts.brazilian_number_processing import process_phone_number, process_vcard
|
||||||
|
|
||||||
|
|
||||||
class TestVCardProcessor(unittest.TestCase):
|
class TestVCardProcessor(unittest.TestCase):
|
||||||
|
|
||||||
def test_process_phone_number(self):
|
def test_process_phone_number(self):
|
||||||
@@ -248,7 +249,8 @@ END:VCARD
|
|||||||
output_path = input_path + '.out'
|
output_path = input_path + '.out'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
test_args = ['python' if os.name == 'nt' else 'python3', 'scripts/brazilian_number_processing.py', input_path, output_path]
|
test_args = ['python' if os.name == 'nt' else 'python3',
|
||||||
|
'scripts/brazilian_number_processing.py', input_path, output_path]
|
||||||
# We're just testing that the argument parsing works
|
# We're just testing that the argument parsing works
|
||||||
subprocess.call(
|
subprocess.call(
|
||||||
test_args,
|
test_args,
|
||||||
@@ -265,5 +267,6 @@ END:VCARD
|
|||||||
if os.path.exists(output_path):
|
if os.path.exists(output_path):
|
||||||
os.unlink(output_path)
|
os.unlink(output_path)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -8,12 +8,15 @@ def test_readVCardsFile():
|
|||||||
data_dir = os.path.join(os.path.dirname(__file__), "data")
|
data_dir = os.path.join(os.path.dirname(__file__), "data")
|
||||||
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
|
assert len(read_vcards_file(os.path.join(data_dir, "contacts.vcf"), "852")) > 0
|
||||||
|
|
||||||
|
|
||||||
def test_create_number_to_name_dicts():
|
def test_create_number_to_name_dicts():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def test_fuzzy_match_numbers():
|
def test_fuzzy_match_numbers():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_number():
|
def test_normalize_number():
|
||||||
assert normalize_number('0531234567', '1') == '1531234567'
|
assert normalize_number('0531234567', '1') == '1531234567'
|
||||||
assert normalize_number('001531234567', '2') == '1531234567'
|
assert normalize_number('001531234567', '2') == '1531234567'
|
||||||
|
|||||||
Reference in New Issue
Block a user