Refactor to use tqdm for showing progress

KnugiHK
2026-01-17 13:18:31 +08:00
parent 1c7d6f7912
commit d200130335
7 changed files with 264 additions and 302 deletions
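Every hunk below applies the same pattern: a manual counter with periodic logger.info(...\r) updates is replaced by a tqdm progress bar, and the elapsed time is read from the bar before it closes. A minimal sketch of the pattern (the item list and names here are illustrative, not from the repository):

from tqdm import tqdm

items = range(500)  # stand-in for the rows, files, or chats being processed
with tqdm(total=len(items), desc="Processing items", unit="item", leave=False) as pbar:
    for item in items:
        pass  # per-item work goes here
        pbar.update(1)                         # advance the bar by one item
    total_time = pbar.format_dict['elapsed']   # seconds elapsed, read before the bar closes
print(f"Processed {len(items)} items in {total_time:.2f} seconds")

leave=False clears the bar on completion, so only the final summary log line stays in the terminal.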


@@ -19,6 +19,7 @@ from Whatsapp_Chat_Exporter.utility import telegram_json_format
 from argparse import ArgumentParser, SUPPRESS
 from datetime import datetime
 from getpass import getpass
+from tqdm import tqdm
 from sys import exit
 from typing import Optional, List, Dict
 from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
@@ -665,24 +666,27 @@ def export_multiple_json(args, data: Dict) -> None:
     # Export each chat
     total = len(data.keys())
-    for index, jik in enumerate(data.keys()):
-        if data[jik]["name"] is not None:
-            contact = data[jik]["name"].replace('/', '')
-        else:
-            contact = jik.replace('+', '')
-        if args.telegram:
-            messages = telegram_json_format(jik, data[jik], args.timezone_offset)
-        else:
-            messages = {jik: data[jik]}
-        with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
-            file_content = json.dumps(
-                messages,
-                ensure_ascii=not args.avoid_encoding_json,
-                indent=args.pretty_print_json
-            )
-            f.write(file_content)
-        logger.info(f"Writing JSON file...({index + 1}/{total})\r")
+    with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar:
+        for jik in data.keys():
+            if data[jik]["name"] is not None:
+                contact = data[jik]["name"].replace('/', '')
+            else:
+                contact = jik.replace('+', '')
+            if args.telegram:
+                messages = telegram_json_format(jik, data[jik], args.timezone_offset)
+            else:
+                messages = {jik: data[jik]}
+            with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
+                file_content = json.dumps(
+                    messages,
+                    ensure_ascii=not args.avoid_encoding_json,
+                    indent=args.pretty_print_json
+                )
+                f.write(file_content)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total} JSON files in {total_time:.2f} seconds{CLEAR_LINE}")

 def process_exported_chat(args, data: ChatCollection) -> None:


@@ -1,13 +1,11 @@
-import time
 import hmac
 import io
 import logging
-import threading
 import zlib
 import concurrent.futures
+from tqdm import tqdm
 from typing import Tuple, Union
 from hashlib import sha256
-from sys import exit
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType
 try:
@@ -165,82 +163,64 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->
     # Attempt known offsets first
     for offsets in CRYPT14_OFFSETS:
-        iv = database[offsets["iv"]:offsets["iv"] + 16]
-        db_ciphertext = database[offsets["db"]:]
-        try:
-            decrypted_db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            pass  # Try next offset
-        else:
-            logger.debug(
-                f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}"
-            )
-            return decrypted_db  # Successful decryption
+        iv = offsets["iv"]
+        db = offsets["db"]
+        # _attempt_decrypt_task swallows zlib.error and ValueError and returns None,
+        # so failure is signalled by the return value rather than an exception
+        decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key)
+        if decrypted_db is None:
+            continue  # Try next offset
+        logger.debug(
+            f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}"
+        )
+        return decrypted_db  # Successful decryption

-    def animate_message(stop_event):
-        base_msg = "Common offsets failed. Initiating brute-force with multithreading"
-        dots = ["", ".", "..", "..."]
-        i = 0
-        while not stop_event.is_set():
-            logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r")
-            time.sleep(0.3)
-            i += 1
-        logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}")
-
-    stop_event = threading.Event()
-    anim_thread = threading.Thread(target=animate_message, args=(stop_event,))
-    anim_thread.start()
-
-    # Convert brute force generator into a list for parallel processing
-    offset_combinations = list(brute_force_offset())
-
-    def attempt_decrypt(offset_tuple):
-        """Attempt decryption with the given offsets."""
-        start_iv, end_iv, start_db = offset_tuple
-        iv = database[start_iv:end_iv]
-        db_ciphertext = database[start_db:]
-        logger.debug(""f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
-        try:
-            db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            return None  # Decryption failed, move to next
-        else:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(
-                f"The offsets of your IV and database are {start_iv} and "
-                f"{start_db}, respectively. To include your offsets in the "
-                "program, please report it by creating an issue on GitHub: "
-                "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
-                f"\nShutting down other threads...{CLEAR_LINE}"
-            )
-            return db
-
-    with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
-        future_to_offset = {executor.submit(attempt_decrypt, offset)
-                            : offset for offset in offset_combinations}
-        try:
-            for future in concurrent.futures.as_completed(future_to_offset):
-                result = future.result()
-                if result is not None:
-                    # Shutdown remaining threads
-                    executor.shutdown(wait=False, cancel_futures=True)
-                    return result
-        except KeyboardInterrupt:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
-            executor.shutdown(wait=False, cancel_futures=True)
-            exit(1)
-        finally:
-            stop_event.set()
-            anim_thread.join()
+    offset_max = 200
+    logger.info(f"Common offsets failed. Attempting to brute-force...{CLEAR_LINE}")
+    with tqdm(total=offset_max ** 2, desc="Brute-forcing offsets", unit="trial", leave=False) as pbar:
+        with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
+            # Map futures to their offsets
+            future_to_offset = {
+                executor.submit(_attempt_decrypt_task, offset, database, main_key): offset
+                for offset in brute_force_offset(offset_max, offset_max)
+            }
+            try:
+                for future in concurrent.futures.as_completed(future_to_offset):
+                    pbar.update(1)
+                    result = future.result()
+                    if result is not None:
+                        # Success! Shut down other tasks immediately
+                        executor.shutdown(wait=False, cancel_futures=True)
+                        start_iv, _, start_db = future_to_offset[future]
+                        logger.info(
+                            f"The offsets of your IV and database are {start_iv} and "
+                            f"{start_db}, respectively. To include your offsets in the "
+                            "program, please report it by creating an issue on GitHub: "
+                            "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
+                            f"\nShutting down other threads...{CLEAR_LINE}"
+                        )
+                        return result
+            except KeyboardInterrupt:
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise KeyboardInterrupt(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")

     raise OffsetNotFoundError("Could not find the correct offsets for decryption.")
+
+
+def _attempt_decrypt_task(offset_tuple, database, main_key):
+    """Attempt decryption with the given offsets."""
+    start_iv, end_iv, start_db = offset_tuple
+    iv = database[start_iv:end_iv]
+    db_ciphertext = database[start_db:]
+    try:
+        return _decrypt_database(db_ciphertext, main_key, iv)
+    except (zlib.error, ValueError):
+        return None

 def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
     """Decrypt a crypt12 database.

@@ -4,6 +4,7 @@ import logging
 import sqlite3
 import os
 import shutil
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards):
logger.info(f"Processed {total_row_number} contacts\n")
c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
row = c.fetchone()
while row is not None:
current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
if row["status"] is not None:
current_chat.status = row["status"]
row = c.fetchone()
with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
while (row := _fetch_row_safely(c)) is not None:
current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
if row["status"] is not None:
current_chat.status = row["status"]
pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")
return True
@@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
"""
c = db.cursor()
total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing messages...(0/{total_row_number})\r")
try:
content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -84,22 +87,12 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     except Exception as e:
         raise e

-    i = 0
-    # Fetch the first row safely
-    content = _fetch_row_safely(content_cursor)
-    while content is not None:
-        _process_single_message(data, content, table_message, timezone_offset)
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        # Fetch the next row safely
-        content = _fetch_row_safely(content_cursor)
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_message(data, content, table_message, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")

 # Helper functions for message processing
@@ -499,8 +492,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
"""
c = db.cursor()
total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
logger.info(f"Processing media...(0/{total_row_number})\r")
try:
content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
except sqlite3.OperationalError:
@@ -512,18 +503,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     # Ensure thumbnails directory exists
     Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)

-    i = 0
-    while content is not None:
-        _process_single_media(data, content, media_folder, mime, separate_media)
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = content_cursor.fetchone()
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_media(data, content, media_folder, mime, separate_media)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")

 # Helper functions for media processing
@@ -701,17 +686,17 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
     rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)
     total_row_number = len(rows)
-    logger.info(f"Processing vCards...(0/{total_row_number})\r")

     # Create vCards directory if it doesn't exist
     path = os.path.join(media_folder, "vCards")
     Path(path).mkdir(parents=True, exist_ok=True)

-    for index, row in enumerate(rows):
-        _process_vcard_row(row, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for row in rows:
+            _process_vcard_row(row, path, data)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")

 def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
     """Execute vCard query for modern WhatsApp database schema."""
@@ -816,15 +801,15 @@ def calls(db, data, timezone_offset, filter_chat):
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

     # Process each call
-    content = calls_data.fetchone()
-    while content is not None:
-        _process_call_record(content, chat, data, timezone_offset)
-        content = calls_data.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := _fetch_row_safely(calls_data)) is not None:
+            _process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']

     # Add the calls chat to the data
     data.add_chat("000000000000000", chat)
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")

 def _get_calls_count(c, filter_chat):
     """Get the count of call records that match the filter."""
@@ -948,7 +933,6 @@ def create_html(
     template = setup_template(template, no_avatar, experimental)

     total_row_number = len(data)
-    logger.info(f"Generating chats...(0/{total_row_number})\r")

     # Create output directory if it doesn't exist
     if not os.path.isdir(output_folder):
@@ -956,43 +940,42 @@ def create_html(
     w3css = get_status_location(output_folder, offline_static)

-    for current, contact in enumerate(data):
-        current_chat = data.get_chat(contact)
-        if len(current_chat) == 0:
-            # Skip empty chats
-            continue
-
-        safe_file_name, name = get_file_name(contact, current_chat)
-
-        if maximum_size is not None:
-            _generate_paginated_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                maximum_size,
-                headline
-            )
-        else:
-            _generate_single_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                headline
-            )
-
-        if current % 10 == 0:
-            logger.info(f"Generating chats...({current}/{total_row_number})\r")
-    logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar:
+        for contact in data:
+            current_chat = data.get_chat(contact)
+            if len(current_chat) == 0:
+                # Skip empty chats
+                continue
+
+            safe_file_name, name = get_file_name(contact, current_chat)
+
+            if maximum_size is not None:
+                _generate_paginated_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    maximum_size,
+                    headline
+                )
+            else:
+                _generate_single_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    headline
+                )
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total_row_number} chats in {total_time:.2f} seconds{CLEAR_LINE}")

 def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
     """Generate a single HTML file for a chat."""


@@ -4,6 +4,7 @@ import os
 import logging
 from datetime import datetime
 from mimetypes import MimeTypes
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
@@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False):
     # Second pass: process the messages
     with open(path, "r", encoding="utf8") as file:
-        for index, line in enumerate(file):
-            you, user_identification_done = process_line(
-                line, index, chat, path, you,
-                assume_first_as_me, user_identification_done
-            )
-
-            # Show progress
-            if index % 1000 == 0:
-                logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
+        with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
+            for index, line in enumerate(file):
+                you, user_identification_done = process_line(
+                    line, index, chat, path, you,
+                    assume_first_as_me, user_identification_done
+                )
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages & media in {total_time:.2f} seconds{CLEAR_LINE}")

     return data


@@ -4,6 +4,7 @@ import os
 import logging
 import shutil
 from glob import glob
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -23,17 +24,18 @@ def contacts(db, data):
logger.info(f"Pre-processing contacts...({total_row_number})\r")
c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
content = c.fetchone()
while content is not None:
zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net"
with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
while (content := c.fetchone()) is not None:
zwhatsapp_id = content["ZWHATSAPPID"]
if not zwhatsapp_id.endswith("@s.whatsapp.net"):
zwhatsapp_id += "@s.whatsapp.net"
current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat)
content = c.fetchone()
logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}")
current_chat = ChatStore(Device.IOS)
current_chat.status = content["ZABOUTTEXT"]
data.add_chat(zwhatsapp_id, current_chat)
pbar.update(1)
total_time = pbar.format_dict['elapsed']
logger.info(f"Pre-processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")
def process_contact_avatars(current_chat, media_folder, contact_id):
@@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
"""
c.execute(contact_query)
total_row_number = c.fetchone()[0]
logger.info(f"Processing contacts...({total_row_number})\r")
# Get distinct contacts
contacts_query = f"""
@@ -114,24 +115,24 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     c.execute(contacts_query)

     # Process each contact
-    content = c.fetchone()
-    while content is not None:
-        contact_name = get_contact_name(content)
-        contact_id = content["ZCONTACTJID"]
-
-        # Add or update chat
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
-        else:
-            current_chat = data.get_chat(contact_id)
-            current_chat.name = contact_name
-        current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
-
-        # Process avatar images
-        process_contact_avatars(current_chat, media_folder, contact_id)
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_name = get_contact_name(content)
+            contact_id = content["ZCONTACTJID"]
+
+            # Add or update chat
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
+            else:
+                current_chat = data.get_chat(contact_id)
+                current_chat.name = contact_name
+            current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
+
+            # Process avatar images
+            process_contact_avatars(current_chat, media_folder, contact_id)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")

     # Get message count
     message_count_query = f"""
@@ -190,46 +191,42 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]}

     # Process each message
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        contact_id = content["ZCONTACTJID"]
-        message_pk = content["Z_PK"]
-        is_group_message = content["ZGROUPINFO"] is not None
-
-        # Ensure chat exists
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
-            process_contact_avatars(current_chat, media_folder, contact_id)
-        else:
-            current_chat = data.get_chat(contact_id)
-
-        # Create message object
-        ts = APPLE_TIME + content["ZMESSAGEDATE"]
-        message = Message(
-            from_me=content["ZISFROMME"],
-            timestamp=ts,
-            time=ts,
-            key_id=content["ZSTANZAID"][:17],
-            timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
-            message_type=content["ZMESSAGETYPE"],
-            received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
-            read_timestamp=None  # TODO: Add timestamp
-        )
-
-        # Process message data
-        invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
-
-        # Add valid messages to chat
-        if not invalid:
-            current_chat.add_message(message_pk, message)
-
-        # Update progress
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_id = content["ZCONTACTJID"]
+            message_pk = content["Z_PK"]
+            is_group_message = content["ZGROUPINFO"] is not None
+
+            # Ensure chat exists
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
+                process_contact_avatars(current_chat, media_folder, contact_id)
+            else:
+                current_chat = data.get_chat(contact_id)
+
+            # Create message object
+            ts = APPLE_TIME + content["ZMESSAGEDATE"]
+            message = Message(
+                from_me=content["ZISFROMME"],
+                timestamp=ts,
+                time=ts,
+                key_id=content["ZSTANZAID"][:17],
+                timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
+                message_type=content["ZMESSAGETYPE"],
+                received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
+                read_timestamp=None  # TODO: Add timestamp
+            )
+
+            # Process message data
+            invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
+
+            # Add valid messages to chat
+            if not invalid:
+                current_chat.add_message(message_pk, message)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")

 def process_message_data(message, content, is_group_message, data, message_map, no_reply):
@@ -371,17 +368,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     # Process each media item
     mime = MimeTypes()
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        process_media_item(content, data, media_folder, mime, separate_media)
-
-        # Update progress
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_media_item(content, data, media_folder, mime, separate_media)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")

 def process_media_item(content, data, media_folder, mime, separate_media):
@@ -467,10 +459,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
     Path(path).mkdir(parents=True, exist_ok=True)

     # Process each vCard
-    for index, content in enumerate(contents):
-        process_vcard_item(content, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for content in contents:
+            process_vcard_item(content, path, data)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")

 def process_vcard_item(content, path, data):
@@ -530,8 +524,6 @@ def calls(db, data, timezone_offset, filter_chat):
     if total_row_number == 0:
         return
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")

     # Fetch call records
     calls_query = f"""
         SELECT ZCALLIDSTRING,
@@ -556,14 +548,15 @@ def calls(db, data, timezone_offset, filter_chat):
     # Create calls chat
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

     # Process each call
-    content = c.fetchone()
-    while content is not None:
-        process_call_record(content, chat, data, timezone_offset)
-        content = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']

     # Add calls chat to data
     data.add_chat("000000000000000", chat)
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")

 def process_call_record(content, chat, data, timezone_offset):


@@ -6,6 +6,7 @@ import sqlite3
 import os
 import getpass
 from sys import exit, platform as osname
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier
 from Whatsapp_Chat_Exporter.bplist import BPListReader
 try:
@@ -89,7 +90,7 @@ class BackupExtractor:
         Args:
             password (str): The password for the encrypted backup.
         """
-        logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}")
+        logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}")
         self.backup = EncryptedBackup(
             backup_directory=self.base_dir,
             passphrase=password,
@@ -97,7 +98,7 @@ class BackupExtractor:
             check_same_thread=False,
             decrypt_chunk_size=self.decrypt_chunk_size,
         )
-        logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}")
+        logger.info(f"iOS backup opened successfully{CLEAR_LINE}")
         logger.info("Decrypting WhatsApp database...\r")
         try:
             self.backup.extract_file(
@@ -130,9 +131,12 @@ class BackupExtractor:
     def _extract_decrypted_files(self):
         """Extract all WhatsApp files after decryption"""
+        pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)
+
         def extract_progress_handler(file_id, domain, relative_path, n, total_files):
-            if n % 100 == 0:
-                logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
+            if pbar.total is None:
+                pbar.total = total_files
+            pbar.n = n
+            pbar.refresh()
             return True

         self.backup.extract_files(
@@ -141,7 +145,9 @@ class BackupExtractor:
             preserve_folders=True,
             filter_callback=extract_progress_handler
         )
-        logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}")
+        total_time = pbar.format_dict['elapsed']
+        pbar.close()
+        logger.info(f"All required files are decrypted and extracted in {total_time:.2f} seconds{CLEAR_LINE}")

     def _extract_unencrypted_backup(self):
         """
@@ -192,7 +198,6 @@ class BackupExtractor:
         c = manifest.cursor()
         c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
         total_row_number = c.fetchone()[0]
-        logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
         c.execute(
             f"""
             SELECT fileID, relativePath, flags, file AS metadata,
@@ -205,33 +210,30 @@ class BackupExtractor:
         if not os.path.isdir(_wts_id):
             os.mkdir(_wts_id)

-        row = c.fetchone()
-        while row is not None:
-            if not row["relativePath"]:  # Skip empty relative paths
-                row = c.fetchone()
-                continue
-
-            destination = os.path.join(_wts_id, row["relativePath"])
-            hashes = row["fileID"]
-            folder = hashes[:2]
-            flags = row["flags"]
-
-            if flags == 2:  # Directory
-                try:
-                    os.mkdir(destination)
-                except FileExistsError:
-                    pass
-            elif flags == 1:  # File
-                shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
-                metadata = BPListReader(row["metadata"]).parse()
-                creation = metadata["$objects"][1]["Birth"]
-                modification = metadata["$objects"][1]["LastModified"]
-                os.utime(destination, (modification, modification))
-
-            if row["_index"] % 100 == 0:
-                logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r")
-            row = c.fetchone()
-        logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
+        with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar:
+            while (row := c.fetchone()) is not None:
+                if not row["relativePath"]:  # Skip empty relative paths
+                    continue
+
+                destination = os.path.join(_wts_id, row["relativePath"])
+                hashes = row["fileID"]
+                folder = hashes[:2]
+                flags = row["flags"]
+
+                if flags == 2:  # Directory
+                    try:
+                        os.mkdir(destination)
+                    except FileExistsError:
+                        pass
+                elif flags == 1:  # File
+                    shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
+                    metadata = BPListReader(row["metadata"]).parse()
+                    _creation = metadata["$objects"][1]["Birth"]
+                    modification = metadata["$objects"][1]["LastModified"]
+                    os.utime(destination, (modification, modification))
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+        logger.info(f"Extracted {total_row_number} WhatsApp files in {total_time:.2f} seconds{CLEAR_LINE}")

 def extract_media(base_dir, identifiers, decrypt_chunk_size):
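extract_files above reports progress through a library callback rather than a loop the exporter controls, so the commit sets the bar's position directly instead of calling update(). A sketch of that callback pattern, with the (n, total) shape assumed to mirror the diff's filter_callback:

from tqdm import tqdm

pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)

def progress_handler(n, total_files):
    if pbar.total is None:        # the total is only known once the callback first fires
        pbar.total = total_files
    pbar.n = n                    # jump straight to the reported position
    pbar.refresh()                # redraw immediately, since update() is bypassed
    return True                   # True keeps the file in the extraction set

for n in range(1, 11):            # simulate a library invoking the callback
    progress_handler(n, 10)
pbar.close()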


@@ -5,13 +5,13 @@ import json
 import os
 import unicodedata
 import re
-import string
 import math
 import shutil
 from bleach import clean as sanitize
 from markupsafe import Markup
 from datetime import datetime, timedelta
 from enum import IntEnum
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
 from typing import Dict, List, Optional, Tuple, Union
 try:
@@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection):
     with open(json_file, "r") as f:
         temp_data = json.loads(f.read())

     total_row_number = len(tuple(temp_data.keys()))
-    logger.info(f"Importing chats from JSON...(0/{total_row_number})\r")
-
-    for index, (jid, chat_data) in enumerate(temp_data.items()):
-        chat = ChatStore.from_json(chat_data)
-        data.add_chat(jid, chat)
-        logger.info(
-            f"Importing chats from JSON...({index + 1}/{total_row_number})\r")
-    logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar:
+        for jid, chat_data in temp_data.items():
+            chat = ChatStore.from_json(chat_data)
+            data.add_chat(jid, chat)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Imported {total_row_number} chats from JSON in {total_time:.2f} seconds{CLEAR_LINE}")

 def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -439,7 +439,7 @@ CRYPT14_OFFSETS = (
{"iv": 67, "db": 193},
{"iv": 67, "db": 194},
{"iv": 67, "db": 158},
{"iv": 67, "db": 196}
{"iv": 67, "db": 196},
)
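A side effect of the refactor, visible throughout the diff, is the switch from fetch-before-loop cursor iteration to an assignment expression. A minimal sketch against an in-memory sqlite3 database (table and values illustrative):

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE chats (jid TEXT)")
con.executemany("INSERT INTO chats VALUES (?)", [("a",), ("b",), ("c",)])
c = con.execute("SELECT jid FROM chats")

# One fetch site instead of one before the loop and another at its bottom;
# the walrus operator requires Python 3.8+.
while (row := c.fetchone()) is not None:
    print(row[0])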