From d200130335f336ac2bc7263ac88292d741d2dfe1 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Sat, 17 Jan 2026 13:18:31 +0800 Subject: [PATCH] Refactor to use tqdm for showing progress --- Whatsapp_Chat_Exporter/__main__.py | 38 ++--- Whatsapp_Chat_Exporter/android_crypt.py | 114 ++++++-------- Whatsapp_Chat_Exporter/android_handler.py | 151 ++++++++---------- Whatsapp_Chat_Exporter/exported_handler.py | 20 +-- Whatsapp_Chat_Exporter/ios_handler.py | 161 ++++++++++---------- Whatsapp_Chat_Exporter/ios_media_handler.py | 64 ++++---- Whatsapp_Chat_Exporter/utility.py | 18 +-- 7 files changed, 264 insertions(+), 302 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index 07a341b..d30c937 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -19,6 +19,7 @@ from Whatsapp_Chat_Exporter.utility import telegram_json_format from argparse import ArgumentParser, SUPPRESS from datetime import datetime from getpass import getpass +from tqdm import tqdm from sys import exit from typing import Optional, List, Dict from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards @@ -665,24 +666,27 @@ def export_multiple_json(args, data: Dict) -> None: # Export each chat total = len(data.keys()) - for index, jik in enumerate(data.keys()): - if data[jik]["name"] is not None: - contact = data[jik]["name"].replace('/', '') - else: - contact = jik.replace('+', '') + with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar: + for jik in data.keys(): + if data[jik]["name"] is not None: + contact = data[jik]["name"].replace('/', '') + else: + contact = jik.replace('+', '') - if args.telegram: - messages = telegram_json_format(jik, data[jik], args.timezone_offset) - else: - messages = {jik: data[jik]} - with open(f"{json_path}/{safe_name(contact)}.json", "w") as f: - file_content = json.dumps( - messages, - ensure_ascii=not args.avoid_encoding_json, - indent=args.pretty_print_json - ) - f.write(file_content) - logger.info(f"Writing JSON file...({index + 1}/{total})\r") + if args.telegram: + messages = telegram_json_format(jik, data[jik], args.timezone_offset) + else: + messages = {jik: data[jik]} + with open(f"{json_path}/{safe_name(contact)}.json", "w") as f: + file_content = json.dumps( + messages, + ensure_ascii=not args.avoid_encoding_json, + indent=args.pretty_print_json + ) + f.write(file_content) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Generated {total} JSON files in {total_time:.2f} seconds{CLEAR_LINE}") def process_exported_chat(args, data: ChatCollection) -> None: diff --git a/Whatsapp_Chat_Exporter/android_crypt.py b/Whatsapp_Chat_Exporter/android_crypt.py index b8ed13a..aceb6cd 100644 --- a/Whatsapp_Chat_Exporter/android_crypt.py +++ b/Whatsapp_Chat_Exporter/android_crypt.py @@ -1,13 +1,11 @@ -import time import hmac import io import logging -import threading import zlib import concurrent.futures +from tqdm import tqdm from typing import Tuple, Union from hashlib import sha256 -from sys import exit from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType try: @@ -165,82 +163,64 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) -> # Attempt known offsets first for offsets in CRYPT14_OFFSETS: - iv = database[offsets["iv"]:offsets["iv"] + 16] - db_ciphertext = database[offsets["db"]:] + iv = offsets["iv"] + db = offsets["db"] try: - decrypted_db = _decrypt_database(db_ciphertext, main_key, iv) + decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key) except (zlib.error, ValueError): - pass # Try next offset + continue else: logger.debug( - f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}" + f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}" ) return decrypted_db # Successful decryption - def animate_message(stop_event): - base_msg = "Common offsets failed. Initiating brute-force with multithreading" - dots = ["", ".", "..", "..."] - i = 0 - while not stop_event.is_set(): - logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r") - time.sleep(0.3) - i += 1 - logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}") + offset_max = 200 + logger.info(f"Common offsets failed. Attempt to brute-force...{CLEAR_LINE}") + with tqdm(total=offset_max ** 2, desc="Brute-forcing offsets", unit="trial", leave=False) as pbar: + with concurrent.futures.ThreadPoolExecutor(max_worker) as executor: + # Map futures to their offsets + future_to_offset = { + executor.submit(_attempt_decrypt_task, offset, database, main_key): offset + for offset in brute_force_offset(offset_max, offset_max) + } - stop_event = threading.Event() - anim_thread = threading.Thread(target=animate_message, args=(stop_event,)) - anim_thread.start() + try: + for future in concurrent.futures.as_completed(future_to_offset): + pbar.update(1) + result = future.result() + + if result is not None: + # Success! Shutdown other tasks immediately + executor.shutdown(wait=False, cancel_futures=True) + + start_iv, _, start_db = future_to_offset[future] + logger.info( + f"The offsets of your IV and database are {start_iv} and " + f"{start_db}, respectively. To include your offsets in the " + "program, please report it by creating an issue on GitHub: " + "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47" + f"\nShutting down other threads...{CLEAR_LINE}" + ) + return result - # Convert brute force generator into a list for parallel processing - offset_combinations = list(brute_force_offset()) - - def attempt_decrypt(offset_tuple): - """Attempt decryption with the given offsets.""" - start_iv, end_iv, start_db = offset_tuple - iv = database[start_iv:end_iv] - db_ciphertext = database[start_db:] - logger.debug(""f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}") - - try: - db = _decrypt_database(db_ciphertext, main_key, iv) - except (zlib.error, ValueError): - return None # Decryption failed, move to next - else: - stop_event.set() - anim_thread.join() - logger.info( - f"The offsets of your IV and database are {start_iv} and " - f"{start_db}, respectively. To include your offsets in the " - "program, please report it by creating an issue on GitHub: " - "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47" - f"\nShutting down other threads...{CLEAR_LINE}" - ) - return db - - with concurrent.futures.ThreadPoolExecutor(max_worker) as executor: - future_to_offset = {executor.submit(attempt_decrypt, offset) - : offset for offset in offset_combinations} - - try: - for future in concurrent.futures.as_completed(future_to_offset): - result = future.result() - if result is not None: - # Shutdown remaining threads - executor.shutdown(wait=False, cancel_futures=True) - return result - - except KeyboardInterrupt: - stop_event.set() - anim_thread.join() - logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}") - executor.shutdown(wait=False, cancel_futures=True) - exit(1) - finally: - stop_event.set() - anim_thread.join() + except KeyboardInterrupt: + executor.shutdown(wait=False, cancel_futures=True) + raise KeyboardInterrupt("Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}") raise OffsetNotFoundError("Could not find the correct offsets for decryption.") +def _attempt_decrypt_task(offset_tuple, database, main_key): + """Attempt decryption with the given offsets.""" + start_iv, end_iv, start_db = offset_tuple + iv = database[start_iv:end_iv] + db_ciphertext = database[start_db:] + + try: + return _decrypt_database(db_ciphertext, main_key, iv) + except (zlib.error, ValueError): + return None + def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes: """Decrypt a crypt12 database. diff --git a/Whatsapp_Chat_Exporter/android_handler.py b/Whatsapp_Chat_Exporter/android_handler.py index 3bcb769..1edabb8 100644 --- a/Whatsapp_Chat_Exporter/android_handler.py +++ b/Whatsapp_Chat_Exporter/android_handler.py @@ -4,6 +4,7 @@ import logging import sqlite3 import os import shutil +from tqdm import tqdm from pathlib import Path from mimetypes import MimeTypes from markupsafe import escape as htmle @@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards): logger.info(f"Processed {total_row_number} contacts\n") c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;") - row = c.fetchone() - while row is not None: - current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"])) - if row["status"] is not None: - current_chat.status = row["status"] - row = c.fetchone() + + with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar: + while (row := _fetch_row_safely(c)) is not None: + current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"])) + if row["status"] is not None: + current_chat.status = row["status"] + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}") return True @@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, """ c = db.cursor() total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat) - logger.info(f"Processing messages...(0/{total_row_number})\r") try: content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat) @@ -84,22 +87,12 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, except Exception as e: raise e - i = 0 - # Fetch the first row safely - content = _fetch_row_safely(content_cursor) - - while content is not None: - _process_single_message(data, content, table_message, timezone_offset) - - i += 1 - if i % 1000 == 0: - logger.info(f"Processing messages...({i}/{total_row_number})\r") - - # Fetch the next row safely - content = _fetch_row_safely(content_cursor) - - logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}") - + with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar: + while (content := _fetch_row_safely(content_cursor)) is not None: + _process_single_message(data, content, table_message, timezone_offset) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}") # Helper functions for message processing @@ -499,8 +492,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa """ c = db.cursor() total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat) - logger.info(f"Processing media...(0/{total_row_number})\r") - try: content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat) except sqlite3.OperationalError: @@ -512,18 +503,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa # Ensure thumbnails directory exists Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True) - i = 0 - while content is not None: - _process_single_media(data, content, media_folder, mime, separate_media) - - i += 1 - if i % 100 == 0: - logger.info(f"Processing media...({i}/{total_row_number})\r") - - content = content_cursor.fetchone() - - logger.info(f"Processed {total_row_number} media{CLEAR_LINE}") - + with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar: + while (content := _fetch_row_safely(content_cursor)) is not None: + _process_single_media(data, content, media_folder, mime, separate_media) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}") # Helper functions for media processing @@ -701,17 +686,17 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty) total_row_number = len(rows) - logger.info(f"Processing vCards...(0/{total_row_number})\r") # Create vCards directory if it doesn't exist path = os.path.join(media_folder, "vCards") Path(path).mkdir(parents=True, exist_ok=True) - for index, row in enumerate(rows): - _process_vcard_row(row, path, data) - logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r") - logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}") - + with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar: + for row in rows: + _process_vcard_row(row, path, data) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}") def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty): """Execute vCard query for modern WhatsApp database schema.""" @@ -816,15 +801,15 @@ def calls(db, data, timezone_offset, filter_chat): chat = ChatStore(Device.ANDROID, "WhatsApp Calls") # Process each call - content = calls_data.fetchone() - while content is not None: - _process_call_record(content, chat, data, timezone_offset) - content = calls_data.fetchone() + with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar: + while (content := _fetch_row_safely(calls_data)) is not None: + _process_call_record(content, chat, data, timezone_offset) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] # Add the calls chat to the data data.add_chat("000000000000000", chat) - logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}") - + logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}") def _get_calls_count(c, filter_chat): """Get the count of call records that match the filter.""" @@ -948,7 +933,6 @@ def create_html( template = setup_template(template, no_avatar, experimental) total_row_number = len(data) - logger.info(f"Generating chats...(0/{total_row_number})\r") # Create output directory if it doesn't exist if not os.path.isdir(output_folder): @@ -956,43 +940,42 @@ def create_html( w3css = get_status_location(output_folder, offline_static) - for current, contact in enumerate(data): - current_chat = data.get_chat(contact) - if len(current_chat) == 0: - # Skip empty chats - continue + with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar: + for contact in data: + current_chat = data.get_chat(contact) + if len(current_chat) == 0: + # Skip empty chats + continue - safe_file_name, name = get_file_name(contact, current_chat) + safe_file_name, name = get_file_name(contact, current_chat) - if maximum_size is not None: - _generate_paginated_chat( - current_chat, - safe_file_name, - name, - contact, - output_folder, - template, - w3css, - maximum_size, - headline - ) - else: - _generate_single_chat( - current_chat, - safe_file_name, - name, - contact, - output_folder, - template, - w3css, - headline - ) - - if current % 10 == 0: - logger.info(f"Generating chats...({current}/{total_row_number})\r") - - logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}") + if maximum_size is not None: + _generate_paginated_chat( + current_chat, + safe_file_name, + name, + contact, + output_folder, + template, + w3css, + maximum_size, + headline + ) + else: + _generate_single_chat( + current_chat, + safe_file_name, + name, + contact, + output_folder, + template, + w3css, + headline + ) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Generated {total_row_number} chats in {total_time:.2f} seconds{CLEAR_LINE}") def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline): """Generate a single HTML file for a chat.""" diff --git a/Whatsapp_Chat_Exporter/exported_handler.py b/Whatsapp_Chat_Exporter/exported_handler.py index 9e53c23..caa3bc1 100644 --- a/Whatsapp_Chat_Exporter/exported_handler.py +++ b/Whatsapp_Chat_Exporter/exported_handler.py @@ -4,6 +4,7 @@ import os import logging from datetime import datetime from mimetypes import MimeTypes +from tqdm import tqdm from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device @@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False): # Second pass: process the messages with open(path, "r", encoding="utf8") as file: - for index, line in enumerate(file): - you, user_identification_done = process_line( - line, index, chat, path, you, - assume_first_as_me, user_identification_done - ) + with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar: + for index, line in enumerate(file): + you, user_identification_done = process_line( + line, index, chat, path, you, + assume_first_as_me, user_identification_done + ) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} messages & media in {total_time:.2f} seconds{CLEAR_LINE}") - # Show progress - if index % 1000 == 0: - logger.info(f"Processing messages & media...({index}/{total_row_number})\r") - - logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}") return data diff --git a/Whatsapp_Chat_Exporter/ios_handler.py b/Whatsapp_Chat_Exporter/ios_handler.py index 5a3230e..c18ef8d 100644 --- a/Whatsapp_Chat_Exporter/ios_handler.py +++ b/Whatsapp_Chat_Exporter/ios_handler.py @@ -4,6 +4,7 @@ import os import logging import shutil from glob import glob +from tqdm import tqdm from pathlib import Path from mimetypes import MimeTypes from markupsafe import escape as htmle @@ -23,17 +24,18 @@ def contacts(db, data): logger.info(f"Pre-processing contacts...({total_row_number})\r") c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""") - content = c.fetchone() - while content is not None: - zwhatsapp_id = content["ZWHATSAPPID"] - if not zwhatsapp_id.endswith("@s.whatsapp.net"): - zwhatsapp_id += "@s.whatsapp.net" + with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar: + while (content := c.fetchone()) is not None: + zwhatsapp_id = content["ZWHATSAPPID"] + if not zwhatsapp_id.endswith("@s.whatsapp.net"): + zwhatsapp_id += "@s.whatsapp.net" - current_chat = ChatStore(Device.IOS) - current_chat.status = content["ZABOUTTEXT"] - data.add_chat(zwhatsapp_id, current_chat) - content = c.fetchone() - logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}") + current_chat = ChatStore(Device.IOS) + current_chat.status = content["ZABOUTTEXT"] + data.add_chat(zwhatsapp_id, current_chat) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Pre-processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}") def process_contact_avatars(current_chat, media_folder, contact_id): @@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, """ c.execute(contact_query) total_row_number = c.fetchone()[0] - logger.info(f"Processing contacts...({total_row_number})\r") # Get distinct contacts contacts_query = f""" @@ -114,24 +115,24 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, c.execute(contacts_query) # Process each contact - content = c.fetchone() - while content is not None: - contact_name = get_contact_name(content) - contact_id = content["ZCONTACTJID"] + with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar: + while (content := c.fetchone()) is not None: + contact_name = get_contact_name(content) + contact_id = content["ZCONTACTJID"] - # Add or update chat - if contact_id not in data: - current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder)) - else: - current_chat = data.get_chat(contact_id) - current_chat.name = contact_name - current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg") + # Add or update chat + if contact_id not in data: + current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder)) + else: + current_chat = data.get_chat(contact_id) + current_chat.name = contact_name + current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg") - # Process avatar images - process_contact_avatars(current_chat, media_folder, contact_id) - content = c.fetchone() - - logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}") + # Process avatar images + process_contact_avatars(current_chat, media_folder, contact_id) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}") # Get message count message_count_query = f""" @@ -190,46 +191,42 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]} # Process each message - i = 0 - content = c.fetchone() - while content is not None: - contact_id = content["ZCONTACTJID"] - message_pk = content["Z_PK"] - is_group_message = content["ZGROUPINFO"] is not None + with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar: + while (content := c.fetchone()) is not None: + contact_id = content["ZCONTACTJID"] + message_pk = content["Z_PK"] + is_group_message = content["ZGROUPINFO"] is not None - # Ensure chat exists - if contact_id not in data: - current_chat = data.add_chat(contact_id, ChatStore(Device.IOS)) - process_contact_avatars(current_chat, media_folder, contact_id) - else: - current_chat = data.get_chat(contact_id) + # Ensure chat exists + if contact_id not in data: + current_chat = data.add_chat(contact_id, ChatStore(Device.IOS)) + process_contact_avatars(current_chat, media_folder, contact_id) + else: + current_chat = data.get_chat(contact_id) - # Create message object - ts = APPLE_TIME + content["ZMESSAGEDATE"] - message = Message( - from_me=content["ZISFROMME"], - timestamp=ts, - time=ts, - key_id=content["ZSTANZAID"][:17], - timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET, - message_type=content["ZMESSAGETYPE"], - received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None, - read_timestamp=None # TODO: Add timestamp - ) + # Create message object + ts = APPLE_TIME + content["ZMESSAGEDATE"] + message = Message( + from_me=content["ZISFROMME"], + timestamp=ts, + time=ts, + key_id=content["ZSTANZAID"][:17], + timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET, + message_type=content["ZMESSAGETYPE"], + received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None, + read_timestamp=None # TODO: Add timestamp + ) - # Process message data - invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply) + # Process message data + invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply) - # Add valid messages to chat - if not invalid: - current_chat.add_message(message_pk, message) + # Add valid messages to chat + if not invalid: + current_chat.add_message(message_pk, message) - # Update progress - i += 1 - if i % 1000 == 0: - logger.info(f"Processing messages...({i}/{total_row_number})\r") - content = c.fetchone() - logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}") + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}") def process_message_data(message, content, is_group_message, data, message_map, no_reply): @@ -371,17 +368,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa # Process each media item mime = MimeTypes() - i = 0 - content = c.fetchone() - while content is not None: - process_media_item(content, data, media_folder, mime, separate_media) - - # Update progress - i += 1 - if i % 100 == 0: - logger.info(f"Processing media...({i}/{total_row_number})\r") - content = c.fetchone() - logger.info(f"Processed {total_row_number} media{CLEAR_LINE}") + with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar: + while (content := c.fetchone()) is not None: + process_media_item(content, data, media_folder, mime, separate_media) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}") def process_media_item(content, data, media_folder, mime, separate_media): @@ -467,10 +459,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): Path(path).mkdir(parents=True, exist_ok=True) # Process each vCard - for index, content in enumerate(contents): - process_vcard_item(content, path, data) - logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r") - logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}") + with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar: + for content in contents: + process_vcard_item(content, path, data) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}") def process_vcard_item(content, path, data): @@ -530,8 +524,6 @@ def calls(db, data, timezone_offset, filter_chat): if total_row_number == 0: return - logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n") - # Fetch call records calls_query = f""" SELECT ZCALLIDSTRING, @@ -556,14 +548,15 @@ def calls(db, data, timezone_offset, filter_chat): # Create calls chat chat = ChatStore(Device.ANDROID, "WhatsApp Calls") - # Process each call - content = c.fetchone() - while content is not None: - process_call_record(content, chat, data, timezone_offset) - content = c.fetchone() + with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar: + while (content := c.fetchone()) is not None: + process_call_record(content, chat, data, timezone_offset) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] # Add calls chat to data data.add_chat("000000000000000", chat) + logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}") def process_call_record(content, chat, data, timezone_offset): diff --git a/Whatsapp_Chat_Exporter/ios_media_handler.py b/Whatsapp_Chat_Exporter/ios_media_handler.py index 4416727..17fba78 100644 --- a/Whatsapp_Chat_Exporter/ios_media_handler.py +++ b/Whatsapp_Chat_Exporter/ios_media_handler.py @@ -6,6 +6,7 @@ import sqlite3 import os import getpass from sys import exit, platform as osname +from tqdm import tqdm from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier from Whatsapp_Chat_Exporter.bplist import BPListReader try: @@ -89,7 +90,7 @@ class BackupExtractor: Args: password (str): The password for the encrypted backup. """ - logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}") + logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}") self.backup = EncryptedBackup( backup_directory=self.base_dir, passphrase=password, @@ -97,7 +98,7 @@ class BackupExtractor: check_same_thread=False, decrypt_chunk_size=self.decrypt_chunk_size, ) - logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}") + logger.info(f"iOS backup is opened successfully{CLEAR_LINE}") logger.info("Decrypting WhatsApp database...\r") try: self.backup.extract_file( @@ -130,9 +131,12 @@ class BackupExtractor: def _extract_decrypted_files(self): """Extract all WhatsApp files after decryption""" + pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False) def extract_progress_handler(file_id, domain, relative_path, n, total_files): - if n % 100 == 0: - logger.info(f"Decrypting and extracting files...({n}/{total_files})\r") + if pbar.total is None: + pbar.total = total_files + pbar.n = n + pbar.refresh() return True self.backup.extract_files( @@ -141,7 +145,9 @@ class BackupExtractor: preserve_folders=True, filter_callback=extract_progress_handler ) - logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}") + total_time = pbar.format_dict['elapsed'] + pbar.close() + logger.info(f"All required files are decrypted and extracted in {total_time:.2f} seconds{CLEAR_LINE}") def _extract_unencrypted_backup(self): """ @@ -192,7 +198,6 @@ class BackupExtractor: c = manifest.cursor() c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'") total_row_number = c.fetchone()[0] - logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r") c.execute( f""" SELECT fileID, relativePath, flags, file AS metadata, @@ -205,33 +210,30 @@ class BackupExtractor: if not os.path.isdir(_wts_id): os.mkdir(_wts_id) - row = c.fetchone() - while row is not None: - if not row["relativePath"]: # Skip empty relative paths - row = c.fetchone() - continue + with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar: + while (row := c.fetchone()) is not None: + if not row["relativePath"]: # Skip empty relative paths + continue - destination = os.path.join(_wts_id, row["relativePath"]) - hashes = row["fileID"] - folder = hashes[:2] - flags = row["flags"] + destination = os.path.join(_wts_id, row["relativePath"]) + hashes = row["fileID"] + folder = hashes[:2] + flags = row["flags"] - if flags == 2: # Directory - try: - os.mkdir(destination) - except FileExistsError: - pass - elif flags == 1: # File - shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination) - metadata = BPListReader(row["metadata"]).parse() - creation = metadata["$objects"][1]["Birth"] - modification = metadata["$objects"][1]["LastModified"] - os.utime(destination, (modification, modification)) - - if row["_index"] % 100 == 0: - logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r") - row = c.fetchone() - logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}") + if flags == 2: # Directory + try: + os.mkdir(destination) + except FileExistsError: + pass + elif flags == 1: # File + shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination) + metadata = BPListReader(row["metadata"]).parse() + _creation = metadata["$objects"][1]["Birth"] + modification = metadata["$objects"][1]["LastModified"] + os.utime(destination, (modification, modification)) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Extracted {total_row_number} WhatsApp files in {total_time:.2f} seconds{CLEAR_LINE}") def extract_media(base_dir, identifiers, decrypt_chunk_size): diff --git a/Whatsapp_Chat_Exporter/utility.py b/Whatsapp_Chat_Exporter/utility.py index a147dfb..e132b62 100644 --- a/Whatsapp_Chat_Exporter/utility.py +++ b/Whatsapp_Chat_Exporter/utility.py @@ -5,13 +5,13 @@ import json import os import unicodedata import re -import string import math import shutil from bleach import clean as sanitize from markupsafe import Markup from datetime import datetime, timedelta from enum import IntEnum +from tqdm import tqdm from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing from typing import Dict, List, Optional, Tuple, Union try: @@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection): with open(json_file, "r") as f: temp_data = json.loads(f.read()) total_row_number = len(tuple(temp_data.keys())) - logger.info(f"Importing chats from JSON...(0/{total_row_number})\r") - for index, (jid, chat_data) in enumerate(temp_data.items()): - chat = ChatStore.from_json(chat_data) - data.add_chat(jid, chat) - logger.info( - f"Importing chats from JSON...({index + 1}/{total_row_number})\r") - logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}") + with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar: + for jid, chat_data in temp_data.items(): + chat = ChatStore.from_json(chat_data) + data.add_chat(jid, chat) + pbar.update(1) + total_time = pbar.format_dict['elapsed'] + logger.info(f"Imported {total_row_number} chats from JSON in {total_time:.2f} seconds{CLEAR_LINE}") def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool): @@ -439,7 +439,7 @@ CRYPT14_OFFSETS = ( {"iv": 67, "db": 193}, {"iv": 67, "db": 194}, {"iv": 67, "db": 158}, - {"iv": 67, "db": 196} + {"iv": 67, "db": 196}, )