mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-02-08 10:20:33 +00:00
Refactor to use tqdm for showing progress
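The change repeated in every hunk below: a hand-rolled "(i/total)" counter logged with carriage returns is replaced by a tqdm progress bar that owns the counting. A minimal, self-contained sketch of the new shape (names here are illustrative, not the project's exact code):

    from tqdm import tqdm

    rows = range(1000)  # stand-in for rows fetched from a database cursor
    with tqdm(total=len(rows), desc="Processing messages", unit="msg", leave=False) as pbar:
        for row in rows:
            # ... handle the row ...
            pbar.update(1)  # one tick per row, replacing the old "(i/total)" log lines

With leave=False the bar disappears once the loop finishes, so the permanent record is still whatever summary line goes through the logger afterwards.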
@@ -19,6 +19,7 @@ from Whatsapp_Chat_Exporter.utility import telegram_json_format
 from argparse import ArgumentParser, SUPPRESS
 from datetime import datetime
 from getpass import getpass
+from tqdm import tqdm
 from sys import exit
 from typing import Optional, List, Dict
 from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
@@ -665,24 +666,27 @@ def export_multiple_json(args, data: Dict) -> None:

     # Export each chat
     total = len(data.keys())
-    for index, jik in enumerate(data.keys()):
-        if data[jik]["name"] is not None:
-            contact = data[jik]["name"].replace('/', '')
-        else:
-            contact = jik.replace('+', '')
+    with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar:
+        for jik in data.keys():
+            if data[jik]["name"] is not None:
+                contact = data[jik]["name"].replace('/', '')
+            else:
+                contact = jik.replace('+', '')

-        if args.telegram:
-            messages = telegram_json_format(jik, data[jik], args.timezone_offset)
-        else:
-            messages = {jik: data[jik]}
-        with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
-            file_content = json.dumps(
-                messages,
-                ensure_ascii=not args.avoid_encoding_json,
-                indent=args.pretty_print_json
-            )
-            f.write(file_content)
-        logger.info(f"Writing JSON file...({index + 1}/{total})\r")
+            if args.telegram:
+                messages = telegram_json_format(jik, data[jik], args.timezone_offset)
+            else:
+                messages = {jik: data[jik]}
+            with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
+                file_content = json.dumps(
+                    messages,
+                    ensure_ascii=not args.avoid_encoding_json,
+                    indent=args.pretty_print_json
+                )
+                f.write(file_content)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total} JSON files in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_exported_chat(args, data: ChatCollection) -> None:
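A subtlety in the timing idiom above: total_time is read from the bar after its with-block has closed. tqdm keeps its statistics on the instance after close(), so format_dict['elapsed'] remains readable for the summary line. A small sketch of just that idiom:

    import time
    from tqdm import tqdm

    with tqdm(total=3, desc="Generating JSON files", unit="file", leave=False) as pbar:
        for _ in range(3):
            time.sleep(0.1)  # stand-in for writing one file
            pbar.update(1)
    # The bar is closed here, but the instance keeps its counters,
    # so the elapsed time can still be read for the summary log line.
    total_time = pbar.format_dict['elapsed']
    print(f"Generated 3 JSON files in {total_time:.2f} seconds")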
@@ -1,13 +1,11 @@
-import time
 import hmac
 import io
 import logging
-import threading
 import zlib
 import concurrent.futures
+from tqdm import tqdm
 from typing import Tuple, Union
 from hashlib import sha256
-from sys import exit
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType

 try:
@@ -165,82 +163,64 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->

     # Attempt known offsets first
     for offsets in CRYPT14_OFFSETS:
-        iv = database[offsets["iv"]:offsets["iv"] + 16]
-        db_ciphertext = database[offsets["db"]:]
+        iv = offsets["iv"]
+        db = offsets["db"]
         try:
-            decrypted_db = _decrypt_database(db_ciphertext, main_key, iv)
+            decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key)
         except (zlib.error, ValueError):
-            pass  # Try next offset
+            continue
         else:
             logger.debug(
-                f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}"
+                f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}"
             )
             return decrypted_db  # Successful decryption

-    def animate_message(stop_event):
-        base_msg = "Common offsets failed. Initiating brute-force with multithreading"
-        dots = ["", ".", "..", "..."]
-        i = 0
-        while not stop_event.is_set():
-            logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r")
-            time.sleep(0.3)
-            i += 1
-        logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}")
-
-    # Convert brute force generator into a list for parallel processing
-    offset_combinations = list(brute_force_offset())
-
-    def attempt_decrypt(offset_tuple):
-        """Attempt decryption with the given offsets."""
-        start_iv, end_iv, start_db = offset_tuple
-        iv = database[start_iv:end_iv]
-        db_ciphertext = database[start_db:]
-        logger.debug(f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
-
-        try:
-            db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            return None  # Decryption failed, move to next
-        else:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(
-                f"The offsets of your IV and database are {start_iv} and "
-                f"{start_db}, respectively. To include your offsets in the "
-                "program, please report it by creating an issue on GitHub: "
-                "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
-                f"\nShutting down other threads...{CLEAR_LINE}"
-            )
-            return db
-
-    with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
-        future_to_offset = {executor.submit(attempt_decrypt, offset)
-                            : offset for offset in offset_combinations}
-
-        stop_event = threading.Event()
-        anim_thread = threading.Thread(target=animate_message, args=(stop_event,))
-        anim_thread.start()
-
-        try:
-            for future in concurrent.futures.as_completed(future_to_offset):
-                result = future.result()
-                if result is not None:
-                    # Shutdown remaining threads
-                    executor.shutdown(wait=False, cancel_futures=True)
-                    return result
-
-        except KeyboardInterrupt:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
-            executor.shutdown(wait=False, cancel_futures=True)
-            exit(1)
-        finally:
-            stop_event.set()
-            anim_thread.join()
+    offset_max = 200
+    logger.info(f"Common offsets failed. Attempt to brute-force...{CLEAR_LINE}")
+    with tqdm(total=offset_max ** 2, desc="Brute-forcing offsets", unit="trial", leave=False) as pbar:
+        with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
+            # Map futures to their offsets
+            future_to_offset = {
+                executor.submit(_attempt_decrypt_task, offset, database, main_key): offset
+                for offset in brute_force_offset(offset_max, offset_max)
+            }
+
+            try:
+                for future in concurrent.futures.as_completed(future_to_offset):
+                    pbar.update(1)
+                    result = future.result()
+
+                    if result is not None:
+                        # Success! Shutdown other tasks immediately
+                        executor.shutdown(wait=False, cancel_futures=True)
+
+                        start_iv, _, start_db = future_to_offset[future]
+                        logger.info(
+                            f"The offsets of your IV and database are {start_iv} and "
+                            f"{start_db}, respectively. To include your offsets in the "
+                            "program, please report it by creating an issue on GitHub: "
+                            "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
+                            f"\nShutting down other threads...{CLEAR_LINE}"
+                        )
+                        return result
+            except KeyboardInterrupt:
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise KeyboardInterrupt("Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")

     raise OffsetNotFoundError("Could not find the correct offsets for decryption.")


+def _attempt_decrypt_task(offset_tuple, database, main_key):
+    """Attempt decryption with the given offsets."""
+    start_iv, end_iv, start_db = offset_tuple
+    iv = database[start_iv:end_iv]
+    db_ciphertext = database[start_db:]
+
+    try:
+        return _decrypt_database(db_ciphertext, main_key, iv)
+    except (zlib.error, ValueError):
+        return None


 def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
     """Decrypt a crypt12 database.
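The brute-force hunk above combines three pieces: a thread pool submitting every candidate offset, tqdm ticking once per completed future, and an early shutdown when one candidate succeeds. A runnable sketch of that combination, with dummy work standing in for _attempt_decrypt_task (all names below are illustrative):

    import concurrent.futures
    from tqdm import tqdm

    def attempt(candidate):
        # pretend candidate 137 is the correct offset; everything else fails
        return candidate if candidate == 137 else None

    candidates = range(200)
    with tqdm(total=len(candidates), desc="Brute-forcing", unit="trial", leave=False) as pbar:
        with concurrent.futures.ThreadPoolExecutor(10) as executor:
            future_to_candidate = {executor.submit(attempt, c): c for c in candidates}
            for future in concurrent.futures.as_completed(future_to_candidate):
                pbar.update(1)
                if (result := future.result()) is not None:
                    # cancel_futures (Python 3.9+) drops queued tasks;
                    # already-running ones still finish
                    executor.shutdown(wait=False, cancel_futures=True)
                    print(f"Found candidate {result}")
                    break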
@@ -4,6 +4,7 @@ import logging
 import sqlite3
 import os
 import shutil
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards):
     logger.info(f"Processed {total_row_number} contacts\n")

     c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
-    row = c.fetchone()
-    while row is not None:
-        current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
-        if row["status"] is not None:
-            current_chat.status = row["status"]
-        row = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (row := _fetch_row_safely(c)) is not None:
+            current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
+            if row["status"] is not None:
+                current_chat.status = row["status"]
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")

     return True
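Several hunks switch to a walrus-operator idiom to drain a cursor: the fetch, the None-check, and the loop header collapse into one line. The real _fetch_row_safely is defined elsewhere in the project; the stand-in below is a hypothetical sketch that simply guards fetchone():

    import sqlite3

    def _fetch_row_safely(cursor):
        try:
            return cursor.fetchone()
        except sqlite3.OperationalError:
            return None  # assumption: treat an unreadable row as end-of-data

    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE t (x)")
    db.executemany("INSERT INTO t VALUES (?)", [(i,) for i in range(3)])
    c = db.execute("SELECT x FROM t")
    while (row := _fetch_row_safely(c)) is not None:
        print(row[0])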
@@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     """
     c = db.cursor()
     total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing messages...(0/{total_row_number})\r")

     try:
         content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -84,22 +87,12 @@
     except Exception as e:
         raise e

-    i = 0
-    # Fetch the first row safely
-    content = _fetch_row_safely(content_cursor)
-
-    while content is not None:
-        _process_single_message(data, content, table_message, timezone_offset)
-
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-
-        # Fetch the next row safely
-        content = _fetch_row_safely(content_cursor)
-
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_message(data, content, table_message, timezone_offset)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")


 # Helper functions for message processing
@@ -499,8 +492,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa
     """
     c = db.cursor()
     total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing media...(0/{total_row_number})\r")
-
     try:
         content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
     except sqlite3.OperationalError:
@@ -512,18 +503,12 @@
     # Ensure thumbnails directory exists
     Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)

-    i = 0
-    while content is not None:
-        _process_single_media(data, content, media_folder, mime, separate_media)
-
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-
-        content = content_cursor.fetchone()
-
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_media(data, content, media_folder, mime, separate_media)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")


 # Helper functions for media processing
@@ -701,17 +686,17 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
         rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)

     total_row_number = len(rows)
-    logger.info(f"Processing vCards...(0/{total_row_number})\r")

     # Create vCards directory if it doesn't exist
     path = os.path.join(media_folder, "vCards")
     Path(path).mkdir(parents=True, exist_ok=True)

-    for index, row in enumerate(rows):
-        _process_vcard_row(row, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for row in rows:
+            _process_vcard_row(row, path, data)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")


 def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
     """Execute vCard query for modern WhatsApp database schema."""
@@ -816,15 +801,15 @@ def calls(db, data, timezone_offset, filter_chat):
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

     # Process each call
-    content = calls_data.fetchone()
-    while content is not None:
-        _process_call_record(content, chat, data, timezone_offset)
-        content = calls_data.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := _fetch_row_safely(calls_data)) is not None:
+            _process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']

     # Add the calls chat to the data
     data.add_chat("000000000000000", chat)
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")


 def _get_calls_count(c, filter_chat):
     """Get the count of call records that match the filter."""
@@ -948,7 +933,6 @@ def create_html(
     template = setup_template(template, no_avatar, experimental)

     total_row_number = len(data)
-    logger.info(f"Generating chats...(0/{total_row_number})\r")

     # Create output directory if it doesn't exist
     if not os.path.isdir(output_folder):
@@ -956,43 +940,42 @@

     w3css = get_status_location(output_folder, offline_static)

-    for current, contact in enumerate(data):
-        current_chat = data.get_chat(contact)
-        if len(current_chat) == 0:
-            # Skip empty chats
-            continue
+    with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar:
+        for contact in data:
+            current_chat = data.get_chat(contact)
+            if len(current_chat) == 0:
+                # Skip empty chats
+                continue

-        safe_file_name, name = get_file_name(contact, current_chat)
+            safe_file_name, name = get_file_name(contact, current_chat)

-        if maximum_size is not None:
-            _generate_paginated_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                maximum_size,
-                headline
-            )
-        else:
-            _generate_single_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                headline
-            )
-
-        if current % 10 == 0:
-            logger.info(f"Generating chats...({current}/{total_row_number})\r")
-
-    logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
+            if maximum_size is not None:
+                _generate_paginated_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    maximum_size,
+                    headline
+                )
+            else:
+                _generate_single_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    headline
+                )
+
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total_row_number} chats in {total_time:.2f} seconds{CLEAR_LINE}")


 def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
     """Generate a single HTML file for a chat."""
@@ -4,6 +4,7 @@ import os
 import logging
 from datetime import datetime
 from mimetypes import MimeTypes
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device

@@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False):

     # Second pass: process the messages
     with open(path, "r", encoding="utf8") as file:
-        for index, line in enumerate(file):
-            you, user_identification_done = process_line(
-                line, index, chat, path, you,
-                assume_first_as_me, user_identification_done
-            )
+        with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
+            for index, line in enumerate(file):
+                you, user_identification_done = process_line(
+                    line, index, chat, path, you,
+                    assume_first_as_me, user_identification_done
+                )
+                pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages & media in {total_time:.2f} seconds{CLEAR_LINE}")

-            # Show progress
-            if index % 1000 == 0:
-                logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
-
-    logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
     return data

@@ -4,6 +4,7 @@ import os
 import logging
 import shutil
 from glob import glob
+from tqdm import tqdm
 from pathlib import Path
 from mimetypes import MimeTypes
 from markupsafe import escape as htmle
@@ -23,17 +24,18 @@ def contacts(db, data):
     logger.info(f"Pre-processing contacts...({total_row_number})\r")

     c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
-    content = c.fetchone()
-    while content is not None:
-        zwhatsapp_id = content["ZWHATSAPPID"]
-        if not zwhatsapp_id.endswith("@s.whatsapp.net"):
-            zwhatsapp_id += "@s.whatsapp.net"
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            zwhatsapp_id = content["ZWHATSAPPID"]
+            if not zwhatsapp_id.endswith("@s.whatsapp.net"):
+                zwhatsapp_id += "@s.whatsapp.net"

-        current_chat = ChatStore(Device.IOS)
-        current_chat.status = content["ZABOUTTEXT"]
-        data.add_chat(zwhatsapp_id, current_chat)
-        content = c.fetchone()
-    logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}")
+            current_chat = ChatStore(Device.IOS)
+            current_chat.status = content["ZABOUTTEXT"]
+            data.add_chat(zwhatsapp_id, current_chat)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Pre-processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_contact_avatars(current_chat, media_folder, contact_id):
@@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
     """
     c.execute(contact_query)
     total_row_number = c.fetchone()[0]
-    logger.info(f"Processing contacts...({total_row_number})\r")

     # Get distinct contacts
     contacts_query = f"""
@@ -114,24 +115,24 @@
     c.execute(contacts_query)

     # Process each contact
-    content = c.fetchone()
-    while content is not None:
-        contact_name = get_contact_name(content)
-        contact_id = content["ZCONTACTJID"]
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_name = get_contact_name(content)
+            contact_id = content["ZCONTACTJID"]

-        # Add or update chat
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
-        else:
-            current_chat = data.get_chat(contact_id)
-            current_chat.name = contact_name
-        current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
+            # Add or update chat
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
+            else:
+                current_chat = data.get_chat(contact_id)
+                current_chat.name = contact_name
+            current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")

-        # Process avatar images
-        process_contact_avatars(current_chat, media_folder, contact_id)
-        content = c.fetchone()
-
-    logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
+            # Process avatar images
+            process_contact_avatars(current_chat, media_folder, contact_id)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {total_time:.2f} seconds{CLEAR_LINE}")

     # Get message count
     message_count_query = f"""
@@ -190,46 +191,42 @@
     message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]}

     # Process each message
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        contact_id = content["ZCONTACTJID"]
-        message_pk = content["Z_PK"]
-        is_group_message = content["ZGROUPINFO"] is not None
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_id = content["ZCONTACTJID"]
+            message_pk = content["Z_PK"]
+            is_group_message = content["ZGROUPINFO"] is not None

-        # Ensure chat exists
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
-            process_contact_avatars(current_chat, media_folder, contact_id)
-        else:
-            current_chat = data.get_chat(contact_id)
+            # Ensure chat exists
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
+                process_contact_avatars(current_chat, media_folder, contact_id)
+            else:
+                current_chat = data.get_chat(contact_id)

-        # Create message object
-        ts = APPLE_TIME + content["ZMESSAGEDATE"]
-        message = Message(
-            from_me=content["ZISFROMME"],
-            timestamp=ts,
-            time=ts,
-            key_id=content["ZSTANZAID"][:17],
-            timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
-            message_type=content["ZMESSAGETYPE"],
-            received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
-            read_timestamp=None  # TODO: Add timestamp
-        )
+            # Create message object
+            ts = APPLE_TIME + content["ZMESSAGEDATE"]
+            message = Message(
+                from_me=content["ZISFROMME"],
+                timestamp=ts,
+                time=ts,
+                key_id=content["ZSTANZAID"][:17],
+                timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
+                message_type=content["ZMESSAGETYPE"],
+                received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
+                read_timestamp=None  # TODO: Add timestamp
+            )

-        # Process message data
-        invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
+            # Process message data
+            invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)

-        # Add valid messages to chat
-        if not invalid:
-            current_chat.add_message(message_pk, message)
+            # Add valid messages to chat
+            if not invalid:
+                current_chat.add_message(message_pk, message)

-        # Update progress
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_message_data(message, content, is_group_message, data, message_map, no_reply):
@@ -371,17 +368,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separa

     # Process each media item
     mime = MimeTypes()
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        process_media_item(content, data, media_folder, mime, separate_media)
-
-        # Update progress
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_media_item(content, data, media_folder, mime, separate_media)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_media_item(content, data, media_folder, mime, separate_media):
@@ -467,10 +459,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
     Path(path).mkdir(parents=True, exist_ok=True)

     # Process each vCard
-    for index, content in enumerate(contents):
-        process_vcard_item(content, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for content in contents:
+            process_vcard_item(content, path, data)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_vcard_item(content, path, data):
@@ -530,8 +524,6 @@ def calls(db, data, timezone_offset, filter_chat):
     if total_row_number == 0:
         return

-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")
-
     # Fetch call records
     calls_query = f"""
         SELECT ZCALLIDSTRING,
@@ -556,14 +548,15 @@
     # Create calls chat
     chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

     # Process each call
-    content = c.fetchone()
-    while content is not None:
-        process_call_record(content, chat, data, timezone_offset)
-        content = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']

     # Add calls chat to data
     data.add_chat("000000000000000", chat)
+    logger.info(f"Processed {total_row_number} calls in {total_time:.2f} seconds{CLEAR_LINE}")


 def process_call_record(content, chat, data, timezone_offset):
@@ -6,6 +6,7 @@ import sqlite3
 import os
 import getpass
 from sys import exit, platform as osname
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier
 from Whatsapp_Chat_Exporter.bplist import BPListReader
 try:
@@ -89,7 +90,7 @@ class BackupExtractor:
         Args:
             password (str): The password for the encrypted backup.
         """
-        logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}")
+        logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}")
         self.backup = EncryptedBackup(
             backup_directory=self.base_dir,
             passphrase=password,
@@ -97,7 +98,7 @@
             check_same_thread=False,
             decrypt_chunk_size=self.decrypt_chunk_size,
         )
-        logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}")
+        logger.info(f"iOS backup is opened successfully{CLEAR_LINE}")
         logger.info("Decrypting WhatsApp database...\r")
         try:
             self.backup.extract_file(
@@ -130,9 +131,12 @@

     def _extract_decrypted_files(self):
         """Extract all WhatsApp files after decryption"""
+        pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)
         def extract_progress_handler(file_id, domain, relative_path, n, total_files):
-            if n % 100 == 0:
-                logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
+            if pbar.total is None:
+                pbar.total = total_files
+            pbar.n = n
+            pbar.refresh()
             return True

         self.backup.extract_files(
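This hunk shows the one place where the bar cannot wrap a loop: progress arrives through a callback, and the total is only learned on the first call. The bar is therefore created without a total and driven manually via pbar.n and pbar.refresh() instead of pbar.update(). A sketch of that pattern in isolation (run_with_progress and the names below are illustrative):

    from tqdm import tqdm

    def run_with_progress(items, progress_callback):
        total = len(items)
        for n, item in enumerate(items, 1):
            progress_callback(item, n, total)

    pbar = tqdm(desc="Extracting", unit="file", leave=False)

    def handler(item, n, total):
        if pbar.total is None:
            pbar.total = total  # set the total on the first callback
        pbar.n = n              # jump the bar straight to position n
        pbar.refresh()
        return True

    run_with_progress(["a", "b", "c"], handler)
    pbar.close()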
@@ -141,7 +145,9 @@
             preserve_folders=True,
             filter_callback=extract_progress_handler
         )
-        logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}")
+        total_time = pbar.format_dict['elapsed']
+        pbar.close()
+        logger.info(f"All required files are decrypted and extracted in {total_time:.2f} seconds{CLEAR_LINE}")

     def _extract_unencrypted_backup(self):
         """
@@ -192,7 +198,6 @@
         c = manifest.cursor()
         c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
         total_row_number = c.fetchone()[0]
-        logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
         c.execute(
             f"""
             SELECT fileID, relativePath, flags, file AS metadata,
@@ -205,33 +210,30 @@
         if not os.path.isdir(_wts_id):
             os.mkdir(_wts_id)

-        row = c.fetchone()
-        while row is not None:
-            if not row["relativePath"]:  # Skip empty relative paths
-                row = c.fetchone()
-                continue
+        with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar:
+            while (row := c.fetchone()) is not None:
+                if not row["relativePath"]:  # Skip empty relative paths
+                    continue

-            destination = os.path.join(_wts_id, row["relativePath"])
-            hashes = row["fileID"]
-            folder = hashes[:2]
-            flags = row["flags"]
+                destination = os.path.join(_wts_id, row["relativePath"])
+                hashes = row["fileID"]
+                folder = hashes[:2]
+                flags = row["flags"]

-            if flags == 2:  # Directory
-                try:
-                    os.mkdir(destination)
-                except FileExistsError:
-                    pass
-            elif flags == 1:  # File
-                shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
-                metadata = BPListReader(row["metadata"]).parse()
-                creation = metadata["$objects"][1]["Birth"]
-                modification = metadata["$objects"][1]["LastModified"]
-                os.utime(destination, (modification, modification))
-
-            if row["_index"] % 100 == 0:
-                logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r")
-            row = c.fetchone()
-        logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
+                if flags == 2:  # Directory
+                    try:
+                        os.mkdir(destination)
+                    except FileExistsError:
+                        pass
+                elif flags == 1:  # File
+                    shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
+                    metadata = BPListReader(row["metadata"]).parse()
+                    _creation = metadata["$objects"][1]["Birth"]
+                    modification = metadata["$objects"][1]["LastModified"]
+                    os.utime(destination, (modification, modification))
+                pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+        logger.info(f"Extracted {total_row_number} WhatsApp files in {total_time:.2f} seconds{CLEAR_LINE}")


 def extract_media(base_dir, identifiers, decrypt_chunk_size):
@@ -5,13 +5,13 @@ import json
 import os
 import unicodedata
 import re
 import string
 import math
 import shutil
 from bleach import clean as sanitize
 from markupsafe import Markup
 from datetime import datetime, timedelta
 from enum import IntEnum
+from tqdm import tqdm
 from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
 from typing import Dict, List, Optional, Tuple, Union
 try:
@@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection):
     with open(json_file, "r") as f:
         temp_data = json.loads(f.read())
     total_row_number = len(tuple(temp_data.keys()))
-    logger.info(f"Importing chats from JSON...(0/{total_row_number})\r")
-    for index, (jid, chat_data) in enumerate(temp_data.items()):
-        chat = ChatStore.from_json(chat_data)
-        data.add_chat(jid, chat)
-        logger.info(
-            f"Importing chats from JSON...({index + 1}/{total_row_number})\r")
-    logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar:
+        for jid, chat_data in temp_data.items():
+            chat = ChatStore.from_json(chat_data)
+            data.add_chat(jid, chat)
+            pbar.update(1)
+    total_time = pbar.format_dict['elapsed']
+    logger.info(f"Imported {total_row_number} chats from JSON in {total_time:.2f} seconds{CLEAR_LINE}")


 def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -439,7 +439,7 @@ CRYPT14_OFFSETS = (
     {"iv": 67, "db": 193},
     {"iv": 67, "db": 194},
     {"iv": 67, "db": 158},
-    {"iv": 67, "db": 196}
+    {"iv": 67, "db": 196},
 )